diff --git a/CHANGELOG.md b/CHANGELOG.md index df309681..93e558a0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,26 @@ a [GitHub Release](https://github.com/colbymchenry/codegraph/releases) tagged This project follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/) and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.9.4] - 2026-05-22 + +### Fixed +- **`Fatal process out of memory: Zone` crash while indexing large projects.** + On Node.js 22 and 24 — including CodeGraph's own bundled runtime — running + `codegraph index` / `codegraph init` on a large multi-language repo could + abort the entire process partway through parsing with + `Fatal process out of memory: Zone`, even with tens of GB of RAM free (the + failure is in a V8-internal compilation arena, not the JS heap). The cause is + V8's "turboshaft" optimizing WASM compiler exhausting its Zone budget while + compiling tree-sitter's large WebAssembly grammars on a background thread. + CodeGraph now runs with V8's `--liftoff-only`, which keeps grammar compilation + on the baseline compiler and never reaches the optimizing tier, eliminating + the crash; indexing output is otherwise unchanged. The bundled launcher passes + the flag directly, and any other launch path (from source, `npx`, a globally + linked dev build) re-execs once with it automatically. Resolves + [#298](https://github.com/colbymchenry/codegraph/issues/298) and + [#293](https://github.com/colbymchenry/codegraph/issues/293). (Node 25 stays + blocked — its variant of this V8 bug is not resolved by `--liftoff-only`.) + ## [0.9.3] - 2026-05-22 ### Added @@ -116,6 +136,7 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). find its bundle. The release pipeline now verifies every package reached the registry (and is idempotent), so a release can't pass green-but-broken again. +[0.9.4]: https://github.com/colbymchenry/codegraph/releases/tag/v0.9.4 [0.9.3]: https://github.com/colbymchenry/codegraph/releases/tag/v0.9.3 [0.9.2]: https://github.com/colbymchenry/codegraph/releases/tag/v0.9.2 [0.9.1]: https://github.com/colbymchenry/codegraph/releases/tag/v0.9.1 diff --git a/__tests__/wasm-runtime-flags.test.ts b/__tests__/wasm-runtime-flags.test.ts new file mode 100644 index 00000000..a4dae8bb --- /dev/null +++ b/__tests__/wasm-runtime-flags.test.ts @@ -0,0 +1,87 @@ +/** + * WASM runtime flags — the workaround for the V8 turboshaft WASM Zone OOM + * (`Fatal process out of memory: Zone`) that crashed `codegraph index` on large + * polyglot repos under Node >= 22. See issues #293 and #298. + * + * The crash was reproduced with the real indexer on the bundled Node 24 runtime; + * empirically only `--liftoff-only` prevents it (`--no-wasm-tier-up` / + * `--no-wasm-dynamic-tiering` do not), and the flag must be on node's command + * line — `setFlagsFromString`, worker `execArgv`, and `NODE_OPTIONS` all fail. + * These tests pin that contract so it can't silently regress. + */ +import { describe, it, expect } from 'vitest'; +import { spawnSync } from 'child_process'; +import * as fs from 'fs'; +import * as os from 'os'; +import * as path from 'path'; +import { + WASM_RUNTIME_FLAGS, + processHasWasmRuntimeFlags, + buildRelaunchArgv, +} from '../src/extraction/wasm-runtime-flags'; + +describe('WASM_RUNTIME_FLAGS', () => { + it('pins --liftoff-only (the only flag shown to stop the turboshaft Zone OOM)', () => { + // On Node 24, --no-wasm-tier-up and --no-wasm-dynamic-tiering both still + // crash; only --liftoff-only forces grammars onto the Liftoff baseline and + // off the optimizing tier. Pin it so it can't be swapped for an ineffective + // flag. + expect(WASM_RUNTIME_FLAGS).toContain('--liftoff-only'); + }); + + it('every flag is a real, accepted flag on the running Node/V8 runtime', () => { + // node rejects unknown CLI flags at startup, so a renamed/removed flag would + // break the bundled launcher and make the relaunch guard a silent no-op. + // Prove each flag actually launches node here. + const res = spawnSync( + process.execPath, + [...WASM_RUNTIME_FLAGS, '-e', 'process.exit(0)'], + { encoding: 'utf8' } + ); + expect(res.status, `node rejected ${WASM_RUNTIME_FLAGS.join(' ')}:\n${res.stderr}`).toBe(0); + }); +}); + +describe('processHasWasmRuntimeFlags', () => { + it('is true only when every required flag is present', () => { + expect(processHasWasmRuntimeFlags(['--liftoff-only'])).toBe(true); + expect(processHasWasmRuntimeFlags(['--liftoff-only', '--enable-source-maps'])).toBe(true); + }); + + it('is false when the flags are absent', () => { + expect(processHasWasmRuntimeFlags([])).toBe(false); + expect(processHasWasmRuntimeFlags(['--max-old-space-size=4096'])).toBe(false); + }); +}); + +describe('buildRelaunchArgv', () => { + it('places the wasm flags first, then the script and its args', () => { + expect(buildRelaunchArgv('/x/codegraph.js', ['index', '/repo'], [])).toEqual([ + '--liftoff-only', + '/x/codegraph.js', + 'index', + '/repo', + ]); + }); + + it('preserves other existing node flags without duplicating ours', () => { + expect( + buildRelaunchArgv('/x/codegraph.js', ['status'], ['--liftoff-only', '--enable-source-maps']) + ).toEqual(['--liftoff-only', '--enable-source-maps', '/x/codegraph.js', 'status']); + }); + + it('produces an argv that actually launches node WITH the flag applied', () => { + // End-to-end proof of the delivery mechanism without needing the crash: + // run the constructed argv and confirm the child sees the flag in execArgv. + const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-relaunch-')); + try { + const harness = path.join(dir, 'harness.cjs'); + fs.writeFileSync(harness, 'process.stdout.write(JSON.stringify(process.execArgv));'); + const res = spawnSync(process.execPath, buildRelaunchArgv(harness, []), { encoding: 'utf8' }); + expect(res.status, res.stderr).toBe(0); + expect(JSON.parse(res.stdout)).toContain('--liftoff-only'); + } finally { + fs.rmSync(dir, { recursive: true, force: true }); + } + }); +}); diff --git a/package-lock.json b/package-lock.json index 36c592b1..cad34c1b 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "@colbymchenry/codegraph", - "version": "0.9.3", + "version": "0.9.4", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@colbymchenry/codegraph", - "version": "0.9.3", + "version": "0.9.4", "license": "MIT", "dependencies": { "@clack/prompts": "^1.3.0", diff --git a/package.json b/package.json index f813c1e6..5455ced9 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@colbymchenry/codegraph", - "version": "0.9.3", + "version": "0.9.4", "description": "Supercharge Claude Code with semantic code intelligence. 94% fewer tool calls • 77% faster exploration • 100% local.", "main": "dist/index.js", "types": "dist/index.d.ts", diff --git a/scripts/build-bundle.sh b/scripts/build-bundle.sh index a00f3369..120ac981 100755 --- a/scripts/build-bundle.sh +++ b/scripts/build-bundle.sh @@ -70,9 +70,18 @@ rm -f "$STAGE/lib/package-lock.json" # 4. Vendored Node + launcher (the launcher uses the bundled Node by relative # path, so no system Node is ever needed). +# +# `--liftoff-only`: keep tree-sitter's large WASM grammars on V8's Liftoff +# baseline compiler so they never reach the turboshaft optimizing tier, whose +# per-compilation Zone arena OOMs the whole process (`Fatal process out of +# memory: Zone`) on Node >= 22 — even with tens of GB free. The flag is read at +# V8 engine init so it must be on node's command line; the parse worker inherits +# it. See issues #293/#298 and src/extraction/wasm-runtime-flags.ts. (The CLI +# also self-relaunches with this flag when launched without it, so non-bundled +# runs are covered too; passing it here avoids that extra spawn.) if [ "$OSFAM" = "win32" ]; then cp "$NODE_BIN" "$STAGE/node.exe" - printf '@"%%~dp0..\\node.exe" "%%~dp0..\\lib\\dist\\bin\\codegraph.js" %%*\r\n' \ + printf '@"%%~dp0..\\node.exe" --liftoff-only "%%~dp0..\\lib\\dist\\bin\\codegraph.js" %%*\r\n' \ > "$STAGE/bin/codegraph.cmd" else cp "$NODE_BIN" "$STAGE/node" @@ -89,7 +98,8 @@ while [ -L "$SELF" ]; do esac done DIR="$(cd "$(dirname "$SELF")/.." && pwd)" -exec "$DIR/node" "$DIR/lib/dist/bin/codegraph.js" "$@" +# --liftoff-only: avoid the V8 turboshaft WASM Zone OOM (issues #293/#298). +exec "$DIR/node" --liftoff-only "$DIR/lib/dist/bin/codegraph.js" "$@" LAUNCH chmod +x "$STAGE/bin/codegraph" fi diff --git a/scripts/npm-shim.js b/scripts/npm-shim.js index bea905f3..81012124 100755 --- a/scripts/npm-shim.js +++ b/scripts/npm-shim.js @@ -31,7 +31,10 @@ try { if (isWindows) { command = require.resolve(pkg + '/node.exe'); var entry = require.resolve(pkg + '/lib/dist/bin/codegraph.js'); - args = [entry].concat(process.argv.slice(2)); + // --liftoff-only: keep tree-sitter's WASM grammars off V8's turboshaft tier + // to avoid the Zone OOM on Node >= 22 (issues #293/#298). The unix launcher + // passes this too; on Windows we invoke node.exe directly so add it here. + args = ['--liftoff-only', entry].concat(process.argv.slice(2)); } else { command = require.resolve(pkg + '/bin/codegraph'); args = process.argv.slice(2); diff --git a/src/bin/codegraph.ts b/src/bin/codegraph.ts index 6f90e6fe..711d39c8 100644 --- a/src/bin/codegraph.ts +++ b/src/bin/codegraph.ts @@ -27,6 +27,7 @@ import { createShimmerProgress } from '../ui/shimmer-progress'; import { getGlyphs } from '../ui/glyphs'; import { buildNode25BlockBanner, buildNodeTooOldBanner, MIN_NODE_MAJOR } from './node-version-check'; +import { relaunchWithWasmRuntimeFlagsIfNeeded } from '../extraction/wasm-runtime-flags'; // Lazy-load heavy modules (CodeGraph, runInstaller) to keep CLI startup fast. async function loadCodeGraph(): Promise { @@ -75,6 +76,13 @@ if (nodeMajor < MIN_NODE_MAJOR) { // Override active — banner shown for visibility, continuing. } +// Re-exec with V8's `--liftoff-only` if it isn't already set, so tree-sitter's +// large WASM grammars never hit the turboshaft Zone OOM (`Fatal process out of +// memory: Zone`) on Node >= 22. No-op under the bundled launcher, which already +// passes the flag. Must run before any grammar (in the parse worker, which +// inherits this process's flags) is compiled. See ../extraction/wasm-runtime-flags. +relaunchWithWasmRuntimeFlagsIfNeeded(__filename); + // Check if running with no arguments - run installer if (process.argv.length === 2) { import('../installer').then(({ runInstaller }) => diff --git a/src/extraction/wasm-runtime-flags.ts b/src/extraction/wasm-runtime-flags.ts new file mode 100644 index 00000000..f33a19ff --- /dev/null +++ b/src/extraction/wasm-runtime-flags.ts @@ -0,0 +1,96 @@ +/** + * WASM runtime flags — workaround for the V8 turboshaft WASM Zone OOM. + * + * tree-sitter grammars are large WebAssembly modules. On Node >= 22 the V8 + * "turboshaft" optimizing WASM compiler can exhaust its per-compilation Zone + * arena while compiling these grammars on a background thread, aborting the + * whole process with `Fatal process out of memory: Zone` — even with tens of + * GB of system memory free, because the Zone is a V8-internal arena, not the + * JS heap. Reproduced on Node 22 and 24; Node 25 is already hard-blocked for + * the same crash (see ../bin/node-version-check.ts). See issues #293 and #298. + * + * `--liftoff-only` forces every WASM module to the Liftoff baseline compiler + * and never runs turboshaft, which eliminates the crash. Parsing stays fully + * correct; we only forgo the (marginal, and for grammars rarely reached) + * optimized-tier speedup. + * + * This flag MUST be on node's command line — it is read by V8 at engine init, + * before any of our JS runs. Empirically (Node 24) none of these work: + * - `v8.setFlagsFromString('--liftoff-only')` at runtime — too late. + * - Worker `execArgv: ['--liftoff-only']` — rejected (ERR_WORKER_INVALID_EXEC_ARGV). + * - `NODE_OPTIONS=--liftoff-only` — not on Node's NODE_OPTIONS allowlist. + * Also empirically, `--no-wasm-tier-up` / `--no-wasm-dynamic-tiering` do NOT + * prevent the crash — only disabling the optimizing tier entirely does. + * + * Delivery: the bundled launcher passes the flag directly (see + * scripts/build-bundle.sh and scripts/npm-shim.js); for any other launch path + * (running dist directly, from source, etc.) the CLI re-execs itself once with + * the flag via {@link relaunchWithWasmRuntimeFlagsIfNeeded}. V8 flags are + * PROCESS-global, and the parse worker is created with default (inherited) + * execArgv, so flagging the main process governs the worker's WASM compilation + * too. + */ +import { spawnSync } from 'child_process'; + +/** + * The V8 flag(s) that keep tree-sitter grammar compilation off the turboshaft + * optimizing tier. Single source of truth: the relaunch guard and the test + * suite both read this (a test asserts each is a real flag on the running + * runtime, so a rename can't silently regress the fix). + */ +export const WASM_RUNTIME_FLAGS: readonly string[] = ['--liftoff-only']; + +/** + * Env var set on the relaunched child so a detection slip can never cause an + * infinite re-exec loop. Also lets users force-disable the relaunch. + */ +const RELAUNCH_GUARD_ENV = 'CODEGRAPH_WASM_RELAUNCHED'; + +/** True when every required WASM runtime flag is already present in `execArgv`. */ +export function processHasWasmRuntimeFlags( + execArgv: readonly string[] = process.execArgv +): boolean { + return WASM_RUNTIME_FLAGS.every((flag) => execArgv.includes(flag)); +} + +/** + * Build the argv for re-execing node with the WASM runtime flags: our flags + * first, then any node flags already in `execArgv` (deduped), then the script + * and its args. Pure — exported for unit testing. + */ +export function buildRelaunchArgv( + scriptPath: string, + scriptArgs: readonly string[], + execArgv: readonly string[] = process.execArgv +): string[] { + const preserved = execArgv.filter((arg) => !WASM_RUNTIME_FLAGS.includes(arg)); + return [...WASM_RUNTIME_FLAGS, ...preserved, scriptPath, ...scriptArgs]; +} + +/** + * If the current process is missing the WASM runtime flags, re-exec it once + * with them and exit with the child's status. No-op when the flags are already + * present (the normal bundled-launcher path), when already relaunched, or when + * disabled via CODEGRAPH_NO_RELAUNCH. + * + * On spawn failure, returns so the caller runs in-process anyway — risking the + * OOM is still better than refusing to start. + */ +export function relaunchWithWasmRuntimeFlagsIfNeeded(scriptPath: string): void { + if (processHasWasmRuntimeFlags()) return; + if (process.env[RELAUNCH_GUARD_ENV]) return; + if (process.env.CODEGRAPH_NO_RELAUNCH) return; + + const argv = buildRelaunchArgv(scriptPath, process.argv.slice(2)); + const result = spawnSync(process.execPath, argv, { + stdio: 'inherit', + env: { ...process.env, [RELAUNCH_GUARD_ENV]: '1' }, + }); + + if (result.error) { + // Couldn't relaunch (e.g. execPath unavailable) — fall through and run in + // this process. Degraded (may OOM on huge repos) but not broken. + return; + } + process.exit(result.status ?? (result.signal ? 1 : 0)); +}