From 9f91f1f8e089b4bd2267990e01fff7e2314602ab Mon Sep 17 00:00:00 2001 From: andreinknv Date: Tue, 28 Apr 2026 17:34:00 -0400 Subject: [PATCH 1/2] fix(cli): surface lock-acquisition errors and silence Emscripten Aborted() spam MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two unrelated cosmetic but actively misleading bugs that surface when the indexer is under load. 1) printIndexResult fell through to "No files found to index" whenever the IndexResult had filesIndexed=0 AND filesErrored=0. The lock-acquisition path returns success:false with a generic "Could not acquire file lock" entry in result.errors[] (severity 'error'), but filesErrored counts only file-level parse failures, so the user saw "No files found to index" — actively wrong. Add a top-of-function check for the !success && !hasErrors case that surfaces the first severity:'error' message instead. 2) parse-worker.ts let Emscripten's stderr "Aborted()" lines (plus their "Build with -sASSERTIONS for more info" follow-ups) leak to the parent's terminal whenever a WASM tree-sitter parser crashed on a pathological file. Even after the JS layer caught and recovered, the user saw dozens of `Aborted()` lines spammed to stderr. Install a stderr filter at worker startup that drops only those specific Emscripten internal lines; everything we log ourselves passes through unchanged. Verified live against ollama/ollama@v0.22.0: - second concurrent `codegraph index` now shows "Could not acquire file lock - another process may be indexing" instead of "No files found to index" - WASM-crash-prone re-index produced 0 Aborted() lines (down from 68+). --- src/bin/codegraph.ts | 13 +++++++++++++ src/extraction/parse-worker.ts | 27 +++++++++++++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/src/bin/codegraph.ts b/src/bin/codegraph.ts index d118a1fd..2b891a0d 100644 --- a/src/bin/codegraph.ts +++ b/src/bin/codegraph.ts @@ -265,6 +265,19 @@ type IndexResult = { function printIndexResult(clack: typeof import('@clack/prompts'), result: IndexResult, projectPath?: string): void { const hasErrors = result.filesErrored > 0; + // Surface non-file-level failures (e.g. lock-acquisition failure + // when another indexer is running) before the file-count branches. + // Without this the CLI falls through to "No files found to index", + // which is actively misleading — the index DID run, it just couldn't + // get the lock. + if (!result.success && !hasErrors && result.filesIndexed === 0) { + const generic = result.errors.find((e) => e.severity === 'error'); + if (generic) { + clack.log.error(generic.message); + return; + } + } + if (result.filesIndexed > 0) { if (hasErrors) { clack.log.success(`Indexed ${formatNumber(result.filesIndexed)} files (${formatNumber(result.filesErrored)} could not be parsed)`); diff --git a/src/extraction/parse-worker.ts b/src/extraction/parse-worker.ts index 21b239ca..fd0ad74e 100644 --- a/src/extraction/parse-worker.ts +++ b/src/extraction/parse-worker.ts @@ -10,6 +10,33 @@ import { extractFromSource } from './tree-sitter'; import { detectLanguage, loadGrammarsForLanguages, resetParser } from './grammars'; import type { Language, ExtractionResult } from '../types'; +// Emscripten prints `Aborted()` (and a follow-up RuntimeError diag +// line) directly to stderr when WASM aborts — before the JS catch +// runs. Worker stderr is inherited by the parent, so each crash leaks +// a noise line to the user's terminal even though the JS layer +// already handles the failure cleanly. Filter these specific lines +// out at the source. Real diagnostic output (anything we log +// ourselves) goes through console.* / parentPort and is unaffected. +{ + const realWrite = process.stderr.write.bind(process.stderr); + process.stderr.write = (( + chunk: string | Uint8Array, + encoding?: BufferEncoding | ((err?: Error | null) => void), + cb?: (err?: Error | null) => void + ): boolean => { + const s = typeof chunk === 'string' ? chunk : Buffer.from(chunk).toString('utf-8'); + if ( + s.startsWith('Aborted(') || + s.includes('Build with -sASSERTIONS for more info') + ) { + if (typeof encoding === 'function') encoding(); + else if (cb) cb(); + return true; + } + return realWrite(chunk as never, encoding as never, cb as never); + }) as typeof process.stderr.write; +} + const PARSER_RESET_INTERVAL = 5000; const parseCounts = new Map(); From fe3dc95631bee7e7c8ad56b885085c8b52020ec1 Mon Sep 17 00:00:00 2001 From: andreinknv Date: Tue, 28 Apr 2026 18:54:12 -0400 Subject: [PATCH 2/2] fix(cli): null-safe error surfacing + clearer stderr-filter contract docs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two reviewer findings on PR #128: - printIndexResult: when result.success is false but result.errors contains no severity:'error' entry (degenerate case but possible if the result shape ever drifts), the find() returned undefined and the previous if-guard fell through to the misleading 'No files found to index' branch. Now always surfaces a clear failure message via clack.log.error, defaulting to 'Indexing failed — no further details available' when no specific error is in the errors list. - parse-worker stderr filter: callback handling was already correct but the comment didn't document it; expand the comment to spell out the Writable-stream-contract obligation, the per-call match semantics (split-chunk caveat), and the substring-exactness trade-off so future readers understand the deliberate trade-offs. --- src/bin/codegraph.ts | 12 ++++++++---- src/extraction/parse-worker.ts | 15 +++++++++++++++ 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/src/bin/codegraph.ts b/src/bin/codegraph.ts index 2b891a0d..2843e67e 100644 --- a/src/bin/codegraph.ts +++ b/src/bin/codegraph.ts @@ -270,12 +270,16 @@ function printIndexResult(clack: typeof import('@clack/prompts'), result: IndexR // Without this the CLI falls through to "No files found to index", // which is actively misleading — the index DID run, it just couldn't // get the lock. + // + // If success is false but no severity:'error' entry exists in + // `result.errors` (degenerate case — shouldn't happen in practice + // but worth guarding because the result shape is plumbed through + // multiple call sites), fall back to a generic message rather than + // continuing to the misleading "No files found" branch or throwing. if (!result.success && !hasErrors && result.filesIndexed === 0) { const generic = result.errors.find((e) => e.severity === 'error'); - if (generic) { - clack.log.error(generic.message); - return; - } + clack.log.error(generic?.message ?? 'Indexing failed — no further details available'); + return; } if (result.filesIndexed > 0) { diff --git a/src/extraction/parse-worker.ts b/src/extraction/parse-worker.ts index fd0ad74e..3ddc18e3 100644 --- a/src/extraction/parse-worker.ts +++ b/src/extraction/parse-worker.ts @@ -17,6 +17,17 @@ import type { Language, ExtractionResult } from '../types'; // already handles the failure cleanly. Filter these specific lines // out at the source. Real diagnostic output (anything we log // ourselves) goes through console.* / parentPort and is unaffected. +// +// Caveats deliberately accepted: +// - Per-call match: each `write()` call is matched in isolation. +// If Emscripten ever splits `Aborted(` across two write()s (it +// doesn't today — synchronous abort prints the whole line at +// once via libc puts) the first fragment would leak. Buffering +// across calls would add complexity for a hypothetical case. +// - Substring exactness: the prefix `Aborted(` is the literal +// Emscripten signature. Any user code that legitimately writes +// a stderr line starting with that prefix would also be filtered; +// in practice no real diagnostic does. { const realWrite = process.stderr.write.bind(process.stderr); process.stderr.write = (( @@ -29,6 +40,10 @@ import type { Language, ExtractionResult } from '../types'; s.startsWith('Aborted(') || s.includes('Build with -sASSERTIONS for more info') ) { + // Honour the Writable stream contract: callbacks must always + // fire even when the write is suppressed, or upstream code + // waiting on the drain signal would hang. Both overload forms + // are handled (`(chunk, cb)` and `(chunk, encoding, cb)`). if (typeof encoding === 'function') encoding(); else if (cb) cb(); return true;