diff --git a/src/bin/codegraph.ts b/src/bin/codegraph.ts index d118a1fd..2843e67e 100644 --- a/src/bin/codegraph.ts +++ b/src/bin/codegraph.ts @@ -265,6 +265,23 @@ type IndexResult = { function printIndexResult(clack: typeof import('@clack/prompts'), result: IndexResult, projectPath?: string): void { const hasErrors = result.filesErrored > 0; + // Surface non-file-level failures (e.g. lock-acquisition failure + // when another indexer is running) before the file-count branches. + // Without this the CLI falls through to "No files found to index", + // which is actively misleading — the index DID run, it just couldn't + // get the lock. + // + // If success is false but no severity:'error' entry exists in + // `result.errors` (degenerate case — shouldn't happen in practice + // but worth guarding because the result shape is plumbed through + // multiple call sites), fall back to a generic message rather than + // continuing to the misleading "No files found" branch or throwing. + if (!result.success && !hasErrors && result.filesIndexed === 0) { + const generic = result.errors.find((e) => e.severity === 'error'); + clack.log.error(generic?.message ?? 'Indexing failed — no further details available'); + return; + } + if (result.filesIndexed > 0) { if (hasErrors) { clack.log.success(`Indexed ${formatNumber(result.filesIndexed)} files (${formatNumber(result.filesErrored)} could not be parsed)`); diff --git a/src/extraction/parse-worker.ts b/src/extraction/parse-worker.ts index 21b239ca..3ddc18e3 100644 --- a/src/extraction/parse-worker.ts +++ b/src/extraction/parse-worker.ts @@ -10,6 +10,48 @@ import { extractFromSource } from './tree-sitter'; import { detectLanguage, loadGrammarsForLanguages, resetParser } from './grammars'; import type { Language, ExtractionResult } from '../types'; +// Emscripten prints `Aborted()` (and a follow-up RuntimeError diag +// line) directly to stderr when WASM aborts — before the JS catch +// runs. Worker stderr is inherited by the parent, so each crash leaks +// a noise line to the user's terminal even though the JS layer +// already handles the failure cleanly. Filter these specific lines +// out at the source. Real diagnostic output (anything we log +// ourselves) goes through console.* / parentPort and is unaffected. +// +// Caveats deliberately accepted: +// - Per-call match: each `write()` call is matched in isolation. +// If Emscripten ever splits `Aborted(` across two write()s (it +// doesn't today — synchronous abort prints the whole line at +// once via libc puts) the first fragment would leak. Buffering +// across calls would add complexity for a hypothetical case. +// - Substring exactness: the prefix `Aborted(` is the literal +// Emscripten signature. Any user code that legitimately writes +// a stderr line starting with that prefix would also be filtered; +// in practice no real diagnostic does. +{ + const realWrite = process.stderr.write.bind(process.stderr); + process.stderr.write = (( + chunk: string | Uint8Array, + encoding?: BufferEncoding | ((err?: Error | null) => void), + cb?: (err?: Error | null) => void + ): boolean => { + const s = typeof chunk === 'string' ? chunk : Buffer.from(chunk).toString('utf-8'); + if ( + s.startsWith('Aborted(') || + s.includes('Build with -sASSERTIONS for more info') + ) { + // Honour the Writable stream contract: callbacks must always + // fire even when the write is suppressed, or upstream code + // waiting on the drain signal would hang. Both overload forms + // are handled (`(chunk, cb)` and `(chunk, encoding, cb)`). + if (typeof encoding === 'function') encoding(); + else if (cb) cb(); + return true; + } + return realWrite(chunk as never, encoding as never, cb as never); + }) as typeof process.stderr.write; +} + const PARSER_RESET_INTERVAL = 5000; const parseCounts = new Map();