Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions src/bin/codegraph.ts
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,23 @@ type IndexResult = {
function printIndexResult(clack: typeof import('@clack/prompts'), result: IndexResult, projectPath?: string): void {
const hasErrors = result.filesErrored > 0;

// Surface non-file-level failures (e.g. lock-acquisition failure
// when another indexer is running) before the file-count branches.
// Without this the CLI falls through to "No files found to index",
// which is actively misleading — the index DID run, it just couldn't
// get the lock.
//
// If success is false but no severity:'error' entry exists in
// `result.errors` (degenerate case — shouldn't happen in practice
// but worth guarding because the result shape is plumbed through
// multiple call sites), fall back to a generic message rather than
// continuing to the misleading "No files found" branch or throwing.
if (!result.success && !hasErrors && result.filesIndexed === 0) {
const generic = result.errors.find((e) => e.severity === 'error');
clack.log.error(generic?.message ?? 'Indexing failed — no further details available');
return;
}

if (result.filesIndexed > 0) {
if (hasErrors) {
clack.log.success(`Indexed ${formatNumber(result.filesIndexed)} files (${formatNumber(result.filesErrored)} could not be parsed)`);
Expand Down
42 changes: 42 additions & 0 deletions src/extraction/parse-worker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,48 @@ import { extractFromSource } from './tree-sitter';
import { detectLanguage, loadGrammarsForLanguages, resetParser } from './grammars';
import type { Language, ExtractionResult } from '../types';

// Emscripten prints `Aborted()` (and a follow-up RuntimeError diag
// line) directly to stderr when WASM aborts — before the JS catch
// runs. Worker stderr is inherited by the parent, so each crash leaks
// a noise line to the user's terminal even though the JS layer
// already handles the failure cleanly. Filter these specific lines
// out at the source. Real diagnostic output (anything we log
// ourselves) goes through console.* / parentPort and is unaffected.
//
// Caveats deliberately accepted:
// - Per-call match: each `write()` call is matched in isolation.
// If Emscripten ever splits `Aborted(` across two write()s (it
// doesn't today — synchronous abort prints the whole line at
// once via libc puts) the first fragment would leak. Buffering
// across calls would add complexity for a hypothetical case.
// - Substring exactness: the prefix `Aborted(` is the literal
// Emscripten signature. Any user code that legitimately writes
// a stderr line starting with that prefix would also be filtered;
// in practice no real diagnostic does.
{
  // Keep a bound handle to the genuine writer so every non-matching
  // chunk can be forwarded to it untouched.
  const forwardToStderr = process.stderr.write.bind(process.stderr);

  // True when the text is one of the two noise lines being filtered:
  // a line beginning with `Aborted(`, or one carrying the ASSERTIONS
  // build hint.
  const isAbortNoise = (text: string): boolean =>
    text.startsWith('Aborted(') ||
    text.includes('Build with -sASSERTIONS for more info');

  /**
   * Drop-in replacement for `process.stderr.write` that swallows the
   * abort-noise lines and forwards everything else unchanged.
   *
   * Accepts both call shapes: `(chunk, cb)` and `(chunk, encoding, cb)`.
   * Returns `true` for a suppressed chunk, otherwise whatever the real
   * writer returns.
   */
  const filteredWrite = (
    chunk: string | Uint8Array,
    encoding?: BufferEncoding | ((err?: Error | null) => void),
    cb?: (err?: Error | null) => void
  ): boolean => {
    // Binary chunks are decoded as UTF-8 purely for matching; the
    // original chunk is what gets forwarded.
    const text = typeof chunk === 'string' ? chunk : Buffer.from(chunk).toString('utf-8');

    if (!isAbortNoise(text)) {
      return forwardToStderr(chunk as never, encoding as never, cb as never);
    }

    // Suppressed write: the completion callback must still run so a
    // caller waiting on it does not hang. In the two-argument form the
    // callback arrives in the `encoding` slot.
    const onDone = typeof encoding === 'function' ? encoding : cb;
    if (onDone) onDone();
    return true;
  };

  process.stderr.write = filteredWrite as typeof process.stderr.write;
}

const PARSER_RESET_INTERVAL = 5000;
const parseCounts = new Map<Language, number>();

Expand Down