84 changes: 82 additions & 2 deletions __tests__/llm-tiers.test.ts
@@ -46,9 +46,28 @@ async function startFake(): Promise<FakeServer> {
if (state.nextChatText !== null) {
text = state.nextChatText;
state.nextChatText = null;
} else if (userText.includes('Symbols (zero-indexed):')) {
// Batched mode (classifier or dead-code judge). Anchor on the
// structural marker, not the reply-instruction phrasing,
// since reply wording can drift independently in either file.
// Count the numbered symbol lines and emit a same-sized array.
const m = userText.match(/Symbols \(zero-indexed\):\n([\s\S]*?)\n\n/);
const lines = (m?.[1] ?? '').split('\n').filter((l) => /^\d+\./.test(l));
const n = lines.length || 1;
if (userText.includes('reviewing whether symbols are dead code')) {
const arr = Array.from({ length: n }, (_, i) =>
`{"i":${i},"verdict":"uncertain","confidence":0.5,"reason":"batch stub"}`
);
text = `[${arr.join(',')}]`;
} else {
const arr = Array.from({ length: n }, (_, i) =>
`{"i":${i},"role":"business_logic"}`
);
text = `[${arr.join(',')}]`;
}
} else if (userText.includes('Reply with EXACTLY one JSON object')) {
// Could be classifier-style or judge-style; default to a
// benign verdict object that satisfies dead-code parsing.
// Per-item fallback (classifier-style or judge-style); default
// to a benign verdict object that satisfies dead-code parsing.
if (userText.includes('"verdict"')) {
text = '{"verdict": "uncertain", "confidence": 0.5, "reason": "test stub"}';
} else if (userText.includes('"consistent"')) {
@@ -57,6 +76,7 @@
text = 'unknown';
}
} else if (userText.includes('Classify the following code symbol')) {
// Per-item classifier fallback — singular prompt.
text = 'business_logic';
} else if (userText.includes('Module summary:')) {
text = 'Coordinates a small module that does test things.';
@@ -290,6 +310,66 @@ export function debounce(fn: () => void, ms: number): () => void {
expect(parseRole('I think this is a util maybe')).toBe('unknown');
});

it('parseBatchResponse extracts roles, tolerates surrounding prose, rejects under-coverage', async () => {
const { parseBatchResponse } = await import('../src/llm/classifier');
// Happy path: clean JSON.
const clean = '[{"i":0,"role":"business_logic"},{"i":1,"role":"util"},{"i":2,"role":"data_model"}]';
const m = parseBatchResponse(clean, 3);
expect(m).not.toBeNull();
expect(m!.get(0)).toBe('business_logic');
expect(m!.get(1)).toBe('util');
expect(m!.get(2)).toBe('data_model');

// Tolerates leading/trailing prose.
const noisy = 'Sure, here you go: [{"i":0,"role":"api_endpoint"}] Hope this helps!';
expect(parseBatchResponse(noisy, 1)?.get(0)).toBe('api_endpoint');

// Coerces unknown labels via parseRole.
const titlecase = '[{"i":0,"role":"Business Logic"}]';
expect(parseBatchResponse(titlecase, 1)?.get(0)).toBe('business_logic');

// Under-coverage rejects (model dropped 2 of 3 entries).
const sparse = '[{"i":0,"role":"util"}]';
expect(parseBatchResponse(sparse, 3)).toBeNull();

// Malformed JSON returns null (caller falls back to per-item).
expect(parseBatchResponse('not json', 3)).toBeNull();
expect(parseBatchResponse('[not json', 3)).toBeNull();

// Bracket inside a string literal must not break depth tracking
// (mirrors parseBatchJudge's tricky case). Role labels are a
// closed set today, but reason-style fields could be added later.
const bracketInString = '[{"i":0,"role":"util","note":"sees [foo] in text"}]';
expect(parseBatchResponse(bracketInString, 1)?.get(0)).toBe('util');
});

it('parseBatchJudge handles dead-code verdict arrays + recovery edges', async () => {
const { parseBatchJudge } = await import('../src/llm/dead-code');
const ok = '[{"i":0,"verdict":"dead","confidence":0.9,"reason":"never called"},'
+ '{"i":1,"verdict":"live","confidence":0.7,"reason":"CLI handler"}]';
const m = parseBatchJudge(ok, 2);
expect(m).not.toBeNull();
expect(m!.get(0)?.verdict).toBe('dead');
expect(m!.get(0)?.confidence).toBeCloseTo(0.9);
expect(m!.get(1)?.verdict).toBe('live');

// Confidence clamped to [0, 1].
const wild = '[{"i":0,"verdict":"dead","confidence":2.5,"reason":"x"}]';
expect(parseBatchJudge(wild, 1)?.get(0)?.confidence).toBe(1);

// Unknown verdict coerces to "uncertain".
const weird = '[{"i":0,"verdict":"maybe","confidence":0.5,"reason":"x"}]';
expect(parseBatchJudge(weird, 1)?.get(0)?.verdict).toBe('uncertain');

// Bracket inside a string literal must not break depth tracking.
const tricky = '[{"i":0,"verdict":"dead","confidence":0.5,"reason":"sees [foo] in text"}]';
expect(parseBatchJudge(tricky, 1)?.get(0)?.reason).toContain('[foo]');

// Under-coverage rejects.
expect(parseBatchJudge('[{"i":0,"verdict":"dead","confidence":0.5,"reason":"x"}]', 5))
.toBeNull();
});

it('agent bridge: pendingSummariesBatch + saveAgentSummaries round-trip without LLM', async () => {
// No config.llm — exercises the path users without Ollama would take.
const cg = await CodeGraph.init(tempDir);
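Note on the batched-mode stub in the top hunk: reply sizing relies only on counting the numbered lines after the structural marker. A standalone sketch of that counting trick, using made-up prompt text in the shape buildBatchPrompt emits:

// Standalone sketch of the stub's symbol-counting trick. The prompt
// text is made up; only the "Symbols (zero-indexed):" marker and the
// blank line that closes the list matter to the regex.
const prompt = [
  'Classify the following code symbols...',
  '',
  'Symbols (zero-indexed):',
  '0. foo (function) — does foo things',
  '1. bar (class) — does bar things',
  '',
  'Reply with a JSON array...',
].join('\n');

const match = prompt.match(/Symbols \(zero-indexed\):\n([\s\S]*?)\n\n/);
const count = (match?.[1] ?? '').split('\n').filter((l) => /^\d+\./.test(l)).length;
console.log(count); // 2, so the stub emits a 2-element JSON array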
7 changes: 5 additions & 2 deletions src/index.ts
@@ -997,9 +997,12 @@ export class CodeGraph {

// Concurrency tuning: claude-bridge spawns one subprocess per
// call (~500 ms–1 s startup), so concurrency >1 helps amortise.
// openai-compat servers serialise internally, so 2 is plenty.
// For openai-compat (incl. Anthropic API direct), per-call latency
// is dominated by network roundtrip, not server-side serialisation,
// so higher concurrency hides that latency. Local proxies that do
// serialise (e.g. small Ollama setups) just queue — no harm done.
const provider = resolved.chat?.provider;
const chatConcurrency = provider === 'claude-bridge' ? 4 : 2;
const chatConcurrency = provider === 'claude-bridge' ? 4 : 8;

const hasChat = Boolean(resolved.chat);
const client = new LlmClient(resolved);
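For context on the tuning above: the enrichment passes drain a shared work list with N workers, so raising chatConcurrency shortens wall time roughly linearly while calls are network-bound. A minimal sketch of that pattern (runPool and callChat are illustrative names, not the real API):

// Minimal sketch of the bounded-concurrency pattern the tuning
// comment assumes. `callChat` stands in for one LLM roundtrip.
async function runPool(
  items: string[],
  concurrency: number,
  callChat: (item: string) => Promise<void>
): Promise<void> {
  let next = 0;
  async function worker(): Promise<void> {
    while (next < items.length) {
      const i = next++; // single-threaded JS, so no race on the index
      await callChat(items[i]!);
    }
  }
  // Wall time shrinks roughly linearly in `concurrency` until the
  // server starts serialising requests; extra workers then just queue.
  await Promise.all(Array.from({ length: concurrency }, () => worker()));
}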
205 changes: 170 additions & 35 deletions src/llm/classifier.ts
@@ -3,8 +3,11 @@
* a fixed label set. Lets callers filter "show me all api_endpoints"
* or "list the data_models" without crawling the graph by hand.
*
* Tier-2 enrichment: cheap (one short call per symbol, deterministic
* single-token output), cached on symbol_summaries.role.
* Tier-2 enrichment: small fixed-label output, cached on
* symbol_summaries.role. Default mode is BATCHED — we send N symbols
* per call and parse a JSON list back. On batch parse failure we fall
* back to per-item single-symbol calls for that batch only, so an
* unrelated bad symbol can't lose the rest.
*/

import { LlmClient, LlmEndpointError } from './client';
@@ -26,11 +29,27 @@ export type RoleLabel = (typeof ROLE_LABELS)[number];

const ROLE_SET: ReadonlySet<string> = new Set(ROLE_LABELS);

const DEFAULT_CONCURRENCY = 2;
const DEFAULT_CONCURRENCY = 8;
/** Symbols per batched API call. Each output entry is ~30 chars
* (`{"i":42,"role":"business_logic"}`), so 50 fits comfortably under
* the 1500-token output cap. Input scales with summary length but
* stays well within Haiku's 200K context. */
const DEFAULT_BATCH_SIZE = 50;

const ROLE_LIST_TEXT = [
'- api_endpoint: HTTP/RPC handler, route, public-facing entry point.',
'- business_logic: domain operation, workflow, decision-making.',
'- data_model: type, struct, schema, DTO, persistence record.',
'- util: pure helper, formatter, parser, generic utility.',
'- framework_glue: middleware, adapter, config wiring, lifecycle hook.',
'- test_helper: fixture, mock builder, assertion helper.',
'- unknown: cannot determine from the description.',
].join('\n');

export interface ClassifierOptions {
signal?: AbortSignal;
concurrency?: number;
batchSize?: number;
onProgress?: (done: number, total: number) => void;
}

@@ -42,26 +61,48 @@ export interface ClassifierResult {
durationMs: number;
}

function buildPrompt(name: string, kind: string, signature: string | null, summary: string): string {
const sig = signature ? `\nSignature: ${signature}` : '';
interface Candidate {
nodeId: string;
name: string;
kind: string;
signature: string | null;
summary: string;
}

function buildSinglePrompt(c: Candidate): string {
const sig = c.signature ? `\nSignature: ${c.signature}` : '';
return [
'Classify the following code symbol into EXACTLY ONE of these roles:',
'',
'- api_endpoint: HTTP/RPC handler, route, public-facing entry point.',
'- business_logic: domain operation, workflow, decision-making.',
'- data_model: type, struct, schema, DTO, persistence record.',
'- util: pure helper, formatter, parser, generic utility.',
'- framework_glue: middleware, adapter, config wiring, lifecycle hook.',
'- test_helper: fixture, mock builder, assertion helper.',
'- unknown: cannot determine from the description.',
ROLE_LIST_TEXT,
'',
`Symbol: ${name} (${kind})${sig}`,
`Description: ${summary}`,
`Symbol: ${c.name} (${c.kind})${sig}`,
`Description: ${c.summary}`,
'',
'Reply with JUST the role name on a single line. No prose, no quotes.',
].join('\n');
}

function buildBatchPrompt(batch: Candidate[]): string {
const items = batch.map((c, i) => {
const sig = c.signature ? ` sig=${c.signature}` : '';
return `${i}. ${c.name} (${c.kind})${sig} — ${c.summary}`;
});
return [
'Classify the following code symbols. For EACH symbol pick EXACTLY ONE role from:',
'',
ROLE_LIST_TEXT,
'',
'Symbols (zero-indexed):',
...items,
'',
'Reply with a JSON array, one object per symbol, IN THE SAME ORDER:',
'[{"i":0,"role":"<role>"},{"i":1,"role":"<role>"},...]',
'',
'No prose, no markdown fences. Just the JSON array.',
].join('\n');
}

/** Strip markdown/quotes/whitespace, return the matched role or "unknown".
* Tries two normalisations: (1) first whitespace-delimited token (handles
* `business_logic.` and `\`business_logic\``), (2) all tokens joined with
@@ -77,6 +118,59 @@ export function parseRole(text: string): RoleLabel {
return 'unknown';
}

/** Parse a batched response into a position→role map. Tolerates
* surrounding prose / markdown fences / trailing commas by extracting
* the first top-level JSON array. Bracket counting is string-aware
* so `[` / `]` inside string literals (e.g. a role label or future
* reason field containing brackets) don't fool the depth tracker.
* Stray brackets in PROSE before the array (e.g. "[see note] [{...}]")
* are not handled — the scanner consumes the prose pair, JSON.parse
* fails, and the caller falls back to per-item. Returns null on
* irrecoverable parse failure. */
export function parseBatchResponse(text: string, expectedSize: number): Map<number, RoleLabel> | null {
const start = text.indexOf('[');
if (start === -1) return null;
let depth = 0;
let inString = false;
let escape = false;
let end = -1;
for (let i = start; i < text.length; i++) {
const ch = text[i];
if (escape) { escape = false; continue; }
if (ch === '\\' && inString) { escape = true; continue; }
if (ch === '"') { inString = !inString; continue; }
if (inString) continue;
if (ch === '[') depth++;
else if (ch === ']') {
depth--;
if (depth === 0) { end = i; break; }
}
}
if (end === -1) return null;

let parsed: unknown;
try {
parsed = JSON.parse(text.slice(start, end + 1));
} catch {
return null;
}
if (!Array.isArray(parsed)) return null;

const out = new Map<number, RoleLabel>();
for (const entry of parsed) {
if (!entry || typeof entry !== 'object') continue;
const rec = entry as { i?: unknown; role?: unknown };
const idx = typeof rec.i === 'number' ? rec.i : Number(rec.i);
if (!Number.isInteger(idx) || idx < 0 || idx >= expectedSize) continue;
if (typeof rec.role !== 'string') continue;
out.set(idx, parseRole(rec.role));
}
// Require at least 80% coverage — if the model dropped most entries,
// treat the whole batch as failed and let the caller retry per-item.
if (out.size < Math.max(1, Math.floor(expectedSize * 0.8))) return null;
return out;
}

/**
* Run the classifier over every summarised symbol that doesn't yet
* have a role from the active model. Idempotent.
@@ -89,44 +183,85 @@ export async function classifyAllRoles(
): Promise<ClassifierResult> {
const t0 = Date.now();
const concurrency = Math.max(1, options.concurrency ?? DEFAULT_CONCURRENCY);
const batchSize = Math.max(1, options.batchSize ?? DEFAULT_BATCH_SIZE);

const candidates = queries.getClassifiableSummaries(modelLabel);
const total = candidates.length;
let done = 0;
let classified = 0;
let errors = 0;

let next = 0;
let nextStart = 0;
async function classifyOne(c: Candidate): Promise<void> {
try {
const result = await client.chat(
[{ role: 'user', content: buildSinglePrompt(c) }],
{ temperature: 0, maxTokens: 12, signal: options.signal }
);
if (options.signal?.aborted) return;
const label = parseRole(result.text);
queries.upsertSymbolRole(c.nodeId, label, modelLabel);
classified++;
} catch (err) {
errors++;
if (err instanceof LlmEndpointError) {
logDebug('Classifier: endpoint error', { node: c.nodeId, error: err.message });
} else {
logWarn('Classifier: unexpected error', { node: c.nodeId, error: String(err) });
}
}
}

async function worker(): Promise<void> {
while (next < candidates.length) {
while (nextStart < candidates.length) {
if (options.signal?.aborted) return;
const i = next++;
const c = candidates[i]!;
const start = nextStart;
nextStart += batchSize;
const batch = candidates.slice(start, start + batchSize);

let batchHandled = false;
// maxTokens budget: each entry is ~30 chars (≈8 tokens); 12 tokens
// per entry leaves headroom.
const batchMaxTokens = Math.max(64, batch.length * 12);
try {
const result = await client.chat(
[
{
role: 'user',
content: buildPrompt(c.name, c.kind, c.signature, c.summary),
},
],
{ temperature: 0, maxTokens: 12, signal: options.signal }
[{ role: 'user', content: buildBatchPrompt(batch) }],
{ temperature: 0, maxTokens: batchMaxTokens, signal: options.signal }
);
// Don't persist if we were cancelled mid-call.
if (options.signal?.aborted) return;
const label = parseRole(result.text);
queries.upsertSymbolRole(c.nodeId, label, modelLabel);
classified++;
const parsed = parseBatchResponse(result.text, batch.length);
if (parsed) {
for (let i = 0; i < batch.length; i++) {
const c = batch[i]!;
const label = parsed.get(i) ?? 'unknown';
queries.upsertSymbolRole(c.nodeId, label, modelLabel);
classified++;
done++;
options.onProgress?.(done, total);
}
batchHandled = true;
} else {
logDebug('Classifier: batch parse failed, falling back to per-item', {
batchStart: start,
batchSize: batch.length,
sample: result.text.slice(0, 120),
});
}
} catch (err) {
errors++;
if (options.signal?.aborted) return;
if (err instanceof LlmEndpointError) {
logDebug('Classifier: endpoint error', { node: c.nodeId, error: err.message });
logDebug('Classifier: batch endpoint error, falling back', { error: err.message });
} else {
logWarn('Classifier: unexpected error', { node: c.nodeId, error: String(err) });
logWarn('Classifier: batch unexpected error, falling back', { error: String(err) });
}
}

if (!batchHandled) {
for (const c of batch) {
if (options.signal?.aborted) return;
await classifyOne(c);
done++;
options.onProgress?.(done, total);
}
} finally {
done++;
options.onProgress?.(done, total);
}
}
}
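To close, two quick standalone checks of the string-aware array extraction, the subtlest part of parseBatchResponse. Inputs are made up, and the import path assumes the repo layout shown above:

import { parseBatchResponse } from './src/llm/classifier';

// Bracket inside a string literal: the scanner must not close early.
const ok = parseBatchResponse('[{"i":0,"role":"util","note":"[x]"}]', 1);
console.log(ok?.get(0)); // "util"

// Stray bracket pair in prose before the array: documented as
// unhandled. The scanner grabs "[see note]", JSON.parse throws,
// and the caller falls back to per-item calls.
console.log(parseBatchResponse('[see note] [{"i":0,"role":"util"}]', 1)); // null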