Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,28 @@ a [GitHub Release](https://github.com/colbymchenry/codegraph/releases) tagged
This project follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/)
and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]

### Changed
- **MCP / explore**: `codegraph_explore` output is now adaptive to project
size. The tool used to apply a fixed 35KB cap regardless of how large the
codebase was, which on small projects (~100 files) produced bigger
responses than the agent's native grep+Read flow would have — exactly the
scenario reported in
[#185](https://github.com/colbymchenry/codegraph/issues/185). The budget
now scales with indexed file count: small projects (<500 files) cap at
~18KB and skip the "Additional relevant files" / completeness /
explore-budget reminders that earn their keep on bigger codebases; medium
(<5,000) caps at ~28KB; large (<15,000) keeps the historical ~35KB; very
large goes up to ~38KB. A new per-file char cap also prevents a single
file with many adjacent symbols from collapsing into one whole-file dump
(the Alamofire `Session.swift` case from #185). Measured against the
same repos used in the README benchmark: Alamofire ~62% smaller per call,
Excalidraw ~35%, VS Code ~14%. Agent-trust floor still holds — the
Relationships section, scored cluster selection, and structured-source
output are all retained. Thanks to
[@essopsp](https://github.com/essopsp) for the repro.

## [0.7.10] - 2026-05-19

### Fixed
Expand Down
191 changes: 191 additions & 0 deletions __tests__/explore-output-budget.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
/**
* Adaptive output budget for codegraph_explore (#185).
*
* The explore tool used to apply a fixed 35KB output cap regardless of
* project size, which on small codebases was a net loss vs. native
* grep+Read. These tests pin the per-tier budget shape so future tuning
* doesn't silently drift the small-project case back into bloat.
*/
import { describe, it, expect, beforeAll, afterAll } from 'vitest';
import * as fs from 'fs';
import * as path from 'path';
import * as os from 'os';
import { getExploreOutputBudget, getExploreBudget, ToolHandler } from '../src/mcp/tools';
import CodeGraph from '../src/index';

// Unit-level checks on the per-tier budget object returned by
// getExploreOutputBudget(fileCount). Tier boundaries exercised here:
// 500, 5,000 and 15,000 indexed files.
describe('getExploreOutputBudget', () => {
  /** Total-output character cap for a project with `fileCount` indexed files. */
  const capAt = (fileCount: number) => getExploreOutputBudget(fileCount).maxOutputChars;

  it('returns a strictly smaller total cap for small projects than for huge ones', () => {
    const smallTier = getExploreOutputBudget(100);
    const hugeTier = getExploreOutputBudget(30000);

    expect(smallTier.maxOutputChars).toBeLessThan(hugeTier.maxOutputChars);
    expect(smallTier.defaultMaxFiles).toBeLessThan(hugeTier.defaultMaxFiles);
    expect(smallTier.maxCharsPerFile).toBeLessThan(hugeTier.maxCharsPerFile);
  });

  it('caps total output well under 8000 tokens (~32k chars) on small projects', () => {
    expect(capAt(100)).toBeLessThanOrEqual(20000);
  });

  it('keeps the historical 35k+ ceiling for medium-large projects so existing benchmarks do not regress', () => {
    expect(capAt(10000)).toBeGreaterThanOrEqual(35000);
  });

  it('uses tier breakpoints matching getExploreBudget so call-count and output-budget agree on a project', () => {
    // Two file counts inside one tier must resolve to the same total cap.
    const smallLow = capAt(50);
    const smallHigh = capAt(499);
    expect(smallLow).toBe(smallHigh);
    expect(getExploreBudget(50)).toBe(getExploreBudget(499));

    const mediumLow = capAt(500);
    const mediumHigh = capAt(4999);
    expect(mediumLow).toBe(mediumHigh);
    expect(getExploreBudget(500)).toBe(getExploreBudget(4999));

    const largeLow = capAt(5000);
    const largeHigh = capAt(14999);
    expect(largeLow).toBe(largeHigh);

    // Stepping over a breakpoint must yield a different cap.
    expect(smallLow).not.toBe(mediumLow);
    expect(mediumLow).not.toBe(largeLow);
  });

  it('gates off "Additional relevant files", completeness signal, and budget note on small projects', () => {
    const { includeAdditionalFiles, includeCompletenessSignal, includeBudgetNote } =
      getExploreOutputBudget(100);

    expect(includeAdditionalFiles).toBe(false);
    expect(includeCompletenessSignal).toBe(false);
    expect(includeBudgetNote).toBe(false);
  });

  it('keeps all meta-text on for projects that earn the breadth signal (>=500 files)', () => {
    const { includeAdditionalFiles, includeCompletenessSignal, includeBudgetNote } =
      getExploreOutputBudget(1000);

    expect(includeAdditionalFiles).toBe(true);
    expect(includeCompletenessSignal).toBe(true);
    expect(includeBudgetNote).toBe(true);
  });

  it('keeps the Relationships section on for every tier — it is the cheapest structural signal', () => {
    // One representative file count per tier.
    for (const fileCount of [50, 1000, 10000, 30000]) {
      expect(getExploreOutputBudget(fileCount).includeRelationships).toBe(true);
    }
  });

  it('caps the per-file header symbol list more tightly on small projects', () => {
    // Without this cap, a file like Alamofire's Session.swift produced
    // a 3.4KB symbol list in the `#### path — sym, sym, ...` header,
    // dwarfing the per-file body cap.
    const smallHeaderCap = getExploreOutputBudget(100).maxSymbolsInFileHeader;
    const hugeHeaderCap = getExploreOutputBudget(30000).maxSymbolsInFileHeader;

    expect(smallHeaderCap).toBeLessThan(hugeHeaderCap);
    expect(smallHeaderCap).toBeGreaterThan(0);
  });

  it('uses a tighter clustering gap threshold on small projects to break runaway single clusters', () => {
    const smallGap = getExploreOutputBudget(100).gapThreshold;
    const hugeGap = getExploreOutputBudget(30000).gapThreshold;

    expect(smallGap).toBeLessThanOrEqual(hugeGap);
  });

  it('handles the boundary file counts exactly (off-by-one regression guard)', () => {
    // Each pair is [boundary file count, mid-tier file count expected to share its cap].
    const boundaryPairs: Array<[number, number]> = [
      [499, 100], // last small
      [500, 1000], // first medium
      [4999, 1000], // last medium
      [5000, 10000], // first large
      [14999, 10000], // last large
      [15000, 30000], // first xlarge
    ];

    for (const [boundary, midTier] of boundaryPairs) {
      expect(capAt(boundary)).toBe(capAt(midTier));
    }
  });
});

/**
 * End-to-end check that the budget is actually applied by handleExplore.
 *
 * Builds a tiny synthetic project (<500 files, so the small tier), indexes
 * it, and confirms the output:
 *  - is non-empty (so the checks below cannot pass vacuously)
 *  - stays under the small-tier maxOutputChars cap
 *  - omits the meta-text the small tier gates off (completeness signal,
 *    budget note, "Additional relevant files")
 *
 * Regression guard for #185 — protects against future edits to handleExplore
 * silently re-introducing the fixed 35KB cap on small projects.
 */
describe('codegraph_explore output respects the adaptive budget', () => {
  // Every test issues the same query against the same indexed fixture.
  const EXPLORE_QUERY = 'Session method helper';

  let testDir: string;
  let cg: CodeGraph;
  let handler: ToolHandler;

  /**
   * Runs `codegraph_explore` with the shared query and returns the text of
   * the first content item.
   *
   * Asserts the text is non-empty: a missing or empty response would
   * otherwise satisfy both the "under the cap" and "does not contain"
   * assertions without exercising the budget at all.
   *
   * @returns the tool's text output
   */
  async function exploreText(): Promise<string> {
    const result = await handler.execute('codegraph_explore', { query: EXPLORE_QUERY });
    const text = result.content?.[0]?.text ?? '';
    expect(text.length).toBeGreaterThan(0);
    return text;
  }

  beforeAll(async () => {
    testDir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-explore-budget-'));
    const srcDir = path.join(testDir, 'src');
    fs.mkdirSync(srcDir);

    // A handful of files with one fat target file. The fat file mimics the
    // Alamofire Session.swift case: many methods stacked on top of each other,
    // which collapsed into one giant cluster pre-#185.
    const fatLines: string[] = ['export class Session {'];
    for (let i = 0; i < 30; i++) {
      fatLines.push(` method${i}(arg: string): string {`);
      fatLines.push(` return this.helper${i}(arg) + "${i}";`);
      fatLines.push(` }`);
      fatLines.push(` private helper${i}(arg: string): string {`);
      fatLines.push(` return arg.repeat(${i + 1});`);
      fatLines.push(` }`);
    }
    fatLines.push('}');
    fs.writeFileSync(path.join(srcDir, 'session.ts'), fatLines.join('\n'));

    // A few small supporting files so the project has >1 indexed file.
    for (let i = 0; i < 5; i++) {
      fs.writeFileSync(
        path.join(srcDir, `support${i}.ts`),
        `import { Session } from './session';\nexport function callSession${i}(s: Session) { return s.method${i}('hi'); }\n`
      );
    }

    cg = CodeGraph.initSync(testDir, {
      config: { include: ['**/*.ts'], exclude: [] },
    });
    await cg.indexAll();
    handler = new ToolHandler(cg);
  });

  afterAll(() => {
    // Both guards matter: beforeAll may have thrown before either was assigned.
    if (cg) cg.destroy();
    if (testDir && fs.existsSync(testDir)) {
      fs.rmSync(testDir, { recursive: true, force: true });
    }
  });

  it('keeps total output under the small-project cap', async () => {
    const text = await exploreText();
    const smallBudget = getExploreOutputBudget(100);
    // Allow a small overshoot for the trailing markers — the cap is enforced
    // per-file rather than as an absolute output ceiling.
    expect(text.length).toBeLessThan(smallBudget.maxOutputChars + 500);
  });

  it('omits the meta-text gated off for small projects', async () => {
    const text = await exploreText();
    expect(text).not.toContain('### Additional relevant files');
    expect(text).not.toContain('Complete source code is included above');
    expect(text).not.toContain('Explore budget:');
  });

  it('still includes the Relationships section — it is the cheapest structural signal', async () => {
    const text = await exploreText();
    // Either there are relationships, or no edges were significant — both are fine.
    // NOTE(review): because this is a disjunction, the check also passes whenever
    // a '### Source Code' heading appears past offset 0, even with no Relationships
    // section — confirm that is strict enough for the "not gated off" intent.
    const hasRelationships = text.includes('### Relationships');
    const sourceFollowsHeader = text.indexOf('### Source Code') > 0;
    expect(hasRelationships || sourceFollowsHeader).toBe(true);
  });
});
Loading