Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 79 additions & 0 deletions __tests__/mcp-tool-registry.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
/**
* MCP tool registry: structural invariants.
*
* Guards against the failure mode where a future PR adds a
* ToolModule but forgets to implement the matching `handle<Name>`
* method on ToolHandler (or vice versa).
*/
import { describe, it, expect } from 'vitest';
import { getToolModules, tools as registryTools } from '../src/mcp/tools/registry';
import { ToolHandler, tools } from '../src/mcp/tools';

describe('MCP tool registry — single source of truth', () => {
  /** Collects the `name` of each definition and returns them in sorted order. */
  const sortedNames = (defs: ReadonlyArray<{ name: string }>): string[] =>
    defs.map((def) => def.name).sort();

  // Metadata shape: namespaced snake_case name plus a description longer than 20 chars.
  it('every tool module has a non-empty name and description', () => {
    getToolModules().forEach(({ definition }) => {
      expect(definition.name).toMatch(/^codegraph_[a-z_]+$/);
      expect(definition.description.length).toBeGreaterThan(20);
    });
  });

  it('handlerKey is a string starting with "handle"', () => {
    getToolModules().forEach((mod) => {
      expect(mod.handlerKey).toMatch(/^handle[A-Z][A-Za-z]+$/);
    });
  });

  it('every registered tool has a corresponding ToolHandler method', () => {
    // View the handler as a string-keyed bag so each handlerKey can be looked up dynamically.
    const methodTable = new ToolHandler(null) as unknown as Record<string, unknown>;
    getToolModules().forEach((mod) => {
      expect(typeof methodTable[mod.handlerKey]).toBe('function');
    });
  });

  it('exported `tools` array exactly mirrors the registry', () => {
    const registryNames = sortedNames(registryTools);
    const exportedNames = sortedNames(tools);
    expect(exportedNames).toEqual(registryNames);
  });

  it('all 9 main-line tools are registered (regression guard)', () => {
    // Kept in alphabetical order so it compares directly against the sorted registry names.
    const expected = [
      'codegraph_callees',
      'codegraph_callers',
      'codegraph_context',
      'codegraph_explore',
      'codegraph_files',
      'codegraph_impact',
      'codegraph_node',
      'codegraph_search',
      'codegraph_status',
    ];
    const registeredNames = getToolModules().map(({ definition }) => definition.name);
    registeredNames.sort();
    expect(registeredNames).toEqual(expected);
  });

  it('execute() reports unknown-tool errors', async () => {
    const outcome = await new ToolHandler(null).execute('codegraph_does_not_exist', {});
    expect(outcome.isError).toBe(true);
    expect(outcome.content[0]?.text).toMatch(/Unknown tool/);
  });

  it('execute() actually dispatches to the registered handler (no broken `this` binding)', async () => {
    // The handler is constructed without a CodeGraph instance, so a handler
    // body that asks for one is expected to throw and execute() is expected
    // to turn that into an error result. Success of the body is irrelevant
    // here: the assertions only prove that the registry lookup followed by
    // the `this[handlerKey](args)` call reached a real method.
    const outcome = await new ToolHandler(null).execute('codegraph_status', {});
    expect(outcome.isError).toBe(true);

    const message = outcome.content[0]?.text;
    // Generic envelope produced by execute() when a handler throws.
    expect(message).toMatch(/Tool execution failed/);
    // Root cause: no CodeGraph instance was bound to the handler.
    expect(message).toMatch(/CodeGraph not initialized/);
  });
});
22 changes: 16 additions & 6 deletions src/mcp/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@
import * as path from 'path';
import CodeGraph, { findNearestCodeGraphRoot } from '../index';
import { StdioTransport, JsonRpcRequest, JsonRpcNotification, ErrorCodes } from './transport';
import { tools, ToolHandler } from './tools';
import { ToolHandler } from './tools';
import { getToolModule } from './tools/registry';
import { SERVER_INSTRUCTIONS } from './server-instructions';

/**
* Convert a file:// URI to a filesystem path.
Expand All @@ -34,8 +36,10 @@ function fileUriToPath(uri: string): string {
}
return path.resolve(filePath);
} catch {
// Fallback for non-standard URIs
return uri.replace(/^file:\/\/\/?/, '');
// Fallback for non-standard URIs — still resolve through path.resolve
// so a malformed `file:///../etc/passwd` is normalized rather than
// returned raw to downstream filesystem code.
return path.resolve(uri.replace(/^file:\/\/\/?/, ''));
}
}

Expand Down Expand Up @@ -268,13 +272,18 @@ export class MCPServer {
// Try to initialize the default project (non-fatal if it fails)
await this.tryInitializeDefault(projectPath);

// We accept the client's protocol version but respond with our supported version
// We accept the client's protocol version but respond with our supported version.
// `instructions` is a protocol-level field that MCP clients surface in the
// agent's system prompt, giving the agent a high-level playbook for the
// toolset before it sees individual tool descriptions. See
// ./server-instructions.ts.
this.transport.sendResult(request.id, {
protocolVersion: PROTOCOL_VERSION,
capabilities: {
tools: {},
},
serverInfo: SERVER_INFO,
instructions: SERVER_INSTRUCTIONS,
});
}

Expand Down Expand Up @@ -309,8 +318,9 @@ export class MCPServer {
const toolName = params.name;
const toolArgs = params.arguments || {};

// Validate tool exists
const tool = tools.find(t => t.name === toolName);
// Validate tool exists — O(1) Map lookup against the registry,
// matches the path `ToolHandler.execute()` uses internally.
const tool = getToolModule(toolName)?.definition;
if (!tool) {
this.transport.sendError(
request.id,
Expand Down
75 changes: 75 additions & 0 deletions src/mcp/server-instructions.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
/**
 * Server-level instructions emitted in the MCP `initialize` response.
 *
 * MCP clients (Claude Code, Cursor, opencode, LangChain, OpenAI Agent
 * SDK, …) surface this text in the agent's system prompt automatically,
 * giving the agent a high-level playbook for the codegraph toolset
 * before it sees individual tool descriptions.
 *
 * Goals when editing this:
 *   - Tool selection by intent (which tool for which question)
 *   - Common chains (PR review = X then Y; refactor planning = A then B)
 *   - Anti-patterns (don't grep when codegraph_search is faster)
 *   - Tier discipline (cheap deterministic → conditional → LLM-mediated)
 *
 * Editing notes:
 *   - Keep it tight. The agent reads this every session — long
 *     instructions burn tokens. Aim for under ~80 lines of useful guidance.
 *   - The value is a template literal, so every Markdown backtick inside
 *     it must stay escaped as \`.
 *   - The index-lag figure appears in three places (intro, Anti-patterns,
 *     Limitations). Keep them in agreement; contradictory numbers in a
 *     system prompt leave the agent guessing which one to trust.
 */
export const SERVER_INSTRUCTIONS = `# Codegraph — code intelligence over an indexed knowledge graph

Codegraph builds a SQLite knowledge graph of every symbol, edge, and
file in the workspace. It is a structural reference manual the agent
consults BEFORE writing or editing code, not a live linter that runs
during generation. Reads are sub-millisecond; the index lags writes by
about a second through the file watcher.

## When to use which tool

- **"What is the symbol named X?"** → \`codegraph_search\` (fast lookup)
- **"What's the deal with this task / feature / bug?"** → \`codegraph_context\` (PRIMARY tool — composes 5+ smaller queries into one answer)
- **"What calls this function?"** → \`codegraph_callers\`
- **"What does this function call?"** → \`codegraph_callees\`
- **"What would changing this break?"** → \`codegraph_impact\`
- **"Is this function risky to change? Is it complex / nested / large?"** → \`codegraph_biomarkers\` (PR #125, when present) — structured answer instead of reading 200 lines of source
- **"Is this function tested? What's covered?"** → \`codegraph_coverage\` (PR #124, when present) — requires a prior \`codegraph coverage <lcov>\` ingestion
- **"Show me this symbol's source / signature / docstring."** → \`codegraph_node\`
- **"Survey an unfamiliar topic / pattern / module."** → \`codegraph_explore\` (heavier; best when budget allows)
- **"What's in directory X?"** → \`codegraph_files\`
- **"Is the index ready / what's its size?"** → \`codegraph_status\`

## Common chains (run tools in sequence)

- **Onboarding to a topic**: \`codegraph_context\` first. If still unclear, \`codegraph_explore\` for breadth, then \`codegraph_node\` on specific symbols you want code for.
- **PR review**: if \`codegraph_review_context\` is available (PR #110), pass the unified diff to it — returns affected symbols + their callers + impact + co-change warnings in one call.
- **Refactor planning**: \`codegraph_search\` to find the symbol; \`codegraph_biomarkers\` (mode=symbol) for its Code Health and complexity metrics; \`codegraph_coverage\` (mode=symbol) to see if tests exist; \`codegraph_callers\` for what depends on it; \`codegraph_impact\` for the full blast radius. The killer pre-refactor query is \`codegraph_biomarkers minSeverity=warning minCentrality=0.001\` — lists high-impact code with structural problems in one call.
- **Debugging a regression**: \`codegraph_callers\` of the suspected symbol. If recent changes are in scope, look for hotspot tools (\`codegraph_hotspots\` if available) to identify churn × centrality risk. \`codegraph_biomarkers\` on the suspected hotspot tells you whether the function is structurally bad enough that it might be the cause.
- **"What should I test next?"**: \`codegraph_coverage\` mode=ranked with \`minCentrality\` set — returns high-impact under-covered code, ordered by importance.

## Tool tiers (start cheap, escalate when needed)

1. **Always available, deterministic, sub-millisecond**: search / context / callers / callees / impact / node / explore / files / status. Plus \`codegraph_biomarkers\` once #125 lands — analysis runs as part of every indexAll/sync. Most tasks can be answered entirely at this tier.
2. **Conditional on data availability**: \`codegraph_review_context\` needs a diff. \`codegraph_hotspots\`, \`codegraph_config\`, \`codegraph_sql\` need their respective indexed signals (git history, env-var read sites, SQL string-literals). \`codegraph_coverage\` needs a prior \`codegraph coverage <lcov>\` ingestion. All return clearly when data isn't present.
3. **LLM-mediated, opt-in**: \`codegraph_ask\` (RAG Q&A), \`codegraph_similar\` (semantic search), \`codegraph_dead_code\` (graph + LLM judge), \`codegraph_role\` / \`codegraph_module\` (LLM classifications). These require a configured local LLM endpoint or the agent-bridge tier.

## Agent-bridge tier (when no local LLM is configured)

When LLM-mediated tools aren't available but the user wants summaries:
1. Call \`codegraph_pending_summaries\` to pull a batch of symbols needing summaries (returns each symbol's body + content_hash).
2. The agent (you) generate one-line summaries for each — action-verb leading, no "This function..." preamble, ≤200 chars.
3. Call \`codegraph_save_summaries\` echoing each item's contentHash unchanged. Codegraph re-validates against current disk before persisting.

This lets agents do LLM work themselves when no separate LLM endpoint exists.

## Anti-patterns

- **Don't grep first** when looking up a symbol by name — \`codegraph_search\` is faster and returns kind + location + signature.
- **Don't call \`codegraph_search\` then \`codegraph_node\`** when you just want context — \`codegraph_context\` is one round-trip.
- **Don't use \`codegraph_explore\` for narrow questions** — it's a multi-call deep dive, expensive in tokens. Save it for genuine "I'm new here" surveys.
- **Don't query the index immediately after editing a file** — the watcher needs ~1 second to debounce + sync. Wait for the next turn.

## Limitations

- Index lags file writes by ~1 second (watcher debounce + sync).
- Cross-file resolution is a best-effort name match; ambiguous calls return multiple candidates.
- No live correctness validation — that's still the TypeScript compiler / test suite / linter's job. Codegraph supplements those with structural context they don't have.
`;
39 changes: 39 additions & 0 deletions src/mcp/tool-types.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/**
* Shared MCP tool types.
*
* Lives in its own module so per-tool files in `./tools/` and
* the legacy class wrapper in `./tools.ts` can import the same
* type definitions without a circular dependency.
*/

/**
 * Schema entry describing one named property of a tool's `inputSchema`.
 */
export interface PropertySchema {
  /** Type name of the property value (for example `'string'`). */
  type: string;
  /** Free-text description of the property. */
  description: string;
  /** Optional list of string values; presumably the permitted values — confirm against consumers. */
  enum?: Array<string>;
  /** Optional default; typed `unknown`, so callers must narrow before use. */
  default?: unknown;
}

/**
 * Static description of one MCP tool: its name, its description, and the
 * object schema of the arguments it accepts.
 */
export interface ToolDefinition {
  /** Tool identifier. */
  name: string;
  /** Free-text description of the tool. */
  description: string;
  /** Argument schema; the top level is always an object. */
  inputSchema: {
    type: 'object';
    /** Per-argument schema entries, keyed by argument name. */
    properties: Record<string, PropertySchema>;
    /** Optional list of argument names; presumably those that must be supplied — confirm. */
    required?: Array<string>;
  };
}

/**
 * Value produced by running a tool: a list of text parts plus an optional
 * error flag.
 */
export interface ToolResult {
  /** Output parts; only `'text'` parts are representable. */
  content: { type: 'text'; text: string }[];
  /** Optional error flag; may be left out entirely. */
  isError?: boolean;
}

/**
 * Shared schema entry for the `projectPath` tool argument.
 *
 * Every tool's inputSchema accepts it for cross-project queries, so the
 * entry is declared once here instead of being repeated per tool. Per the
 * description text, leaving the argument out means the current project is
 * used.
 */
export const projectPathProperty: PropertySchema = {
type: 'string',
// Runtime text exposed as the property's description; keep the wording stable.
description:
'Path to a different project with .codegraph/ initialized. If omitted, uses current project. Use this to query other codebases.',
};
Loading