From dd7f1593a89e56b6ff6f4a0e3efb4d4359a3052a Mon Sep 17 00:00:00 2001 From: BRODY <126101872+Ununp3ntium115@users.noreply.github.com> Date: Sun, 31 May 2026 15:24:20 -0600 Subject: [PATCH] fix: tighten Claude integration setup Add codegraph_files to Claude auto-allow, create non-destructive config backups, clarify codegraph_context as a keyword-backed lookup, and map ArkTS .ets files to the TypeScript grammar. Refs #565 #486 #571 #512. --- __tests__/extraction.test.ts | 1 + __tests__/installer-targets.test.ts | 8 ++++++ __tests__/installer.test.ts | 17 ++++++++++++ __tests__/security.test.ts | 1 + src/extraction/grammars.ts | 5 ++++ src/installer/targets/shared.ts | 40 ++++++++++++++++++++++++++--- src/mcp/server-instructions.ts | 11 +++++--- src/mcp/tools.ts | 8 +++--- 8 files changed, 80 insertions(+), 11 deletions(-) diff --git a/__tests__/extraction.test.ts b/__tests__/extraction.test.ts index b497af6a9..2a92126f3 100644 --- a/__tests__/extraction.test.ts +++ b/__tests__/extraction.test.ts @@ -33,6 +33,7 @@ function cleanupTempDir(dir: string): void { describe('Language Detection', () => { it('should detect TypeScript files', () => { expect(detectLanguage('src/index.ts')).toBe('typescript'); + expect(detectLanguage('entry/src/main/ets/pages/Index.ets')).toBe('typescript'); expect(detectLanguage('components/Button.tsx')).toBe('tsx'); }); diff --git a/__tests__/installer-targets.test.ts b/__tests__/installer-targets.test.ts index 27fcbd6e8..73772ec34 100644 --- a/__tests__/installer-targets.test.ts +++ b/__tests__/installer-targets.test.ts @@ -880,6 +880,14 @@ describe('Installer targets — partial-state idempotency', () => { expect(cfg.mcpServers.codegraph).toBeDefined(); }); + it('claude: auto-allow includes codegraph_files (#565)', () => { + const claude = getTarget('claude')!; + claude.install('local', { autoAllow: true }); + + const settings = JSON.parse(fs.readFileSync(path.join(tmpCwd, '.claude', 'settings.json'), 'utf-8')); + expect(settings.permissions.allow).toContain('mcp__codegraph__codegraph_files'); + }); + it('claude: install does NOT create a CLAUDE.md instructions file (#529)', () => { const claude = getTarget('claude')!; const result = claude.install('local', { autoAllow: false }); diff --git a/__tests__/installer.test.ts b/__tests__/installer.test.ts index 6f174f62d..c10f0dcfc 100644 --- a/__tests__/installer.test.ts +++ b/__tests__/installer.test.ts @@ -18,6 +18,7 @@ import * as os from 'os'; import { writeMcpConfig, } from '../src/installer/config-writer'; +import { atomicWriteFileSync } from '../src/installer/targets/shared'; function createTempDir(): string { return fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-installer-test-')); @@ -99,6 +100,22 @@ describe('Installer Config Writer', () => { expect(content.mcpServers.codegraph).toBeDefined(); expect(content.mcpServers.other).toBeDefined(); expect(content.customField).toBe('preserved'); + expect(fs.existsSync(mcpJson + '.backup')).toBe(true); + const backup = JSON.parse(fs.readFileSync(mcpJson + '.backup', 'utf-8')); + expect(backup.mcpServers.codegraph).toBeUndefined(); + expect(backup.mcpServers.other).toBeDefined(); + }); + + it('should create numbered backups instead of overwriting an existing backup', () => { + const file = path.join(tempDir, 'settings.json'); + fs.writeFileSync(file, '{"version":1}\n'); + + atomicWriteFileSync(file, '{"version":2}\n'); + atomicWriteFileSync(file, '{"version":3}\n'); + + expect(fs.readFileSync(file, 'utf-8')).toContain('"version":3'); + expect(fs.readFileSync(file + '.backup', 'utf-8')).toContain('"version":1'); + expect(fs.readFileSync(file + '.backup.1', 'utf-8')).toContain('"version":2'); }); }); }); diff --git a/__tests__/security.test.ts b/__tests__/security.test.ts index 75ac84320..94697065a 100644 --- a/__tests__/security.test.ts +++ b/__tests__/security.test.ts @@ -373,6 +373,7 @@ describe('Source file detection (isSourceFile)', () => { expect(isSourceFile('src/index.ts')).toBe(true); expect(isSourceFile('src/deep/nested/file.ts')).toBe(true); expect(isSourceFile('src/component.tsx')).toBe(true); + expect(isSourceFile('entry/src/main/ets/pages/Index.ets')).toBe(true); expect(isSourceFile('lib/util.js')).toBe(true); expect(isSourceFile('src/main.py')).toBe(true); }); diff --git a/src/extraction/grammars.ts b/src/extraction/grammars.ts index c9a2bcb37..99f91eaa0 100644 --- a/src/extraction/grammars.ts +++ b/src/extraction/grammars.ts @@ -45,6 +45,11 @@ const WASM_GRAMMAR_FILES: Record = { */ export const EXTENSION_MAP: Record = { '.ts': 'typescript', + // ArkTS (`.ets`) is a TypeScript superset used by HarmonyOS/OpenHarmony. + // The TypeScript grammar handles its common syntax well enough for first-pass + // indexing, and keeps extension selection aligned with custom .ets=typescript + // workarounds users were already applying. + '.ets': 'typescript', '.tsx': 'tsx', '.js': 'javascript', '.mjs': 'javascript', diff --git a/src/installer/targets/shared.ts b/src/installer/targets/shared.ts index 6d54ab570..0ee3e9211 100644 --- a/src/installer/targets/shared.ts +++ b/src/installer/targets/shared.ts @@ -38,9 +38,44 @@ export function getCodeGraphPermissions(): string[] { 'mcp__codegraph__codegraph_impact', 'mcp__codegraph__codegraph_node', 'mcp__codegraph__codegraph_status', + 'mcp__codegraph__codegraph_files', ]; } +/** + * Best-effort backup for an existing user config before we overwrite it. + * Keeps the historical `.backup` name for the first backup, then uses + * numbered siblings so later writes never destroy an earlier restore point. + */ +export function backupFileSync(filePath: string): string | null { + if (!fs.existsSync(filePath)) { + return null; + } + + let backupPath = filePath + '.backup'; + try { + if ( + fs.existsSync(backupPath) && + fs.readFileSync(backupPath).equals(fs.readFileSync(filePath)) + ) { + return backupPath; + } + } catch { + // If comparison fails, still try to create a numbered backup below. + } + + for (let i = 1; fs.existsSync(backupPath); i++) { + backupPath = `${filePath}.backup.${i}`; + } + + try { + fs.copyFileSync(filePath, backupPath); + return backupPath; + } catch { + return null; + } +} + /** * Read a JSON file, returning `{}` when missing or unparseable. * @@ -58,9 +93,7 @@ export function readJsonFile(filePath: string): Record { const msg = err instanceof Error ? err.message : String(err); console.warn(` Warning: Could not parse ${path.basename(filePath)}: ${msg}`); console.warn(` A backup will be created before overwriting.`); - try { - fs.copyFileSync(filePath, filePath + '.backup'); - } catch { /* ignore backup failure */ } + backupFileSync(filePath); return {}; } } @@ -78,6 +111,7 @@ export function atomicWriteFileSync(filePath: string, content: string): void { } const tmpPath = filePath + '.tmp.' + process.pid; try { + backupFileSync(filePath); fs.writeFileSync(tmpPath, content); fs.renameSync(tmpPath, filePath); } catch (err) { diff --git a/src/mcp/server-instructions.ts b/src/mcp/server-instructions.ts index db9949a74..ff74fab16 100644 --- a/src/mcp/server-instructions.ts +++ b/src/mcp/server-instructions.ts @@ -25,8 +25,10 @@ editing code, not during. ## Answer directly — don't delegate exploration For "how does X work", architecture, trace, or where-is-X questions, -answer DIRECTLY using 2-3 codegraph calls: \`codegraph_context\` first, -then ONE \`codegraph_explore\` for the source of the symbols it surfaces. +answer DIRECTLY using 2-3 codegraph calls. If the request contains concrete +symbol names, route/path fragments, or API terms, call \`codegraph_context\` +with those short keywords first; otherwise start with \`codegraph_search\` +to find the right names, then ONE \`codegraph_explore\` for the source. Codegraph IS the pre-built search index — so delegating the lookup to a separate file-reading sub-task/agent, or running your own grep + read loop, repeats work codegraph already did and costs more for the same @@ -37,7 +39,7 @@ of calls; a grep/read exploration is dozens. ## Tool selection by intent - **"What is the symbol named X?"** → \`codegraph_search\` -- **"What's the deal with this task / feature / area?"** → \`codegraph_context\` (PRIMARY — composes search + node + callers + callees in one call) +- **"What's around keyword/symbol/path X?"** → \`codegraph_context\` with terse keywords only (composes search + node + callers + callees in one call) - **"How does X reach/become Y? / trace the flow / the path from X to Y"** → \`codegraph_trace\` (ONE call returns the whole call path, including dynamic-dispatch hops — callbacks, React re-render, JSX children — that grep can't follow) - **"What calls this?"** → \`codegraph_callers\` - **"What does this call?"** → \`codegraph_callees\` @@ -50,7 +52,7 @@ of calls; a grep/read exploration is dozens. ## Common chains - **Flow / "how does X reach Y"**: \`codegraph_trace\` from→to FIRST — one call returns the entire path with dynamic-dispatch hops bridged. Then ONE \`codegraph_explore\` for the hop bodies if you need them. Do NOT reconstruct the path with \`codegraph_search\` + \`codegraph_callers\` — that's exactly what trace does in a single call. -- **Onboarding**: \`codegraph_context\` first. If still unclear, \`codegraph_explore\` for breadth, then \`codegraph_node\` on specific symbols. +- **Onboarding**: if the prompt names a concrete area, \`codegraph_context\` with short keywords first. If it is vague, \`codegraph_search\` first to discover names. If still unclear, \`codegraph_explore\` for breadth, then \`codegraph_node\` on specific symbols. - **Refactor planning**: \`codegraph_search\` → \`codegraph_callers\` → \`codegraph_impact\`. The blast-radius answer comes from impact, not from walking callers manually. - **Debugging a regression**: \`codegraph_callers\` of the suspected symbol; widen with \`codegraph_impact\` if an unexpected call appears. @@ -58,6 +60,7 @@ of calls; a grep/read exploration is dozens. - **Trust codegraph's results — don't re-verify them with grep.** They come from a full AST parse; re-checking with grep is slower, less accurate, and wastes context. - **Don't grep first** when looking up a symbol by name — \`codegraph_search\` is faster and returns kind + location + signature. +- **Don't pass prose into \`codegraph_context\`** — it is keyword-backed. Use symbol names, route fragments, API names, filenames, or a few concrete terms. - **Don't chain \`codegraph_search\` + \`codegraph_node\`** when you just want context — \`codegraph_context\` is one round-trip. - **Don't loop \`codegraph_node\` over many symbols** — one \`codegraph_explore\` call returns them all grouped by file, while each separate call re-reads the whole context and costs far more. Use \`codegraph_node\` for a single symbol. - **After editing, check the staleness banner.** When a tool response starts with "⚠️ Some files referenced below were edited since the last index sync…", the listed files are pending re-index — Read those specific files for accurate content. Every file NOT in that banner is fresh, so still trust codegraph. \`codegraph_status\` also lists pending files under "Pending sync". diff --git a/src/mcp/tools.ts b/src/mcp/tools.ts index 2e9c6f816..ddb9db2b7 100644 --- a/src/mcp/tools.ts +++ b/src/mcp/tools.ts @@ -402,8 +402,8 @@ const projectPathProperty: PropertySchema = { /** * All CodeGraph MCP tools * - * Designed for minimal context usage - use codegraph_context as the primary tool, - * and only use other tools for targeted follow-up queries. + * Designed for minimal context usage - use codegraph_context with short, + * concrete search terms, and only use other tools for targeted follow-up queries. * * All tools support cross-project queries via the optional `projectPath` parameter. */ @@ -435,13 +435,13 @@ export const tools: ToolDefinition[] = [ }, { name: 'codegraph_context', - description: 'PRIMARY TOOL — call FIRST for any "how does X work"/architecture/bug question. Returns entry points + related symbols + key code in one call; usually answers without further search/Read/Grep. Provides CODE context, not product requirements.', + description: 'Comprehensive code context for short, concrete keywords/symbols/path fragments. Pass terse search terms like "UserService login" or "bondIncome/workflow/callback", not a full natural-language task description.', inputSchema: { type: 'object', properties: { task: { type: 'string', - description: 'Description of the task, bug, or feature to build context for', + description: 'Short keyword query: symbol names, route/path fragments, API names, or 2-5 concrete terms. Do not pass a verbose natural-language task sentence.', }, maxNodes: { type: 'number',