From dd7f1593a89e56b6ff6f4a0e3efb4d4359a3052a Mon Sep 17 00:00:00 2001
From: BRODY <126101872+Ununp3ntium115@users.noreply.github.com>
Date: Sun, 31 May 2026 15:24:20 -0600
Subject: [PATCH] fix: tighten Claude integration setup

Add codegraph_files to Claude auto-allow, create non-destructive config backups, clarify codegraph_context as a keyword-backed lookup, and map ArkTS .ets files to the TypeScript grammar.

Refs #565 #486 #571 #512.
---
 __tests__/extraction.test.ts        |  1 +
 __tests__/installer-targets.test.ts |  8 ++++++
 __tests__/installer.test.ts         | 17 ++++++++++++
 __tests__/security.test.ts          |  1 +
 src/extraction/grammars.ts          |  5 ++++
 src/installer/targets/shared.ts     | 40 ++++++++++++++++++++++++++---
 src/mcp/server-instructions.ts      | 11 +++++---
 src/mcp/tools.ts                    |  8 +++---
 8 files changed, 80 insertions(+), 11 deletions(-)

diff --git a/__tests__/extraction.test.ts b/__tests__/extraction.test.ts
index b497af6a9..2a92126f3 100644
--- a/__tests__/extraction.test.ts
+++ b/__tests__/extraction.test.ts
@@ -33,6 +33,7 @@ function cleanupTempDir(dir: string): void {
 describe('Language Detection', () => {
   it('should detect TypeScript files', () => {
     expect(detectLanguage('src/index.ts')).toBe('typescript');
+    expect(detectLanguage('entry/src/main/ets/pages/Index.ets')).toBe('typescript');
     expect(detectLanguage('components/Button.tsx')).toBe('tsx');
   });
 
diff --git a/__tests__/installer-targets.test.ts b/__tests__/installer-targets.test.ts
index 27fcbd6e8..73772ec34 100644
--- a/__tests__/installer-targets.test.ts
+++ b/__tests__/installer-targets.test.ts
@@ -880,6 +880,14 @@ describe('Installer targets — partial-state idempotency', () => {
     expect(cfg.mcpServers.codegraph).toBeDefined();
   });
 
+  it('claude: auto-allow includes codegraph_files (#565)', () => {
+    const claude = getTarget('claude')!;
+    claude.install('local', { autoAllow: true });
+
+    const settings = JSON.parse(fs.readFileSync(path.join(tmpCwd, '.claude', 'settings.json'), 'utf-8'));
+    expect(settings.permissions.allow).toContain('mcp__codegraph__codegraph_files');
+  });
+
   it('claude: install does NOT create a CLAUDE.md instructions file (#529)', () => {
     const claude = getTarget('claude')!;
     const result = claude.install('local', { autoAllow: false });
diff --git a/__tests__/installer.test.ts b/__tests__/installer.test.ts
index 6f174f62d..c10f0dcfc 100644
--- a/__tests__/installer.test.ts
+++ b/__tests__/installer.test.ts
@@ -18,6 +18,7 @@ import * as os from 'os';
 import {
   writeMcpConfig,
 } from '../src/installer/config-writer';
+import { atomicWriteFileSync } from '../src/installer/targets/shared';
 
 function createTempDir(): string {
   return fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-installer-test-'));
@@ -99,6 +100,22 @@ describe('Installer Config Writer', () => {
       expect(content.mcpServers.codegraph).toBeDefined();
       expect(content.mcpServers.other).toBeDefined();
       expect(content.customField).toBe('preserved');
+      expect(fs.existsSync(mcpJson + '.backup')).toBe(true);
+      const backup = JSON.parse(fs.readFileSync(mcpJson + '.backup', 'utf-8'));
+      expect(backup.mcpServers.codegraph).toBeUndefined();
+      expect(backup.mcpServers.other).toBeDefined();
+    });
+
+    it('should create numbered backups instead of overwriting an existing backup', () => {
+      const file = path.join(tempDir, 'settings.json');
+      fs.writeFileSync(file, '{"version":1}\n');
+
+      atomicWriteFileSync(file, '{"version":2}\n');
+      atomicWriteFileSync(file, '{"version":3}\n');
+
+      expect(fs.readFileSync(file, 'utf-8')).toContain('"version":3');
+      expect(fs.readFileSync(file + '.backup', 'utf-8')).toContain('"version":1');
+      expect(fs.readFileSync(file + '.backup.1', 'utf-8')).toContain('"version":2');
     });
   });
 });
diff --git a/__tests__/security.test.ts b/__tests__/security.test.ts
index 75ac84320..94697065a 100644
--- a/__tests__/security.test.ts
+++ b/__tests__/security.test.ts
@@ -373,6 +373,7 @@ describe('Source file detection (isSourceFile)', () => {
     expect(isSourceFile('src/index.ts')).toBe(true);
     expect(isSourceFile('src/deep/nested/file.ts')).toBe(true);
     expect(isSourceFile('src/component.tsx')).toBe(true);
+    expect(isSourceFile('entry/src/main/ets/pages/Index.ets')).toBe(true);
     expect(isSourceFile('lib/util.js')).toBe(true);
     expect(isSourceFile('src/main.py')).toBe(true);
   });
diff --git a/src/extraction/grammars.ts b/src/extraction/grammars.ts
index c9a2bcb37..99f91eaa0 100644
--- a/src/extraction/grammars.ts
+++ b/src/extraction/grammars.ts
@@ -45,6 +45,11 @@ const WASM_GRAMMAR_FILES: Record<GrammarLanguage, string> = {
  */
 export const EXTENSION_MAP: Record<string, Language> = {
   '.ts': 'typescript',
+  // ArkTS (`.ets`) is a TypeScript superset used by HarmonyOS/OpenHarmony.
+  // The TypeScript grammar handles its common syntax well enough for first-pass
+  // indexing, and this mapping matches the custom `.ets` -> typescript override
+  // that users were already applying as a workaround.
+  '.ets': 'typescript',
   '.tsx': 'tsx',
   '.js': 'javascript',
   '.mjs': 'javascript',
diff --git a/src/installer/targets/shared.ts b/src/installer/targets/shared.ts
index 6d54ab570..0ee3e9211 100644
--- a/src/installer/targets/shared.ts
+++ b/src/installer/targets/shared.ts
@@ -38,9 +38,44 @@ export function getCodeGraphPermissions(): string[] {
     'mcp__codegraph__codegraph_impact',
     'mcp__codegraph__codegraph_node',
     'mcp__codegraph__codegraph_status',
+    'mcp__codegraph__codegraph_files',
   ];
 }
 
+/**
+ * Best-effort backup of an existing user config before it is overwritten.
+ * The first backup keeps the historical `<path>.backup` name; later ones use
+ * `<path>.backup.N`. Existing backups are never overwritten, and content that
+ * a backup already holds is not stored again, so repeated installs of an
+ * unchanged config cannot pile up identical copies.
+ *
+ * @returns the backup holding the current content, or `null` when the file is
+ *   missing or could not be copied (callers then proceed without a backup).
+ */
+export function backupFileSync(filePath: string): string | null {
+  let current: Buffer;
+  try {
+    current = fs.readFileSync(filePath);
+  } catch {
+    return null; // missing or unreadable: nothing we can back up
+  }
+
+  let backupPath = filePath + '.backup';
+  for (let i = 1; fs.existsSync(backupPath); i++) {
+    try {
+      if (fs.readFileSync(backupPath).equals(current)) return backupPath;
+    } catch { /* unreadable backup: treat as different and keep scanning */ }
+    backupPath = `${filePath}.backup.${i}`;
+  }
+
+  try {
+    fs.copyFileSync(filePath, backupPath, fs.constants.COPYFILE_EXCL);
+    return backupPath;
+  } catch {
+    return null;
+  }
+}
+
 /**
  * Read a JSON file, returning `{}` when missing or unparseable.
  *
@@ -58,9 +93,7 @@ export function readJsonFile(filePath: string): Record<string, any> {
     const msg = err instanceof Error ? err.message : String(err);
     console.warn(`  Warning: Could not parse ${path.basename(filePath)}: ${msg}`);
     console.warn(`  A backup will be created before overwriting.`);
-    try {
-      fs.copyFileSync(filePath, filePath + '.backup');
-    } catch { /* ignore backup failure */ }
+    backupFileSync(filePath);
     return {};
   }
 }
@@ -78,6 +111,7 @@ export function atomicWriteFileSync(filePath: string, content: string): void {
   }
   const tmpPath = filePath + '.tmp.' + process.pid;
   try {
+    backupFileSync(filePath);
     fs.writeFileSync(tmpPath, content);
     fs.renameSync(tmpPath, filePath);
   } catch (err) {
diff --git a/src/mcp/server-instructions.ts b/src/mcp/server-instructions.ts
index db9949a74..ff74fab16 100644
--- a/src/mcp/server-instructions.ts
+++ b/src/mcp/server-instructions.ts
@@ -25,8 +25,10 @@ editing code, not during.
 ## Answer directly — don't delegate exploration
 
 For "how does X work", architecture, trace, or where-is-X questions,
-answer DIRECTLY using 2-3 codegraph calls: \`codegraph_context\` first,
-then ONE \`codegraph_explore\` for the source of the symbols it surfaces.
+answer DIRECTLY using 2-3 codegraph calls. If the request contains concrete
+symbol names, route/path fragments, or API terms, call \`codegraph_context\`
+with those short keywords first; otherwise start with \`codegraph_search\`
+to find the right names, then ONE \`codegraph_explore\` for the source.
 Codegraph IS the pre-built search index — so delegating the lookup to a
 separate file-reading sub-task/agent, or running your own grep + read
 loop, repeats work codegraph already did and costs more for the same
@@ -37,7 +39,7 @@ of calls; a grep/read exploration is dozens.
 ## Tool selection by intent
 
 - **"What is the symbol named X?"** → \`codegraph_search\`
-- **"What's the deal with this task / feature / area?"** → \`codegraph_context\` (PRIMARY — composes search + node + callers + callees in one call)
+- **"What's around keyword/symbol/path X?"** → \`codegraph_context\` with terse keywords only (composes search + node + callers + callees in one call)
 - **"How does X reach/become Y? / trace the flow / the path from X to Y"** → \`codegraph_trace\` (ONE call returns the whole call path, including dynamic-dispatch hops — callbacks, React re-render, JSX children — that grep can't follow)
 - **"What calls this?"** → \`codegraph_callers\`
 - **"What does this call?"** → \`codegraph_callees\`
@@ -50,7 +52,7 @@ of calls; a grep/read exploration is dozens.
 ## Common chains
 
 - **Flow / "how does X reach Y"**: \`codegraph_trace\` from→to FIRST — one call returns the entire path with dynamic-dispatch hops bridged. Then ONE \`codegraph_explore\` for the hop bodies if you need them. Do NOT reconstruct the path with \`codegraph_search\` + \`codegraph_callers\` — that's exactly what trace does in a single call.
-- **Onboarding**: \`codegraph_context\` first. If still unclear, \`codegraph_explore\` for breadth, then \`codegraph_node\` on specific symbols.
+- **Onboarding**: if the prompt names a concrete area, \`codegraph_context\` with short keywords first. If it is vague, \`codegraph_search\` first to discover names. If still unclear, \`codegraph_explore\` for breadth, then \`codegraph_node\` on specific symbols.
 - **Refactor planning**: \`codegraph_search\` → \`codegraph_callers\` → \`codegraph_impact\`. The blast-radius answer comes from impact, not from walking callers manually.
 - **Debugging a regression**: \`codegraph_callers\` of the suspected symbol; widen with \`codegraph_impact\` if an unexpected call appears.
 
@@ -58,6 +60,7 @@ of calls; a grep/read exploration is dozens.
 
 - **Trust codegraph's results — don't re-verify them with grep.** They come from a full AST parse; re-checking with grep is slower, less accurate, and wastes context.
 - **Don't grep first** when looking up a symbol by name — \`codegraph_search\` is faster and returns kind + location + signature.
+- **Don't pass prose into \`codegraph_context\`** — it is keyword-backed. Use symbol names, route fragments, API names, filenames, or a few concrete terms.
 - **Don't chain \`codegraph_search\` + \`codegraph_node\`** when you just want context — \`codegraph_context\` is one round-trip.
 - **Don't loop \`codegraph_node\` over many symbols** — one \`codegraph_explore\` call returns them all grouped by file, while each separate call re-reads the whole context and costs far more. Use \`codegraph_node\` for a single symbol.
 - **After editing, check the staleness banner.** When a tool response starts with "⚠️ Some files referenced below were edited since the last index sync…", the listed files are pending re-index — Read those specific files for accurate content. Every file NOT in that banner is fresh, so still trust codegraph. \`codegraph_status\` also lists pending files under "Pending sync".
diff --git a/src/mcp/tools.ts b/src/mcp/tools.ts
index 2e9c6f816..ddb9db2b7 100644
--- a/src/mcp/tools.ts
+++ b/src/mcp/tools.ts
@@ -402,8 +402,8 @@ const projectPathProperty: PropertySchema = {
 /**
  * All CodeGraph MCP tools
  *
- * Designed for minimal context usage - use codegraph_context as the primary tool,
- * and only use other tools for targeted follow-up queries.
+ * Designed for minimal context usage - call codegraph_context with short, concrete
+ * terms (codegraph_search finds them if unknown); other tools are for follow-ups.
  *
  * All tools support cross-project queries via the optional `projectPath` parameter.
  */
@@ -435,13 +435,13 @@ export const tools: ToolDefinition[] = [
   },
   {
     name: 'codegraph_context',
-    description: 'PRIMARY TOOL — call FIRST for any "how does X work"/architecture/bug question. Returns entry points + related symbols + key code in one call; usually answers without further search/Read/Grep. Provides CODE context, not product requirements.',
+    description: 'Comprehensive code context for short, concrete keywords/symbols/path fragments. Pass terse search terms like "UserService login" or "bondIncome/workflow/callback", not a full natural-language task description.',
     inputSchema: {
       type: 'object',
       properties: {
         task: {
           type: 'string',
-          description: 'Description of the task, bug, or feature to build context for',
+          description: 'Short keyword query: symbol names, route/path fragments, API names, or 2-5 concrete terms. Do not pass a verbose natural-language task sentence.',
         },
         maxNodes: {
           type: 'number',