diff --git a/README.md b/README.md index acdb726bf..b4fe60f56 100644 --- a/README.md +++ b/README.md @@ -544,6 +544,80 @@ add a negation — `!vendor/`. The defaults apply uniformly, so committing a dependency or build directory doesn't force it into the graph; the `.gitignore` negation is the explicit opt-in. +## Contributing + +Set up a local checkout from your fork: + +```bash +git clone git@github.com:/codegraph.git +cd codegraph +git remote add upstream https://github.com/colbymchenry/codegraph.git +npm install +npm run build +``` + +To point the global `codegraph` command at your local build while developing: + +```bash +npm link +codegraph --version +``` + +To switch back to the published package: + +```bash +npm unlink -g @colbymchenry/codegraph +npm i -g @colbymchenry/codegraph@latest +``` + +You can also run the local build without changing the global command: + +```bash +npm run cli -- status +``` + +Useful development commands: + +```bash +npm run dev # TypeScript watch build +npm test # full test suite +npm test -- # focused test file +npm run test:watch # Vitest watch mode +``` + +### macOS Source Builds and FTS5 + +The self-contained release and npm package use CodeGraph's bundled runtime, so +end users do not need to manage SQLite or FTS5. Contributors running directly +from source use their local Node runtime instead. On macOS, some official Node +22.x and 23.x builds include `node:sqlite` without FTS5 enabled, which makes +indexing fail with `no such module: fts5`. + +Use Node 24.x for local development on macOS if you see that error: + +```bash +nvm install 24 +nvm use 24 +npm install +npm run build +``` + +Quick FTS5 check before indexing: + +```bash +node - <<'NODE' +const { DatabaseSync } = require('node:sqlite'); +const db = new DatabaseSync(':memory:'); +db.exec('CREATE VIRTUAL TABLE cg_fts_check USING fts5(value)'); +db.close(); +console.log('FTS5 available'); +NODE +``` + +PR hygiene: do not bump `package.json` versions unless you are preparing a +release, and do not update `package-lock.json` unless your change modifies +dependencies. + ## Supported Platforms Every release ships a self-contained build (bundled Node runtime — nothing to diff --git a/__tests__/extraction.test.ts b/__tests__/extraction.test.ts index b497af6a9..8c735e74b 100644 --- a/__tests__/extraction.test.ts +++ b/__tests__/extraction.test.ts @@ -33,6 +33,7 @@ function cleanupTempDir(dir: string): void { describe('Language Detection', () => { it('should detect TypeScript files', () => { expect(detectLanguage('src/index.ts')).toBe('typescript'); + expect(detectLanguage('entry/src/main/ets/pages/Index.ets')).toBe('typescript'); expect(detectLanguage('components/Button.tsx')).toBe('tsx'); }); @@ -3382,6 +3383,44 @@ describe('Directory Exclusion', () => { expect(files.every((f) => !f.includes('.git'))).toBe(true); }); + it('should index git-visible files under non-ASCII directories (issue #541)', async () => { + const { execFileSync } = await import('child_process'); + const git = (...args: string[]) => + execFileSync('git', args, { cwd: tempDir, stdio: 'pipe' }); + + git('init', '-q'); + fs.mkdirSync(path.join(tempDir, 'src', 'english'), { recursive: true }); + fs.mkdirSync(path.join(tempDir, 'src', '中文目录'), { recursive: true }); + fs.writeFileSync( + path.join(tempDir, 'src', 'english', 'Foo.cs'), + 'namespace Demo;\npublic class Foo { public void Bar() {} }\n', + ); + fs.writeFileSync( + path.join(tempDir, 'src', '中文目录', 'Baz.cs'), + 'namespace Demo;\npublic class Baz { public void Qux() {} }\n', + ); + git('add', '-A'); + + const scanned = scanDirectory(tempDir).sort(); + expect(scanned).toEqual(['src/english/Foo.cs', 'src/中文目录/Baz.cs']); + + const cg = CodeGraph.initSync(tempDir); + try { + const result = await cg.indexAll(); + expect(result.filesIndexed).toBe(2); + expect(cg.getFiles().map((f) => f.path).sort()).toEqual(scanned); + + fs.writeFileSync( + path.join(tempDir, 'src', '中文目录', 'Baz.cs'), + 'namespace Demo;\npublic class Baz { public void Qux() {} public void Zap() {} }\n', + ); + const changed = cg.getChangedFiles(); + expect([...changed.added, ...changed.modified]).toContain('src/中文目录/Baz.cs'); + } finally { + cg.close(); + } + }); + it('should return forward-slash paths on all platforms', () => { const srcDir = path.join(tempDir, 'src', 'components'); fs.mkdirSync(srcDir, { recursive: true }); diff --git a/__tests__/installer-targets.test.ts b/__tests__/installer-targets.test.ts index 27fcbd6e8..73772ec34 100644 --- a/__tests__/installer-targets.test.ts +++ b/__tests__/installer-targets.test.ts @@ -880,6 +880,14 @@ describe('Installer targets — partial-state idempotency', () => { expect(cfg.mcpServers.codegraph).toBeDefined(); }); + it('claude: auto-allow includes codegraph_files (#565)', () => { + const claude = getTarget('claude')!; + claude.install('local', { autoAllow: true }); + + const settings = JSON.parse(fs.readFileSync(path.join(tmpCwd, '.claude', 'settings.json'), 'utf-8')); + expect(settings.permissions.allow).toContain('mcp__codegraph__codegraph_files'); + }); + it('claude: install does NOT create a CLAUDE.md instructions file (#529)', () => { const claude = getTarget('claude')!; const result = claude.install('local', { autoAllow: false }); diff --git a/__tests__/installer.test.ts b/__tests__/installer.test.ts index 6f174f62d..c10f0dcfc 100644 --- a/__tests__/installer.test.ts +++ b/__tests__/installer.test.ts @@ -18,6 +18,7 @@ import * as os from 'os'; import { writeMcpConfig, } from '../src/installer/config-writer'; +import { atomicWriteFileSync } from '../src/installer/targets/shared'; function createTempDir(): string { return fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-installer-test-')); @@ -99,6 +100,22 @@ describe('Installer Config Writer', () => { expect(content.mcpServers.codegraph).toBeDefined(); expect(content.mcpServers.other).toBeDefined(); expect(content.customField).toBe('preserved'); + expect(fs.existsSync(mcpJson + '.backup')).toBe(true); + const backup = JSON.parse(fs.readFileSync(mcpJson + '.backup', 'utf-8')); + expect(backup.mcpServers.codegraph).toBeUndefined(); + expect(backup.mcpServers.other).toBeDefined(); + }); + + it('should create numbered backups instead of overwriting an existing backup', () => { + const file = path.join(tempDir, 'settings.json'); + fs.writeFileSync(file, '{"version":1}\n'); + + atomicWriteFileSync(file, '{"version":2}\n'); + atomicWriteFileSync(file, '{"version":3}\n'); + + expect(fs.readFileSync(file, 'utf-8')).toContain('"version":3'); + expect(fs.readFileSync(file + '.backup', 'utf-8')).toContain('"version":1'); + expect(fs.readFileSync(file + '.backup.1', 'utf-8')).toContain('"version":2'); }); }); }); diff --git a/__tests__/security.test.ts b/__tests__/security.test.ts index 75ac84320..dfdc0131d 100644 --- a/__tests__/security.test.ts +++ b/__tests__/security.test.ts @@ -12,7 +12,7 @@ import { describe, it, expect, beforeEach, afterEach } from 'vitest'; import * as fs from 'fs'; import * as path from 'path'; import * as os from 'os'; -import { FileLock, validateProjectPath } from '../src/utils'; +import { FileLock, validatePathWithinRoot, validateProjectPath } from '../src/utils'; import CodeGraph from '../src/index'; import { ToolHandler, tools } from '../src/mcp/tools'; import { scanDirectory, isSourceFile } from '../src/extraction'; @@ -174,6 +174,72 @@ describe('Path Traversal Prevention', () => { const code = await cg.getCode('does-not-exist'); expect(code).toBeNull(); }); + + it('should reject symlinked files that resolve outside the project root', () => { + const outsideDir = createTempDir(); + try { + const outsideFile = path.join(outsideDir, 'secret.ts'); + fs.writeFileSync(outsideFile, 'export const secret = "outside";\n'); + + const linkPath = path.join(testDir, 'src', 'secret.ts'); + try { + fs.symlinkSync(outsideFile, linkPath, 'file'); + } catch { + return; + } + + expect(validatePathWithinRoot(testDir, 'src/secret.ts')).toBeNull(); + } finally { + cleanupTempDir(outsideDir); + } + }); + + it('should reject paths beneath symlinked directories outside the project root', () => { + const outsideDir = createTempDir(); + try { + const linkPath = path.join(testDir, 'src', 'external'); + try { + fs.symlinkSync(outsideDir, linkPath, 'dir'); + } catch { + return; + } + + expect(validatePathWithinRoot(testDir, 'src/external/new-file.ts')).toBeNull(); + } finally { + cleanupTempDir(outsideDir); + } + }); + + it('should allow symlinked files that resolve inside the project root', () => { + const linkPath = path.join(testDir, 'src', 'hello-link.ts'); + try { + fs.symlinkSync(path.join(testDir, 'src', 'hello.ts'), linkPath, 'file'); + } catch { + return; + } + + expect(validatePathWithinRoot(testDir, 'src/hello-link.ts')).toBe(linkPath); + }); + + it('should not index symlinked files that resolve outside the project root', async () => { + const outsideDir = createTempDir(); + try { + const outsideFile = path.join(outsideDir, 'secret.ts'); + fs.writeFileSync(outsideFile, 'export const secret = "outside";\n'); + + try { + fs.symlinkSync(outsideFile, path.join(testDir, 'src', 'secret.ts'), 'file'); + } catch { + return; + } + + const result = await cg.indexAll(); + expect(result.success).toBe(true); + expect(cg.getFiles().map((file) => file.path)).not.toContain('src/secret.ts'); + } finally { + cleanupTempDir(outsideDir); + } + }); }); describe('validateProjectPath — sensitive directory blocking', () => { @@ -373,6 +439,7 @@ describe('Source file detection (isSourceFile)', () => { expect(isSourceFile('src/index.ts')).toBe(true); expect(isSourceFile('src/deep/nested/file.ts')).toBe(true); expect(isSourceFile('src/component.tsx')).toBe(true); + expect(isSourceFile('entry/src/main/ets/pages/Index.ets')).toBe(true); expect(isSourceFile('lib/util.js')).toBe(true); expect(isSourceFile('src/main.py')).toBe(true); }); @@ -550,6 +617,28 @@ describe('Symlink Cycle Detection', () => { const files = scanDirectory(tempDir); expect(files).toContain('src/valid.ts'); }); + + it('should skip symlinks that resolve outside the root', () => { + const srcDir = path.join(tempDir, 'src'); + fs.mkdirSync(srcDir); + fs.writeFileSync(path.join(srcDir, 'valid.ts'), 'export const valid = true;\n'); + + const outsideDir = createTempDir(); + try { + fs.writeFileSync(path.join(outsideDir, 'secret.ts'), 'export const secret = true;\n'); + try { + fs.symlinkSync(outsideDir, path.join(srcDir, 'external'), 'dir'); + } catch { + return; + } + + const files = scanDirectory(tempDir); + expect(files).toContain('src/valid.ts'); + expect(files).not.toContain('src/external/secret.ts'); + } finally { + cleanupTempDir(outsideDir); + } + }); }); describe('Session marker symlink resistance', () => { diff --git a/src/extraction/grammars.ts b/src/extraction/grammars.ts index c9a2bcb37..99f91eaa0 100644 --- a/src/extraction/grammars.ts +++ b/src/extraction/grammars.ts @@ -45,6 +45,11 @@ const WASM_GRAMMAR_FILES: Record = { */ export const EXTENSION_MAP: Record = { '.ts': 'typescript', + // ArkTS (`.ets`) is a TypeScript superset used by HarmonyOS/OpenHarmony. + // The TypeScript grammar handles its common syntax well enough for first-pass + // indexing, and keeps extension selection aligned with custom .ets=typescript + // workarounds users were already applying. + '.ets': 'typescript', '.tsx': 'tsx', '.js': 'javascript', '.mjs': 'javascript', diff --git a/src/extraction/index.ts b/src/extraction/index.ts index 42037d7f6..d1c537276 100644 --- a/src/extraction/index.ts +++ b/src/extraction/index.ts @@ -159,6 +159,22 @@ const DEFAULT_IGNORE_PATTERNS: string[] = [ 'bazel-*/', // Bazel output symlink trees ]; +/** + * Git quotes non-ASCII path bytes by default (e.g. CJK segments become octal + * escapes wrapped in quotes). We parse git path output as UTF-8 strings, so all + * path-emitting commands must disable that quoting or source files under + * non-ASCII directories fail extension detection. + */ +const GIT_PATH_OUTPUT_ARGS = ['-c', 'core.quotePath=false']; + +function gitPathLines(output: string): string[] { + const lines = output.split('\n'); + if (lines[lines.length - 1] === '') { + lines.pop(); + } + return lines.filter((line) => line.length > 0); +} + /** * An `ignore` matcher seeded with the built-in defaults, merged with the project's * root .gitignore so a negation there (e.g. `!vendor/`) overrides a default. Shared @@ -198,32 +214,27 @@ function collectGitFiles(repoDir: string, prefix: string, files: Set): v // Without this, monorepos using submodules index 0 files. (See issue #147.) // Note: --recurse-submodules only supports -c/--cached and --stage modes — it // can't be combined with -o, so untracked files are gathered separately below. - const tracked = execFileSync('git', ['ls-files', '-c', '--recurse-submodules'], gitOpts); - for (const line of tracked.split('\n')) { - const trimmed = line.trim(); - if (trimmed) { - files.add(normalizePath(prefix + trimmed)); - } + const tracked = execFileSync('git', [...GIT_PATH_OUTPUT_ARGS, 'ls-files', '-c', '--recurse-submodules'], gitOpts); + for (const filePath of gitPathLines(tracked)) { + files.add(normalizePath(prefix + filePath)); } // Untracked files (submodules manage their own untracked state). Embedded git // repos surface here as a single "subdir/" entry that git refuses to descend // into — recurse into those as their own repos so their source gets indexed. - const untracked = execFileSync('git', ['ls-files', '-o', '--exclude-standard'], gitOpts); - for (const line of untracked.split('\n')) { - const trimmed = line.trim(); - if (!trimmed) continue; - if (trimmed.endsWith('/')) { + const untracked = execFileSync('git', [...GIT_PATH_OUTPUT_ARGS, 'ls-files', '-o', '--exclude-standard'], gitOpts); + for (const filePath of gitPathLines(untracked)) { + if (filePath.endsWith('/')) { // git only emits a trailing-slash directory entry for an embedded repo. // Guard with a .git check anyway, and skip anything else exactly as git // itself skips it (we never descend into a non-repo opaque dir). - const childDir = path.join(repoDir, trimmed); + const childDir = path.join(repoDir, filePath); if (fs.existsSync(path.join(childDir, '.git'))) { - collectGitFiles(childDir, prefix + trimmed, files); + collectGitFiles(childDir, prefix + filePath, files); } continue; } - files.add(normalizePath(prefix + trimmed)); + files.add(normalizePath(prefix + filePath)); } } @@ -240,9 +251,9 @@ function getGitVisibleFiles(rootDir: string): Set | null { // `git ls-files` returns nothing — fall back to filesystem walk. const gitRoot = execFileSync( 'git', - ['rev-parse', '--show-toplevel'], + [...GIT_PATH_OUTPUT_ARGS, 'rev-parse', '--show-toplevel'], { cwd: rootDir, encoding: 'utf-8', timeout: 5000, stdio: ['pipe', 'pipe', 'pipe'], windowsHide: true } - ).trim(); + ).trimEnd(); if (path.resolve(gitRoot) !== path.resolve(rootDir)) { try { @@ -290,7 +301,7 @@ function getGitChangedFiles(rootDir: string): GitChanges | null { try { const output = execFileSync( 'git', - ['status', '--porcelain', '--no-renames'], + [...GIT_PATH_OUTPUT_ARGS, 'status', '--porcelain', '--no-renames'], { cwd: rootDir, encoding: 'utf-8', timeout: 10000, stdio: ['pipe', 'pipe', 'pipe'], windowsHide: true } ); @@ -298,7 +309,7 @@ function getGitChangedFiles(rootDir: string): GitChanges | null { const added: string[] = []; const deleted: string[] = []; - for (const line of output.split('\n')) { + for (const line of gitPathLines(output)) { if (line.length < 4) continue; // Minimum: "XY file" const statusCode = line.substring(0, 2); @@ -462,6 +473,10 @@ function scanDirectoryWalk( if (entry.isSymbolicLink()) { try { + if (!validatePathWithinRoot(rootDir, relativePath)) { + logDebug('Skipping symlink outside root', { path: fullPath }); + continue; + } const realTarget = fs.realpathSync(fullPath); const stat = fs.statSync(realTarget); if (stat.isDirectory()) { diff --git a/src/installer/targets/shared.ts b/src/installer/targets/shared.ts index 6d54ab570..0ee3e9211 100644 --- a/src/installer/targets/shared.ts +++ b/src/installer/targets/shared.ts @@ -38,9 +38,44 @@ export function getCodeGraphPermissions(): string[] { 'mcp__codegraph__codegraph_impact', 'mcp__codegraph__codegraph_node', 'mcp__codegraph__codegraph_status', + 'mcp__codegraph__codegraph_files', ]; } +/** + * Best-effort backup for an existing user config before we overwrite it. + * Keeps the historical `.backup` name for the first backup, then uses + * numbered siblings so later writes never destroy an earlier restore point. + */ +export function backupFileSync(filePath: string): string | null { + if (!fs.existsSync(filePath)) { + return null; + } + + let backupPath = filePath + '.backup'; + try { + if ( + fs.existsSync(backupPath) && + fs.readFileSync(backupPath).equals(fs.readFileSync(filePath)) + ) { + return backupPath; + } + } catch { + // If comparison fails, still try to create a numbered backup below. + } + + for (let i = 1; fs.existsSync(backupPath); i++) { + backupPath = `${filePath}.backup.${i}`; + } + + try { + fs.copyFileSync(filePath, backupPath); + return backupPath; + } catch { + return null; + } +} + /** * Read a JSON file, returning `{}` when missing or unparseable. * @@ -58,9 +93,7 @@ export function readJsonFile(filePath: string): Record { const msg = err instanceof Error ? err.message : String(err); console.warn(` Warning: Could not parse ${path.basename(filePath)}: ${msg}`); console.warn(` A backup will be created before overwriting.`); - try { - fs.copyFileSync(filePath, filePath + '.backup'); - } catch { /* ignore backup failure */ } + backupFileSync(filePath); return {}; } } @@ -78,6 +111,7 @@ export function atomicWriteFileSync(filePath: string, content: string): void { } const tmpPath = filePath + '.tmp.' + process.pid; try { + backupFileSync(filePath); fs.writeFileSync(tmpPath, content); fs.renameSync(tmpPath, filePath); } catch (err) { diff --git a/src/mcp/server-instructions.ts b/src/mcp/server-instructions.ts index db9949a74..ff74fab16 100644 --- a/src/mcp/server-instructions.ts +++ b/src/mcp/server-instructions.ts @@ -25,8 +25,10 @@ editing code, not during. ## Answer directly — don't delegate exploration For "how does X work", architecture, trace, or where-is-X questions, -answer DIRECTLY using 2-3 codegraph calls: \`codegraph_context\` first, -then ONE \`codegraph_explore\` for the source of the symbols it surfaces. +answer DIRECTLY using 2-3 codegraph calls. If the request contains concrete +symbol names, route/path fragments, or API terms, call \`codegraph_context\` +with those short keywords first; otherwise start with \`codegraph_search\` +to find the right names, then ONE \`codegraph_explore\` for the source. Codegraph IS the pre-built search index — so delegating the lookup to a separate file-reading sub-task/agent, or running your own grep + read loop, repeats work codegraph already did and costs more for the same @@ -37,7 +39,7 @@ of calls; a grep/read exploration is dozens. ## Tool selection by intent - **"What is the symbol named X?"** → \`codegraph_search\` -- **"What's the deal with this task / feature / area?"** → \`codegraph_context\` (PRIMARY — composes search + node + callers + callees in one call) +- **"What's around keyword/symbol/path X?"** → \`codegraph_context\` with terse keywords only (composes search + node + callers + callees in one call) - **"How does X reach/become Y? / trace the flow / the path from X to Y"** → \`codegraph_trace\` (ONE call returns the whole call path, including dynamic-dispatch hops — callbacks, React re-render, JSX children — that grep can't follow) - **"What calls this?"** → \`codegraph_callers\` - **"What does this call?"** → \`codegraph_callees\` @@ -50,7 +52,7 @@ of calls; a grep/read exploration is dozens. ## Common chains - **Flow / "how does X reach Y"**: \`codegraph_trace\` from→to FIRST — one call returns the entire path with dynamic-dispatch hops bridged. Then ONE \`codegraph_explore\` for the hop bodies if you need them. Do NOT reconstruct the path with \`codegraph_search\` + \`codegraph_callers\` — that's exactly what trace does in a single call. -- **Onboarding**: \`codegraph_context\` first. If still unclear, \`codegraph_explore\` for breadth, then \`codegraph_node\` on specific symbols. +- **Onboarding**: if the prompt names a concrete area, \`codegraph_context\` with short keywords first. If it is vague, \`codegraph_search\` first to discover names. If still unclear, \`codegraph_explore\` for breadth, then \`codegraph_node\` on specific symbols. - **Refactor planning**: \`codegraph_search\` → \`codegraph_callers\` → \`codegraph_impact\`. The blast-radius answer comes from impact, not from walking callers manually. - **Debugging a regression**: \`codegraph_callers\` of the suspected symbol; widen with \`codegraph_impact\` if an unexpected call appears. @@ -58,6 +60,7 @@ of calls; a grep/read exploration is dozens. - **Trust codegraph's results — don't re-verify them with grep.** They come from a full AST parse; re-checking with grep is slower, less accurate, and wastes context. - **Don't grep first** when looking up a symbol by name — \`codegraph_search\` is faster and returns kind + location + signature. +- **Don't pass prose into \`codegraph_context\`** — it is keyword-backed. Use symbol names, route fragments, API names, filenames, or a few concrete terms. - **Don't chain \`codegraph_search\` + \`codegraph_node\`** when you just want context — \`codegraph_context\` is one round-trip. - **Don't loop \`codegraph_node\` over many symbols** — one \`codegraph_explore\` call returns them all grouped by file, while each separate call re-reads the whole context and costs far more. Use \`codegraph_node\` for a single symbol. - **After editing, check the staleness banner.** When a tool response starts with "⚠️ Some files referenced below were edited since the last index sync…", the listed files are pending re-index — Read those specific files for accurate content. Every file NOT in that banner is fresh, so still trust codegraph. \`codegraph_status\` also lists pending files under "Pending sync". diff --git a/src/mcp/tools.ts b/src/mcp/tools.ts index 2e9c6f816..ddb9db2b7 100644 --- a/src/mcp/tools.ts +++ b/src/mcp/tools.ts @@ -402,8 +402,8 @@ const projectPathProperty: PropertySchema = { /** * All CodeGraph MCP tools * - * Designed for minimal context usage - use codegraph_context as the primary tool, - * and only use other tools for targeted follow-up queries. + * Designed for minimal context usage - use codegraph_context with short, + * concrete search terms, and only use other tools for targeted follow-up queries. * * All tools support cross-project queries via the optional `projectPath` parameter. */ @@ -435,13 +435,13 @@ export const tools: ToolDefinition[] = [ }, { name: 'codegraph_context', - description: 'PRIMARY TOOL — call FIRST for any "how does X work"/architecture/bug question. Returns entry points + related symbols + key code in one call; usually answers without further search/Read/Grep. Provides CODE context, not product requirements.', + description: 'Comprehensive code context for short, concrete keywords/symbols/path fragments. Pass terse search terms like "UserService login" or "bondIncome/workflow/callback", not a full natural-language task description.', inputSchema: { type: 'object', properties: { task: { type: 'string', - description: 'Description of the task, bug, or feature to build context for', + description: 'Short keyword query: symbol names, route/path fragments, API names, or 2-5 concrete terms. Do not pass a verbose natural-language task sentence.', }, maxNodes: { type: 'number', diff --git a/src/utils.ts b/src/utils.ts index 1ee1c9372..9a500ac5b 100644 --- a/src/utils.ts +++ b/src/utils.ts @@ -46,9 +46,29 @@ const SENSITIVE_PATHS = new Set([ 'c:\\', 'c:\\windows', 'c:\\windows\\system32', ]); +function isResolvedPathWithinRoot(root: string, candidate: string): boolean { + const relative = path.relative(root, candidate); + return relative === '' || (relative !== '' && !relative.startsWith('..') && !path.isAbsolute(relative)); +} + +function nearestExistingPath(absPath: string): string | null { + let current = absPath; + + while (!fs.existsSync(current)) { + const parent = path.dirname(current); + if (parent === current) { + return null; + } + current = parent; + } + + return current; +} + /** * Validate that a resolved file path stays within the project root. * Prevents path traversal attacks (e.g. node.filePath = "../../etc/passwd"). + * Also resolves existing files and parent directories to catch symlink escapes. * * @param projectRoot - The project root directory * @param filePath - The relative file path to validate @@ -58,9 +78,35 @@ export function validatePathWithinRoot(projectRoot: string, filePath: string): s const resolved = path.resolve(projectRoot, filePath); const normalizedRoot = path.resolve(projectRoot); - if (!resolved.startsWith(normalizedRoot + path.sep) && resolved !== normalizedRoot) { + if (!isResolvedPathWithinRoot(normalizedRoot, resolved)) { return null; } + + try { + const realRoot = fs.realpathSync(normalizedRoot); + const realPath = fs.realpathSync(resolved); + if (!isResolvedPathWithinRoot(realRoot, realPath)) { + return null; + } + return resolved; + } catch { + // If the target does not exist, still validate the nearest existing parent + // so writes/reads under a symlinked directory cannot escape the root. + try { + const realRoot = fs.realpathSync(normalizedRoot); + const existingPath = nearestExistingPath(resolved); + if (existingPath) { + const realExistingPath = fs.realpathSync(existingPath); + if (!isResolvedPathWithinRoot(realRoot, realExistingPath)) { + return null; + } + } + } catch { + // Preserve the historical behavior for inaccessible/nonexistent roots + // after the lexical containment check above. + } + } + return resolved; } @@ -119,7 +165,7 @@ export function validateProjectPath(dirPath: string): string | null { export function isPathWithinRoot(filePath: string, rootDir: string): boolean { const resolvedPath = path.resolve(rootDir, filePath); const resolvedRoot = path.resolve(rootDir); - return resolvedPath.startsWith(resolvedRoot + path.sep) || resolvedPath === resolvedRoot; + return isResolvedPathWithinRoot(resolvedRoot, resolvedPath); } /**