From 98d6c31485d10baf28fc60cff940080c3510049f Mon Sep 17 00:00:00 2001 From: andreinknv Date: Sun, 26 Apr 2026 12:56:06 -0400 Subject: [PATCH] fix(scan): honor .codegraphignore on the git fast path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .codegraphignore marker (per-directory opt-out from indexing) was respected by `scanDirectoryWalk` (the filesystem-walk fallback) but silently ignored by `getGitVisibleFiles` (the git fast path) and `getGitChangedFiles` (sync's git path). Same project gave different file sets depending on whether `.git` existed — typically the marker "worked" only on non-git scratch projects and was a no-op everywhere else, which is the opposite of how most users encounter it. This change adds two helpers in `src/extraction/index.ts`: - `findCodegraphIgnoredDirs(rootDir, files)` — walks parent directories of the given file list, returns the set of directories that contain a `.codegraphignore` marker. Walks once per unique parent directory, with an early-out on shared ancestors. - `isUnderCodegraphIgnoredDir(filePath, ignoredDirs)` — true if filePath lives under any of those dirs. Applied in: - `scanDirectory` and `scanDirectoryAsync` — between the git file list and the include-pattern filter. - `getGitChangedFiles` — refactored to a two-pass collect-then-bucketize so the ignored-dir set is built once from the candidate paths. The marker file itself does not need to be tracked by git — fs.existsSync catches it whether it was committed or added as a local override. ## Files changed | File | Change | |---|---| | src/extraction/index.ts | Add findCodegraphIgnoredDirs + isUnderCodegraphIgnoredDir; apply in scanDirectory, scanDirectoryAsync, getGitChangedFiles | | __tests__/codegraphignore.test.ts | 6 regression tests | ## Test plan - [x] npm test: 386/386 pass on macOS (one pre-existing fs.watch flake under parallel load, passes in isolation) - [x] npx tsc --noEmit clean - [x] Independent reviewer pass before pushing — APPROVE; addressed two info-level cleanups (JSDoc accuracy, removed dead try/catch around fs.existsSync) Co-Authored-By: Claude Opus 4.7 (1M context) --- __tests__/codegraphignore.test.ts | 168 ++++++++++++++++++++++++++++++ src/extraction/index.ts | 78 ++++++++++++-- 2 files changed, 237 insertions(+), 9 deletions(-) create mode 100644 __tests__/codegraphignore.test.ts diff --git a/__tests__/codegraphignore.test.ts b/__tests__/codegraphignore.test.ts new file mode 100644 index 00000000..4d7e58c5 --- /dev/null +++ b/__tests__/codegraphignore.test.ts @@ -0,0 +1,168 @@ +/** + * .codegraphignore Tests + * + * Regression test for the bug where the .codegraphignore marker file was + * honored by the filesystem-walk fallback (`scanDirectoryWalk`) but + * silently ignored by the git fast path (`getGitVisibleFiles` and + * `getGitChangedFiles`). Same project gave different file sets depending + * on whether `.git` existed. + */ + +import { describe, it, expect, beforeEach, afterEach } from 'vitest'; +import * as fs from 'fs'; +import * as path from 'path'; +import * as os from 'os'; +import { execFileSync } from 'child_process'; +import { scanDirectory } from '../src/extraction'; +import { DEFAULT_CONFIG, CodeGraphConfig } from '../src/types'; +import CodeGraph from '../src/index'; + +function tempDir(prefix: string): string { + return fs.mkdtempSync(path.join(os.tmpdir(), prefix)); +} + +function git(cwd: string, ...args: string[]) { + execFileSync('git', args, { cwd, stdio: 'pipe' }); +} + +const config: CodeGraphConfig = { + ...DEFAULT_CONFIG, + include: ['**/*.ts'], + exclude: [], +}; + +describe('.codegraphignore marker (bug #3)', () => { + describe('git fast path', () => { + let dir: string; + + beforeEach(() => { + dir = tempDir('codegraph-ignore-git-'); + git(dir, 'init'); + git(dir, 'config', 'user.email', 'test@test.com'); + git(dir, 'config', 'user.name', 'Test'); + // Pin branch name for determinism across git defaults + git(dir, 'symbolic-ref', 'HEAD', 'refs/heads/main'); + + fs.mkdirSync(path.join(dir, 'src')); + fs.mkdirSync(path.join(dir, 'vendor')); + fs.mkdirSync(path.join(dir, 'vendor', 'lib')); + fs.writeFileSync(path.join(dir, 'src', 'app.ts'), 'export const a = 1;'); + fs.writeFileSync(path.join(dir, 'vendor', 'pkg.ts'), 'export const v = 1;'); + fs.writeFileSync(path.join(dir, 'vendor', 'lib', 'sub.ts'), 'export const s = 1;'); + // Mark vendor/ as ignored + fs.writeFileSync(path.join(dir, 'vendor', '.codegraphignore'), ''); + + git(dir, 'add', '-A'); + git(dir, 'commit', '-m', 'initial'); + }); + + afterEach(() => { + if (fs.existsSync(dir)) fs.rmSync(dir, { recursive: true, force: true }); + }); + + it('scanDirectory honors .codegraphignore on the git fast path', () => { + const files = scanDirectory(dir, config); + expect(files).toContain('src/app.ts'); + expect(files).not.toContain('vendor/pkg.ts'); + expect(files).not.toContain('vendor/lib/sub.ts'); + }); + + it('marker at project root excludes everything', () => { + fs.writeFileSync(path.join(dir, '.codegraphignore'), ''); + // Need to add it to git so ls-files sees it (or rely on -o) + git(dir, 'add', '-A'); + git(dir, 'commit', '-m', 'add root marker'); + const files = scanDirectory(dir, config); + expect(files).toEqual([]); + }); + + it('marker in nested subdir does not affect siblings', () => { + // Add another sibling subdir without a marker + fs.mkdirSync(path.join(dir, 'libs')); + fs.writeFileSync(path.join(dir, 'libs', 'util.ts'), 'export const u = 1;'); + git(dir, 'add', '-A'); + git(dir, 'commit', '-m', 'add libs'); + + const files = scanDirectory(dir, config); + expect(files).toContain('src/app.ts'); + expect(files).toContain('libs/util.ts'); + expect(files).not.toContain('vendor/pkg.ts'); + }); + + it('respects marker added after initial commit (untracked marker)', () => { + // The marker file itself need not be committed — it can be a local + // override. Add marker AFTER commit, do not commit it. + fs.mkdirSync(path.join(dir, 'generated')); + fs.writeFileSync(path.join(dir, 'generated', 'gen.ts'), 'export const g = 1;'); + fs.writeFileSync(path.join(dir, 'generated', '.codegraphignore'), ''); + // The .ts file is untracked but visible via `git ls-files -o`. + // The marker is also untracked — we still detect it via fs check. + + const files = scanDirectory(dir, config); + expect(files).not.toContain('generated/gen.ts'); + }); + }); + + describe('parity with non-git fallback (filesystem walk)', () => { + let dir: string; + + beforeEach(() => { + dir = tempDir('codegraph-ignore-walk-'); + fs.mkdirSync(path.join(dir, 'src')); + fs.mkdirSync(path.join(dir, 'vendor')); + fs.writeFileSync(path.join(dir, 'src', 'app.ts'), 'export const a = 1;'); + fs.writeFileSync(path.join(dir, 'vendor', 'pkg.ts'), 'export const v = 1;'); + fs.writeFileSync(path.join(dir, 'vendor', '.codegraphignore'), ''); + }); + + afterEach(() => { + if (fs.existsSync(dir)) fs.rmSync(dir, { recursive: true, force: true }); + }); + + it('non-git project also honors the marker (sanity / pre-existing behavior)', () => { + const files = scanDirectory(dir, config); + expect(files).toContain('src/app.ts'); + expect(files).not.toContain('vendor/pkg.ts'); + }); + }); + + describe('sync git path (getGitChangedFiles)', () => { + let dir: string; + let cg: CodeGraph; + + beforeEach(async () => { + dir = tempDir('codegraph-ignore-sync-'); + git(dir, 'init'); + git(dir, 'config', 'user.email', 'test@test.com'); + git(dir, 'config', 'user.name', 'Test'); + git(dir, 'symbolic-ref', 'HEAD', 'refs/heads/main'); + + fs.mkdirSync(path.join(dir, 'src')); + fs.mkdirSync(path.join(dir, 'vendor')); + fs.writeFileSync(path.join(dir, 'src', 'app.ts'), 'export const a = 1;'); + fs.writeFileSync(path.join(dir, 'vendor', '.codegraphignore'), ''); + + git(dir, 'add', '-A'); + git(dir, 'commit', '-m', 'initial'); + + cg = CodeGraph.initSync(dir, { config: { include: ['**/*.ts'], exclude: [] } }); + await cg.indexAll(); + }); + + afterEach(() => { + if (cg) cg.destroy(); + if (fs.existsSync(dir)) fs.rmSync(dir, { recursive: true, force: true }); + }); + + it('sync ignores changes inside marker dirs', async () => { + // Add a new file under vendor/ — should NOT be picked up by sync. + fs.writeFileSync(path.join(dir, 'vendor', 'leaked.ts'), 'export const x = 1;'); + // Also add a real change to confirm sync still runs. + fs.writeFileSync(path.join(dir, 'src', 'app.ts'), 'export const a = 2;'); + + const result = await cg.sync(); + expect(result.changedFilePaths).toContain('src/app.ts'); + expect(result.changedFilePaths ?? []).not.toContain('vendor/leaked.ts'); + }); + }); +}); diff --git a/src/extraction/index.ts b/src/extraction/index.ts index 4ad056fb..2892c69e 100644 --- a/src/extraction/index.ts +++ b/src/extraction/index.ts @@ -196,22 +196,32 @@ function getGitChangedFiles(rootDir: string, config: CodeGraphConfig): GitChange { cwd: rootDir, encoding: 'utf-8', timeout: 10000, stdio: ['pipe', 'pipe', 'pipe'] } ); - const modified: string[] = []; - const added: string[] = []; - const deleted: string[] = []; - + // Two-pass: collect candidate paths first so we can build the + // .codegraphignore directory set in one go, then re-walk to bucketize. + const candidatePaths: { code: string; filePath: string }[] = []; for (const line of output.split('\n')) { if (line.length < 4) continue; // Minimum: "XY file" - const statusCode = line.substring(0, 2); const filePath = normalizePath(line.substring(3)); - - // Skip files that don't match include/exclude config if (!shouldIncludeFile(filePath, config)) continue; + candidatePaths.push({ code: statusCode, filePath }); + } - if (statusCode === '??') { + const ignoredDirs = findCodegraphIgnoredDirs( + rootDir, + candidatePaths.map((c) => c.filePath) + ); + + const modified: string[] = []; + const added: string[] = []; + const deleted: string[] = []; + + for (const { code, filePath } of candidatePaths) { + if (isUnderCodegraphIgnoredDir(filePath, ignoredDirs)) continue; + + if (code === '??') { added.push(filePath); - } else if (statusCode.includes('D')) { + } else if (code.includes('D')) { deleted.push(filePath); } else { // M, MM, AM, A (staged), etc. — treat as modified @@ -230,6 +240,52 @@ function getGitChangedFiles(rootDir: string, config: CodeGraphConfig): GitChange */ const CODEGRAPH_IGNORE_MARKER = '.codegraphignore'; +/** + * Walk every parent directory of the given files (relative to rootDir) and + * return the subset that contain a `.codegraphignore` marker. Anything + * under one of these directories should be excluded. + * + * Called by `scanDirectory`, `scanDirectoryAsync`, and `getGitChangedFiles` + * so the git-driven paths honor the marker the same way the filesystem + * walk fallback does. Without this the marker had inconsistent behavior: + * respected on non-git projects, silently ignored on git ones. + */ +function findCodegraphIgnoredDirs(rootDir: string, files: Iterable): Set { + const dirs = new Set(['.']); + for (const file of files) { + let dir = path.posix.dirname(normalizePath(file)); + while (dir && dir !== '.' && dir !== '/') { + if (dirs.has(dir)) break; // already enumerated this branch + dirs.add(dir); + dir = path.posix.dirname(dir); + } + } + + const ignored = new Set(); + for (const dir of dirs) { + const marker = dir === '.' + ? path.join(rootDir, CODEGRAPH_IGNORE_MARKER) + : path.join(rootDir, dir, CODEGRAPH_IGNORE_MARKER); + if (fs.existsSync(marker)) ignored.add(dir); + } + return ignored; +} + +/** + * True if `filePath` (relative, forward-slashed) lives under any directory + * in `ignoredDirs`. Directory `.` matches the project root. + */ +function isUnderCodegraphIgnoredDir(filePath: string, ignoredDirs: Set): boolean { + if (ignoredDirs.size === 0) return false; + if (ignoredDirs.has('.')) return true; + let dir = path.posix.dirname(filePath); + while (dir && dir !== '.' && dir !== '/') { + if (ignoredDirs.has(dir)) return true; + dir = path.posix.dirname(dir); + } + return false; +} + /** * Recursively scan directory for source files. * @@ -245,9 +301,11 @@ export function scanDirectory( // Fast path: use git to get all visible files (respects .gitignore everywhere) const gitFiles = getGitVisibleFiles(rootDir); if (gitFiles) { + const ignoredDirs = findCodegraphIgnoredDirs(rootDir, gitFiles); const files: string[] = []; let count = 0; for (const filePath of gitFiles) { + if (isUnderCodegraphIgnoredDir(filePath, ignoredDirs)) continue; if (shouldIncludeFile(filePath, config)) { files.push(filePath); count++; @@ -272,9 +330,11 @@ export async function scanDirectoryAsync( ): Promise { const gitFiles = getGitVisibleFiles(rootDir); if (gitFiles) { + const ignoredDirs = findCodegraphIgnoredDirs(rootDir, gitFiles); const files: string[] = []; let count = 0; for (const filePath of gitFiles) { + if (isUnderCodegraphIgnoredDir(filePath, ignoredDirs)) continue; if (shouldIncludeFile(filePath, config)) { files.push(filePath); count++;