diff --git a/CHANGELOG.md b/CHANGELOG.md index 87a4a3b9..20a2b9bc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,6 +29,37 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). grammar, and `web/core`, `web/modules/contrib`, `web/themes/contrib` are excluded by default. Resolves [#268](https://github.com/colbymchenry/codegraph/issues/268). +### Changed +- **Zero-config indexing that respects `.gitignore`.** CodeGraph no longer has a + config file. It indexes every file whose extension maps to a supported language + and honors your `.gitignore` everywhere: in git repos via git itself, and in + non-git projects (e.g. a freshly-scaffolded app before `git init`) by reading + `.gitignore` files directly — root and nested, the same way git does (via the + `ignore` library, so negation/anchoring/nested rules all behave correctly). To + keep something out of the graph, add it to `.gitignore`. **Behavior change:** + committed files that are *not* gitignored are now indexed even under `vendor/`, + `Pods/`, or a committed `dist/` — previously a hardcoded exclude list skipped + those names; now `.gitignore` is the single source of truth. Resolves + [#283](https://github.com/colbymchenry/codegraph/issues/283). + +### Removed +- **`.codegraph/config.json` and the entire config surface.** Every field was + either inert or now redundant with `.gitignore`: + - `languages`/`frameworks` never affected indexing (languages are detected per + file from extensions; frameworks are auto-detected). `languages` was also + broken — its validator only knew the original 8 languages, so setting it to + anything newer (C#, PHP, Ruby, C/C++, Swift, Kotlin, Dart, Vue, Scala, Lua, …) + threw `Invalid configuration format`. + - `extractDocstrings`/`trackCallSites`/`customPatterns` were never read by any + extractor. + - `include` is now derived from the supported language extensions, `exclude` is + replaced by `.gitignore`, and `maxFileSize` (1 MB) is a constant. + + **Breaking (library API):** the `CodeGraphConfig` type, the `config` option on + `CodeGraph.init()`, and the `getConfig()`/`updateConfig()`/`getConfigPath` + exports are gone. Existing `.codegraph/config.json` files are simply ignored. + The `.codegraphignore` marker is no longer supported — use `.gitignore`. + ## [0.9.1] - 2026-05-21 ### Fixed diff --git a/README.md b/README.md index 17bd2042..598ac5b0 100644 --- a/README.md +++ b/README.md @@ -418,28 +418,23 @@ cg.close(); ## Configuration -The `.codegraph/config.json` file controls indexing: - -```json -{ - "version": 1, - "languages": ["typescript", "javascript"], - "exclude": ["node_modules/**", "dist/**", "build/**", "*.min.js"], - "frameworks": [], - "maxFileSize": 1048576, - "extractDocstrings": true, - "trackCallSites": true -} -``` - -| Option | Description | Default | -|--------|-------------|---------| -| `languages` | Languages to index (auto-detected if empty) | `[]` | -| `exclude` | Glob patterns to ignore | `["node_modules/**", ...]` | -| `frameworks` | Framework hints for better resolution | `[]` | -| `maxFileSize` | Skip files larger than this (bytes) | `1048576` (1MB) | -| `extractDocstrings` | Extract docstrings from code | `true` | -| `trackCallSites` | Track call site locations | `true` | +There isn't any — CodeGraph is zero-config. It indexes every file whose +extension maps to a [supported language](#supported-languages) and **respects +your `.gitignore`**: in git repos via git itself, and in non-git projects by +reading `.gitignore` files directly (root and nested, the same way git would). + +What that means in practice: + +- Anything git ignores — `node_modules`, build output, secrets in `.env` — is + never indexed. **To keep something out of the graph, add it to `.gitignore`.** +- There's no config file to write or keep in sync, and nothing to wire up per + language: support is automatic from the file extension. +- Files larger than 1 MB are skipped (generated bundles, minified JS, vendored + blobs) — they cost parse budget for no useful symbols. + +> Committed files that aren't gitignored *are* indexed, even under `vendor/` or a +> committed `dist/`. If you commit a dependency or build directory you don't want +> in the graph, add it to `.gitignore`. ## Supported Languages diff --git a/__tests__/extraction.test.ts b/__tests__/extraction.test.ts index 1b121478..92717759 100644 --- a/__tests__/extraction.test.ts +++ b/__tests__/extraction.test.ts @@ -9,10 +9,9 @@ import * as fs from 'fs'; import * as path from 'path'; import * as os from 'os'; import { CodeGraph } from '../src'; -import { extractFromSource, scanDirectory, shouldIncludeFile } from '../src/extraction'; +import { extractFromSource, scanDirectory } from '../src/extraction'; import { detectLanguage, isLanguageSupported, getSupportedLanguages, initGrammars, loadAllGrammars } from '../src/extraction/grammars'; import { normalizePath } from '../src/utils'; -import { DEFAULT_CONFIG } from '../src/types'; beforeAll(async () => { await initGrammars(); @@ -3003,39 +3002,57 @@ describe('Directory Exclusion', () => { cleanupTempDir(tempDir); }); - it('should exclude node_modules directories', () => { - // Create structure: src/index.ts + node_modules/pkg/index.js + it('should exclude directories listed in .gitignore', () => { + // Create structure: src/index.ts + node_modules/pkg/index.js, gitignore node_modules const srcDir = path.join(tempDir, 'src'); const nmDir = path.join(tempDir, 'node_modules', 'pkg'); fs.mkdirSync(srcDir, { recursive: true }); fs.mkdirSync(nmDir, { recursive: true }); fs.writeFileSync(path.join(srcDir, 'index.ts'), 'export const x = 1;'); fs.writeFileSync(path.join(nmDir, 'index.js'), 'module.exports = {};'); + fs.writeFileSync(path.join(tempDir, '.gitignore'), 'node_modules/\n'); - const config = { ...DEFAULT_CONFIG, rootDir: tempDir }; - const files = scanDirectory(tempDir, config); + const files = scanDirectory(tempDir); expect(files).toContain('src/index.ts'); expect(files.every((f) => !f.includes('node_modules'))).toBe(true); }); - it('should exclude nested node_modules directories', () => { - // Create structure: packages/app/node_modules/pkg/index.js + it('should exclude nested node_modules via a root .gitignore', () => { + // A trailing-slash pattern with no leading slash matches at any depth. const srcDir = path.join(tempDir, 'packages', 'app', 'src'); const nmDir = path.join(tempDir, 'packages', 'app', 'node_modules', 'pkg'); fs.mkdirSync(srcDir, { recursive: true }); fs.mkdirSync(nmDir, { recursive: true }); fs.writeFileSync(path.join(srcDir, 'index.ts'), 'export const x = 1;'); fs.writeFileSync(path.join(nmDir, 'index.js'), 'module.exports = {};'); + fs.writeFileSync(path.join(tempDir, '.gitignore'), 'node_modules/\n'); - const config = { ...DEFAULT_CONFIG, rootDir: tempDir }; - const files = scanDirectory(tempDir, config); + const files = scanDirectory(tempDir); expect(files).toContain('packages/app/src/index.ts'); expect(files.every((f) => !f.includes('node_modules'))).toBe(true); }); - it('should exclude .git directories', () => { + it('should apply a nested .gitignore only to its own subtree', () => { + const appSrc = path.join(tempDir, 'app', 'src'); + fs.mkdirSync(appSrc, { recursive: true }); + fs.writeFileSync(path.join(appSrc, 'keep.ts'), 'export const a = 1;'); + fs.writeFileSync(path.join(appSrc, 'skip.ts'), 'export const b = 2;'); + fs.writeFileSync(path.join(tempDir, 'app', '.gitignore'), 'src/skip.ts\n'); + // A sibling with the same name outside app/ must NOT be ignored. + const otherDir = path.join(tempDir, 'other', 'src'); + fs.mkdirSync(otherDir, { recursive: true }); + fs.writeFileSync(path.join(otherDir, 'skip.ts'), 'export const c = 3;'); + + const files = scanDirectory(tempDir); + + expect(files).toContain('app/src/keep.ts'); + expect(files).not.toContain('app/src/skip.ts'); + expect(files).toContain('other/src/skip.ts'); + }); + + it('should always skip .git directories', () => { const srcDir = path.join(tempDir, 'src'); const gitDir = path.join(tempDir, '.git', 'objects'); fs.mkdirSync(srcDir, { recursive: true }); @@ -3043,8 +3060,7 @@ describe('Directory Exclusion', () => { fs.writeFileSync(path.join(srcDir, 'index.ts'), 'export const x = 1;'); fs.writeFileSync(path.join(gitDir, 'pack.ts'), 'export const y = 2;'); - const config = { ...DEFAULT_CONFIG, rootDir: tempDir }; - const files = scanDirectory(tempDir, config); + const files = scanDirectory(tempDir); expect(files).toContain('src/index.ts'); expect(files.every((f) => !f.includes('.git'))).toBe(true); @@ -3055,29 +3071,12 @@ describe('Directory Exclusion', () => { fs.mkdirSync(srcDir, { recursive: true }); fs.writeFileSync(path.join(srcDir, 'Button.tsx'), 'export function Button() {}'); - const config = { ...DEFAULT_CONFIG, rootDir: tempDir }; - const files = scanDirectory(tempDir, config); + const files = scanDirectory(tempDir); expect(files.length).toBe(1); expect(files[0]).toBe('src/components/Button.tsx'); expect(files[0]).not.toContain('\\'); }); - - it('should respect .codegraphignore marker', () => { - const srcDir = path.join(tempDir, 'src'); - const vendorDir = path.join(tempDir, 'vendor'); - fs.mkdirSync(srcDir, { recursive: true }); - fs.mkdirSync(vendorDir, { recursive: true }); - fs.writeFileSync(path.join(srcDir, 'index.ts'), 'export const x = 1;'); - fs.writeFileSync(path.join(vendorDir, 'lib.ts'), 'export const y = 2;'); - fs.writeFileSync(path.join(vendorDir, '.codegraphignore'), ''); - - const config = { ...DEFAULT_CONFIG, rootDir: tempDir }; - const files = scanDirectory(tempDir, config); - - expect(files).toContain('src/index.ts'); - expect(files.every((f) => !f.includes('vendor'))).toBe(true); - }); }); describe('Git Submodules', () => { @@ -3124,8 +3123,7 @@ describe('Git Submodules', () => { ); git(mainDir, 'commit', '-q', '-m', 'add submodule'); - const config = { ...DEFAULT_CONFIG, rootDir: mainDir }; - const files = scanDirectory(mainDir, config); + const files = scanDirectory(mainDir); expect(files).toContain('app.ts'); expect(files).toContain('libs/lib/lib.ts'); @@ -3173,8 +3171,7 @@ describe('Nested non-submodule git repos', () => { git(path.join(root, 'sub_repo2'), 'init', '-q'); fs.writeFileSync(path.join(sub2, 'two.ts'), 'export const two = 2;'); - const config = { ...DEFAULT_CONFIG, rootDir: root }; - const files = scanDirectory(root, config); + const files = scanDirectory(root); // Both committed and untracked source from the nested repos must be found. expect(files).toContain('sub_repo1/src/one.ts'); @@ -3197,8 +3194,7 @@ describe('Nested non-submodule git repos', () => { fs.writeFileSync(path.join(sub, 'real.ts'), 'export const real = 1;'); fs.writeFileSync(path.join(sub, 'generated.ts'), 'export const generated = 1;'); - const config = { ...DEFAULT_CONFIG, rootDir: root }; - const files = scanDirectory(root, config); + const files = scanDirectory(root); expect(files).toContain('sub_repo/src/real.ts'); expect(files).not.toContain('sub_repo/src/generated.ts'); diff --git a/__tests__/foundation.test.ts b/__tests__/foundation.test.ts index 4e8f204a..78ebfce4 100644 --- a/__tests__/foundation.test.ts +++ b/__tests__/foundation.test.ts @@ -9,8 +9,7 @@ import * as fs from 'fs'; import * as path from 'path'; import * as os from 'os'; import { CodeGraph } from '../src'; -import { DEFAULT_CONFIG, Node, Edge } from '../src/types'; -import { loadConfig, saveConfig } from '../src/config'; +import { Node, Edge } from '../src/types'; import { isInitialized, getCodeGraphDir, validateDirectory } from '../src/directory'; import { DatabaseConnection, getDatabasePath } from '../src/db'; @@ -60,41 +59,12 @@ describe('CodeGraph Foundation', () => { cg.close(); }); - it('should create config.json with defaults', () => { - const cg = CodeGraph.initSync(tempDir); - - const configPath = path.join(getCodeGraphDir(tempDir), 'config.json'); - expect(fs.existsSync(configPath)).toBe(true); - - const config = cg.getConfig(); - expect(config.version).toBe(DEFAULT_CONFIG.version); - expect(config.include).toEqual(DEFAULT_CONFIG.include); - expect(config.exclude).toEqual(DEFAULT_CONFIG.exclude); - - cg.close(); - }); - it('should throw if already initialized', () => { const cg = CodeGraph.initSync(tempDir); cg.close(); expect(() => CodeGraph.initSync(tempDir)).toThrow(/already initialized/i); }); - - it('should accept custom config options', () => { - const cg = CodeGraph.initSync(tempDir, { - config: { - maxFileSize: 500000, - extractDocstrings: false, - }, - }); - - const config = cg.getConfig(); - expect(config.maxFileSize).toBe(500000); - expect(config.extractDocstrings).toBe(false); - - cg.close(); - }); }); describe('Opening Projects', () => { @@ -112,17 +82,6 @@ describe('CodeGraph Foundation', () => { it('should throw if not initialized', () => { expect(() => CodeGraph.openSync(tempDir)).toThrow(/not initialized/i); }); - - it('should preserve configuration across open/close', () => { - const cg1 = CodeGraph.initSync(tempDir, { - config: { maxFileSize: 123456 }, - }); - cg1.close(); - - const cg2 = CodeGraph.openSync(tempDir); - expect(cg2.getConfig().maxFileSize).toBe(123456); - cg2.close(); - }); }); describe('Static Methods', () => { @@ -182,31 +141,6 @@ describe('CodeGraph Foundation', () => { }); }); - describe('Configuration', () => { - it('should load and merge config with defaults', () => { - const cg = CodeGraph.initSync(tempDir); - cg.close(); - - const config = loadConfig(tempDir); - expect(config.version).toBe(DEFAULT_CONFIG.version); - expect(config.rootDir).toBe(path.resolve(tempDir)); - }); - - it('should update configuration', () => { - const cg = CodeGraph.initSync(tempDir); - - cg.updateConfig({ maxFileSize: 999999 }); - - expect(cg.getConfig().maxFileSize).toBe(999999); - - cg.close(); - - // Verify persistence - const config = loadConfig(tempDir); - expect(config.maxFileSize).toBe(999999); - }); - }); - describe('Directory Management', () => { it('should validate directory structure', () => { const cg = CodeGraph.initSync(tempDir); diff --git a/__tests__/security.test.ts b/__tests__/security.test.ts index b923a342..782b99da 100644 --- a/__tests__/security.test.ts +++ b/__tests__/security.test.ts @@ -15,9 +15,7 @@ import * as os from 'os'; import { FileLock } from '../src/utils'; import CodeGraph from '../src/index'; import { ToolHandler, tools } from '../src/mcp/tools'; -import { shouldIncludeFile, scanDirectory } from '../src/extraction'; -import { shouldIncludeFile as configShouldInclude } from '../src/config'; -import { CodeGraphConfig, DEFAULT_CONFIG } from '../src/types'; +import { scanDirectory, isSourceFile } from '../src/extraction'; import { DatabaseConnection, getDatabasePath } from '../src/db'; import { QueryBuilder } from '../src/db/queries'; @@ -298,58 +296,24 @@ describe('Atomic Writes', () => { }); }); -describe('Glob Matching (picomatch)', () => { - const makeConfig = (include: string[], exclude: string[]): CodeGraphConfig => ({ - ...DEFAULT_CONFIG, - rootDir: '/test', - include, - exclude, +describe('Source file detection (isSourceFile)', () => { + it('selects files by supported extension', () => { + expect(isSourceFile('src/index.ts')).toBe(true); + expect(isSourceFile('src/deep/nested/file.ts')).toBe(true); + expect(isSourceFile('src/component.tsx')).toBe(true); + expect(isSourceFile('lib/util.js')).toBe(true); + expect(isSourceFile('src/main.py')).toBe(true); }); - it('should match standard glob patterns in extraction', () => { - const config = makeConfig(['**/*.ts'], ['node_modules/**']); - - expect(shouldIncludeFile('src/index.ts', config)).toBe(true); - expect(shouldIncludeFile('src/deep/nested/file.ts', config)).toBe(true); - expect(shouldIncludeFile('src/index.js', config)).toBe(false); - expect(shouldIncludeFile('node_modules/lib/index.ts', config)).toBe(false); - }); - - it('should match standard glob patterns in config', () => { - const config = makeConfig(['**/*.py'], ['__pycache__/**']); - - expect(configShouldInclude('src/main.py', config)).toBe(true); - expect(configShouldInclude('src/main.ts', config)).toBe(false); - expect(configShouldInclude('__pycache__/module.py', config)).toBe(false); - }); - - it('should handle complex glob patterns correctly', () => { - const config = makeConfig(['src/**/*.{ts,tsx}', 'lib/**/*.js'], []); - - expect(shouldIncludeFile('src/component.ts', config)).toBe(true); - expect(shouldIncludeFile('src/component.tsx', config)).toBe(true); - expect(shouldIncludeFile('lib/util.js', config)).toBe(true); - expect(shouldIncludeFile('src/component.css', config)).toBe(false); - }); - - it('should handle patterns that previously caused ReDoS', () => { - // This pattern would cause catastrophic backtracking with hand-rolled regex - const evilPattern = '**/**/**/**/**/**/**/**/**/**/**/**/**/**/a'; - const config = makeConfig([evilPattern], []); - - const start = Date.now(); - // This should return quickly, not hang - shouldIncludeFile('x/x/x/x/x/x/x/x/x/x/x/x/x/x/b', config); - const elapsed = Date.now() - start; - - // Should complete in under 100ms, not seconds - expect(elapsed).toBeLessThan(100); + it('rejects unsupported extensions and extensionless files', () => { + expect(isSourceFile('src/component.css')).toBe(false); + expect(isSourceFile('README.md')).toBe(false); + expect(isSourceFile('Makefile')).toBe(false); + expect(isSourceFile('.gitignore')).toBe(false); }); - it('should handle dot files correctly', () => { - const config = makeConfig(['**/*.ts'], []); - - expect(shouldIncludeFile('.hidden/index.ts', config)).toBe(true); + it('matches regardless of leading dot directories', () => { + expect(isSourceFile('.hidden/index.ts')).toBe(true); }); }); @@ -464,15 +428,9 @@ describe('Symlink Cycle Detection', () => { return; } - const config: CodeGraphConfig = { - ...DEFAULT_CONFIG, - rootDir: tempDir, - include: ['**/*.ts'], - exclude: [], - }; // This should complete without hanging - const files = scanDirectory(tempDir, config); + const files = scanDirectory(tempDir); // Should find the real file but not loop infinitely expect(files).toContain('src/index.ts'); @@ -496,14 +454,8 @@ describe('Symlink Cycle Detection', () => { return; } - const config: CodeGraphConfig = { - ...DEFAULT_CONFIG, - rootDir: tempDir, - include: ['**/*.ts'], - exclude: [], - }; - const files = scanDirectory(tempDir, config); + const files = scanDirectory(tempDir); // Should find files from both the real dir and via the symlink // But deduplicate since they resolve to the same real path @@ -521,15 +473,9 @@ describe('Symlink Cycle Detection', () => { return; } - const config: CodeGraphConfig = { - ...DEFAULT_CONFIG, - rootDir: tempDir, - include: ['**/*.ts'], - exclude: [], - }; // Should not throw - const files = scanDirectory(tempDir, config); + const files = scanDirectory(tempDir); expect(files).toContain('src/valid.ts'); }); }); diff --git a/__tests__/sync.test.ts b/__tests__/sync.test.ts index 374e7788..708a92a4 100644 --- a/__tests__/sync.test.ts +++ b/__tests__/sync.test.ts @@ -281,11 +281,11 @@ describe('Sync Module', () => { expect(nodes.length).toBe(0); }); - it('should skip files not matching config', async () => { - // Create a .js file which doesn't match **/*.ts + it('should skip files with unsupported extensions', async () => { + // A .txt file has no supported grammar, so sync must not index it. fs.writeFileSync( - path.join(testDir, 'src', 'ignored.js'), - `function ignored() {}` + path.join(testDir, 'src', 'notes.txt'), + `just some notes` ); const result = await cg.sync(); diff --git a/__tests__/watch-policy.test.ts b/__tests__/watch-policy.test.ts index ee50d8c9..5cb92ce7 100644 --- a/__tests__/watch-policy.test.ts +++ b/__tests__/watch-policy.test.ts @@ -12,7 +12,6 @@ import * as path from 'path'; import * as os from 'os'; import { watchDisabledReason } from '../src/sync/watch-policy'; import { FileWatcher } from '../src/sync/watcher'; -import type { CodeGraphConfig } from '../src/types'; describe('watchDisabledReason', () => { it('returns a reason when CODEGRAPH_NO_WATCH=1', () => { @@ -63,18 +62,6 @@ describe('watchDisabledReason', () => { describe('FileWatcher honors the watch policy', () => { let testDir: string; - const baseConfig: CodeGraphConfig = { - version: 1, - rootDir: '.', - include: ['**/*.ts'], - exclude: ['**/node_modules/**'], - languages: [], - frameworks: [], - maxFileSize: 1024 * 1024, - extractDocstrings: true, - trackCallSites: true, - }; - afterEach(() => { delete process.env.CODEGRAPH_NO_WATCH; if (testDir && fs.existsSync(testDir)) { @@ -87,7 +74,7 @@ describe('FileWatcher honors the watch policy', () => { process.env.CODEGRAPH_NO_WATCH = '1'; const syncFn = vi.fn().mockResolvedValue({ filesChanged: 0, durationMs: 0 }); - const watcher = new FileWatcher(testDir, baseConfig, syncFn); + const watcher = new FileWatcher(testDir, syncFn); expect(watcher.start()).toBe(false); expect(watcher.isActive()).toBe(false); diff --git a/__tests__/watcher.test.ts b/__tests__/watcher.test.ts index f3638e6d..fde5f593 100644 --- a/__tests__/watcher.test.ts +++ b/__tests__/watcher.test.ts @@ -9,7 +9,6 @@ import * as fs from 'fs'; import * as path from 'path'; import * as os from 'os'; import { FileWatcher } from '../src/sync/watcher'; -import type { CodeGraphConfig } from '../src/types'; import CodeGraph from '../src/index'; /** @@ -34,18 +33,6 @@ function waitFor( describe('FileWatcher', () => { let testDir: string; - const baseConfig: CodeGraphConfig = { - version: 1, - rootDir: '.', - include: ['**/*.ts', '**/*.js'], - exclude: ['**/node_modules/**', '**/dist/**'], - languages: [], - frameworks: [], - maxFileSize: 1024 * 1024, - extractDocstrings: true, - trackCallSites: true, - }; - beforeEach(() => { testDir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-watcher-')); // Create a source file so the directory isn't empty @@ -63,7 +50,7 @@ describe('FileWatcher', () => { describe('start/stop lifecycle', () => { it('should start and stop without errors', () => { const syncFn = vi.fn().mockResolvedValue({ filesChanged: 0, durationMs: 0 }); - const watcher = new FileWatcher(testDir, baseConfig, syncFn); + const watcher = new FileWatcher(testDir, syncFn); const started = watcher.start(); expect(started).toBe(true); @@ -75,7 +62,7 @@ describe('FileWatcher', () => { it('should be idempotent on double start', () => { const syncFn = vi.fn().mockResolvedValue({ filesChanged: 0, durationMs: 0 }); - const watcher = new FileWatcher(testDir, baseConfig, syncFn); + const watcher = new FileWatcher(testDir, syncFn); expect(watcher.start()).toBe(true); expect(watcher.start()).toBe(true); // Should not throw @@ -86,7 +73,7 @@ describe('FileWatcher', () => { it('should be idempotent on double stop', () => { const syncFn = vi.fn().mockResolvedValue({ filesChanged: 0, durationMs: 0 }); - const watcher = new FileWatcher(testDir, baseConfig, syncFn); + const watcher = new FileWatcher(testDir, syncFn); watcher.start(); watcher.stop(); @@ -98,7 +85,7 @@ describe('FileWatcher', () => { describe('debounced sync', () => { it('should trigger sync after file change', async () => { const syncFn = vi.fn().mockResolvedValue({ filesChanged: 1, durationMs: 10 }); - const watcher = new FileWatcher(testDir, baseConfig, syncFn, { debounceMs: 200 }); + const watcher = new FileWatcher(testDir, syncFn, { debounceMs: 200 }); watcher.start(); @@ -114,7 +101,7 @@ describe('FileWatcher', () => { it('should debounce rapid changes into a single sync', async () => { const syncFn = vi.fn().mockResolvedValue({ filesChanged: 1, durationMs: 10 }); - const watcher = new FileWatcher(testDir, baseConfig, syncFn, { debounceMs: 500 }); + const watcher = new FileWatcher(testDir, syncFn, { debounceMs: 500 }); watcher.start(); @@ -140,7 +127,7 @@ describe('FileWatcher', () => { describe('filtering', () => { it('should ignore files not matching include patterns', async () => { const syncFn = vi.fn().mockResolvedValue({ filesChanged: 0, durationMs: 0 }); - const watcher = new FileWatcher(testDir, baseConfig, syncFn, { debounceMs: 200 }); + const watcher = new FileWatcher(testDir, syncFn, { debounceMs: 200 }); watcher.start(); @@ -160,7 +147,7 @@ describe('FileWatcher', () => { it('should ignore .codegraph directory changes', async () => { const syncFn = vi.fn().mockResolvedValue({ filesChanged: 0, durationMs: 0 }); - const watcher = new FileWatcher(testDir, baseConfig, syncFn, { debounceMs: 200 }); + const watcher = new FileWatcher(testDir, syncFn, { debounceMs: 200 }); watcher.start(); @@ -185,7 +172,7 @@ describe('FileWatcher', () => { it('should call onSyncComplete after successful sync', async () => { const syncFn = vi.fn().mockResolvedValue({ filesChanged: 2, durationMs: 50 }); const onSyncComplete = vi.fn(); - const watcher = new FileWatcher(testDir, baseConfig, syncFn, { + const watcher = new FileWatcher(testDir, syncFn, { debounceMs: 200, onSyncComplete, }); @@ -203,7 +190,7 @@ describe('FileWatcher', () => { it('should call onSyncError when sync throws', async () => { const syncFn = vi.fn().mockRejectedValue(new Error('sync failed')); const onSyncError = vi.fn(); - const watcher = new FileWatcher(testDir, baseConfig, syncFn, { + const watcher = new FileWatcher(testDir, syncFn, { debounceMs: 200, onSyncError, }); diff --git a/package-lock.json b/package-lock.json index 05a37245..d96712a0 100644 --- a/package-lock.json +++ b/package-lock.json @@ -13,6 +13,7 @@ "commander": "^14.0.2", "fast-string-width": "^3.0.2", "fast-wrap-ansi": "^0.2.0", + "ignore": "^7.0.5", "jsonc-parser": "^3.3.1", "picomatch": "^4.0.3", "sisteransi": "^1.0.5", @@ -1145,6 +1146,15 @@ "node": "^8.16.0 || ^10.6.0 || >=11.0.0" } }, + "node_modules/ignore": { + "version": "7.0.5", + "resolved": "https://registry.npmjs.org/ignore/-/ignore-7.0.5.tgz", + "integrity": "sha512-Hs59xBNfUIunMFgWAbGX5cq6893IbWg4KnrjbYwX3tx0ztorVgTDA6B2sxf8ejHJ4wz8BqGUMYlnzNBer5NvGg==", + "license": "MIT", + "engines": { + "node": ">= 4" + } + }, "node_modules/jsonc-parser": { "version": "3.3.1", "resolved": "https://registry.npmjs.org/jsonc-parser/-/jsonc-parser-3.3.1.tgz", diff --git a/package.json b/package.json index bdf1d6c1..fdd59185 100644 --- a/package.json +++ b/package.json @@ -36,6 +36,7 @@ "commander": "^14.0.2", "fast-string-width": "^3.0.2", "fast-wrap-ansi": "^0.2.0", + "ignore": "^7.0.5", "jsonc-parser": "^3.3.1", "picomatch": "^4.0.3", "sisteransi": "^1.0.5", diff --git a/src/config.ts b/src/config.ts deleted file mode 100644 index 9ab1032a..00000000 --- a/src/config.ts +++ /dev/null @@ -1,297 +0,0 @@ -/** - * Configuration Management - * - * Load, save, and validate CodeGraph configuration. - */ - -import * as fs from 'fs'; -import * as path from 'path'; -import picomatch from 'picomatch'; -import { CodeGraphConfig, DEFAULT_CONFIG, Language, NodeKind } from './types'; -import { normalizePath } from './utils'; - -/** - * Configuration filename - */ -export const CONFIG_FILENAME = 'config.json'; - -/** - * Get the config file path for a project - */ -export function getConfigPath(projectRoot: string): string { - return path.join(projectRoot, '.codegraph', CONFIG_FILENAME); -} - -/** - * Check if a regex pattern is safe from ReDoS attacks. - * - * Rejects patterns with nested quantifiers (e.g., (a+)+, (a*)*) which - * are the primary source of catastrophic backtracking. Also rejects - * excessively long patterns and validates compilability. - */ -function isSafeRegex(pattern: string): boolean { - // Reject excessively long patterns - if (pattern.length > 500) return false; - - // Reject nested quantifiers: (...)+ followed by +, *, or { - // These are the primary cause of catastrophic backtracking - if (/([+*}])\s*[+*{]/.test(pattern)) return false; - if (/\([^)]*[+*][^)]*\)[+*{]/.test(pattern)) return false; - - // Verify the pattern is a valid regex - try { - new RegExp(pattern); - return true; - } catch { - return false; - } -} - -/** - * Validate a configuration object - */ -export function validateConfig(config: unknown): config is CodeGraphConfig { - if (typeof config !== 'object' || config === null) { - return false; - } - - const c = config as Record; - - // Required fields - if (typeof c.version !== 'number') return false; - if (typeof c.rootDir !== 'string') return false; - if (!Array.isArray(c.include)) return false; - if (!Array.isArray(c.exclude)) return false; - if (!Array.isArray(c.languages)) return false; - if (!Array.isArray(c.frameworks)) return false; - if (typeof c.maxFileSize !== 'number') return false; - if (typeof c.extractDocstrings !== 'boolean') return false; - if (typeof c.trackCallSites !== 'boolean') return false; - - // Validate include/exclude are string arrays - if (!c.include.every((p) => typeof p === 'string')) return false; - if (!c.exclude.every((p) => typeof p === 'string')) return false; - - // Validate languages - const validLanguages: Language[] = [ - 'typescript', - 'javascript', - 'python', - 'go', - 'rust', - 'java', - 'svelte', - 'unknown', - ]; - if (!c.languages.every((l) => validLanguages.includes(l as Language))) return false; - - // Validate frameworks - for (const fw of c.frameworks) { - if (typeof fw !== 'object' || fw === null) return false; - const framework = fw as Record; - if (typeof framework.name !== 'string') return false; - } - - // Validate custom patterns if present - if (c.customPatterns !== undefined) { - if (!Array.isArray(c.customPatterns)) return false; - for (const pattern of c.customPatterns) { - if (typeof pattern !== 'object' || pattern === null) return false; - const p = pattern as Record; - if (typeof p.name !== 'string') return false; - if (typeof p.pattern !== 'string') return false; - if (typeof p.kind !== 'string') return false; - - // Validate regex is compilable and reject patterns with known ReDoS risks - if (!isSafeRegex(p.pattern)) return false; - } - } - - return true; -} - -/** - * Merge configuration with defaults - */ -function mergeConfig( - defaults: CodeGraphConfig, - overrides: Partial -): CodeGraphConfig { - return { - version: overrides.version ?? defaults.version, - rootDir: overrides.rootDir ?? defaults.rootDir, - include: overrides.include ?? defaults.include, - exclude: overrides.exclude ?? defaults.exclude, - languages: overrides.languages ?? defaults.languages, - frameworks: overrides.frameworks ?? defaults.frameworks, - maxFileSize: overrides.maxFileSize ?? defaults.maxFileSize, - extractDocstrings: overrides.extractDocstrings ?? defaults.extractDocstrings, - trackCallSites: overrides.trackCallSites ?? defaults.trackCallSites, - customPatterns: overrides.customPatterns ?? defaults.customPatterns, - }; -} - -/** - * Load configuration from a project - */ -export function loadConfig(projectRoot: string): CodeGraphConfig { - const configPath = getConfigPath(projectRoot); - - if (!fs.existsSync(configPath)) { - // Return default config with adjusted rootDir - return { - ...DEFAULT_CONFIG, - rootDir: projectRoot, - }; - } - - try { - const content = fs.readFileSync(configPath, 'utf-8'); - const parsed = JSON.parse(content) as unknown; - - // Merge with defaults to ensure all fields are present - const merged = mergeConfig(DEFAULT_CONFIG, parsed as Partial); - merged.rootDir = projectRoot; // Always use actual project root - - if (!validateConfig(merged)) { - throw new Error('Invalid configuration format'); - } - - return merged; - } catch (error) { - if (error instanceof SyntaxError) { - throw new Error(`Invalid JSON in config file: ${configPath}`); - } - throw error; - } -} - -/** - * Save configuration to a project - */ -export function saveConfig(projectRoot: string, config: CodeGraphConfig): void { - const configPath = getConfigPath(projectRoot); - const dir = path.dirname(configPath); - - // Ensure directory exists - if (!fs.existsSync(dir)) { - fs.mkdirSync(dir, { recursive: true }); - } - - // Create a copy without rootDir (it's always derived from project path) - const toSave = { ...config }; - delete (toSave as Partial).rootDir; - - const content = JSON.stringify(toSave, null, 2); - - // Atomic write: write to temp file then rename to prevent partial/corrupt configs - const tmpPath = configPath + '.tmp'; - fs.writeFileSync(tmpPath, content, 'utf-8'); - fs.renameSync(tmpPath, configPath); -} - -/** - * Create default configuration for a new project - */ -export function createDefaultConfig(projectRoot: string): CodeGraphConfig { - return { - ...DEFAULT_CONFIG, - rootDir: projectRoot, - }; -} - -/** - * Update specific configuration values - */ -export function updateConfig( - projectRoot: string, - updates: Partial -): CodeGraphConfig { - const current = loadConfig(projectRoot); - const updated = mergeConfig(current, updates); - updated.rootDir = projectRoot; - saveConfig(projectRoot, updated); - return updated; -} - -/** - * Add patterns to include list - */ -export function addIncludePatterns(projectRoot: string, patterns: string[]): CodeGraphConfig { - const config = loadConfig(projectRoot); - const newPatterns = patterns.filter((p) => !config.include.includes(p)); - config.include = [...config.include, ...newPatterns]; - saveConfig(projectRoot, config); - return config; -} - -/** - * Add patterns to exclude list - */ -export function addExcludePatterns(projectRoot: string, patterns: string[]): CodeGraphConfig { - const config = loadConfig(projectRoot); - const newPatterns = patterns.filter((p) => !config.exclude.includes(p)); - config.exclude = [...config.exclude, ...newPatterns]; - saveConfig(projectRoot, config); - return config; -} - -/** - * Add a custom pattern - */ -export function addCustomPattern( - projectRoot: string, - name: string, - pattern: string, - kind: NodeKind -): CodeGraphConfig { - const config = loadConfig(projectRoot); - - if (!config.customPatterns) { - config.customPatterns = []; - } - - // Check for duplicate name - const existing = config.customPatterns.find((p) => p.name === name); - if (existing) { - existing.pattern = pattern; - existing.kind = kind; - } else { - config.customPatterns.push({ name, pattern, kind }); - } - - saveConfig(projectRoot, config); - return config; -} - -/** - * Check if a file path matches the include/exclude patterns - */ -export function shouldIncludeFile(filePath: string, config: CodeGraphConfig): boolean { - // Normalize to forward slashes so Windows backslash paths match glob patterns - filePath = normalizePath(filePath); - - // Simple glob matching (for now, just check if any pattern matches) - // A full implementation would use a proper glob library - - const matchesPattern = (pattern: string, filePath: string): boolean => { - return picomatch.isMatch(filePath, pattern, { dot: true }); - }; - - // Check exclude patterns first - for (const pattern of config.exclude) { - if (matchesPattern(pattern, filePath)) { - return false; - } - } - - // Check include patterns - for (const pattern of config.include) { - if (matchesPattern(pattern, filePath)) { - return true; - } - } - - // Default to not including if no pattern matches - return false; -} diff --git a/src/extraction/grammars.ts b/src/extraction/grammars.ts index a67d36bb..c78c52ce 100644 --- a/src/extraction/grammars.ts +++ b/src/extraction/grammars.ts @@ -94,6 +94,17 @@ export const EXTENSION_MAP: Record = { '.luau': 'luau', }; +/** + * Whether a file is one CodeGraph can parse, based purely on its extension. + * This is the single source of truth for "should we index this file" — derived + * from EXTENSION_MAP so parser support and indexing selection never drift. + */ +export function isSourceFile(filePath: string): boolean { + const dot = filePath.lastIndexOf('.'); + if (dot < 0) return false; + return filePath.slice(dot).toLowerCase() in EXTENSION_MAP; +} + /** * Caches for loaded grammars and parsers */ diff --git a/src/extraction/index.ts b/src/extraction/index.ts index 18086bdf..d502a24f 100644 --- a/src/extraction/index.ts +++ b/src/extraction/index.ts @@ -14,14 +14,13 @@ import { FileRecord, ExtractionResult, ExtractionError, - CodeGraphConfig, } from '../types'; import { QueryBuilder } from '../db/queries'; import { extractFromSource } from './tree-sitter'; -import { detectLanguage, isLanguageSupported, initGrammars, loadGrammarsForLanguages } from './grammars'; +import { detectLanguage, isSourceFile, isLanguageSupported, initGrammars, loadGrammarsForLanguages } from './grammars'; import { logDebug, logWarn } from '../errors'; import { validatePathWithinRoot, normalizePath } from '../utils'; -import picomatch from 'picomatch'; +import ignore, { Ignore } from 'ignore'; import { detectFrameworks } from '../resolution/frameworks'; import type { ResolutionContext } from '../resolution/types'; @@ -94,36 +93,11 @@ export function hashContent(content: string): string { } /** - * Check if a path matches any glob pattern (simplified) + * Skip files larger than this (bytes). Generated bundles, minified JS, and + * vendored blobs blow the WASM heap and the worker-recycle budget for no useful + * symbols. 1 MB covers essentially all hand-written source. */ -function matchesGlob(filePath: string, pattern: string): boolean { - filePath = normalizePath(filePath); - return picomatch.isMatch(filePath, pattern, { dot: true }); -} - -/** - * Check if a file should be included based on config - */ -export function shouldIncludeFile( - filePath: string, - config: CodeGraphConfig -): boolean { - // Check exclude patterns first - for (const pattern of config.exclude) { - if (matchesGlob(filePath, pattern)) { - return false; - } - } - - // Check include patterns - for (const pattern of config.include) { - if (matchesGlob(filePath, pattern)) { - return true; - } - } - - return false; -} +const MAX_FILE_SIZE = 1024 * 1024; /** * Collect git-visible files (tracked + untracked, .gitignore-respected) from the @@ -230,7 +204,7 @@ interface GitChanges { * Use `git status` to detect changed files instead of scanning every file. * Returns null on failure so callers fall back to full scan. */ -function getGitChangedFiles(rootDir: string, config: CodeGraphConfig): GitChanges | null { +function getGitChangedFiles(rootDir: string): GitChanges | null { try { const output = execFileSync( 'git', @@ -248,8 +222,8 @@ function getGitChangedFiles(rootDir: string, config: CodeGraphConfig): GitChange const statusCode = line.substring(0, 2); const filePath = normalizePath(line.substring(3)); - // Skip files that don't match include/exclude config - if (!shouldIncludeFile(filePath, config)) continue; + // Skip non-source files (git status already omits .gitignored paths). + if (!isSourceFile(filePath)) continue; if (statusCode === '??') { added.push(filePath); @@ -268,20 +242,14 @@ function getGitChangedFiles(rootDir: string, config: CodeGraphConfig): GitChange } /** - * Marker file name that indicates a directory (and all children) should be skipped - */ -const CODEGRAPH_IGNORE_MARKER = '.codegraphignore'; - -/** - * Recursively scan directory for source files. + * Recursively scan a directory for source files. * - * In git repos, uses `git ls-files` to get the file list (inherently - * respects .gitignore at all levels), then filters by config include patterns. - * Falls back to filesystem walk for non-git projects. + * In git repos, uses `git ls-files` (inherently respects .gitignore at all + * levels), then keeps files with a supported source extension. For non-git + * projects, falls back to a filesystem walk that parses .gitignore itself. */ export function scanDirectory( rootDir: string, - config: CodeGraphConfig, onProgress?: (current: number, file: string) => void ): string[] { // Fast path: use git to get all visible files (respects .gitignore everywhere) @@ -290,7 +258,7 @@ export function scanDirectory( const files: string[] = []; let count = 0; for (const filePath of gitFiles) { - if (shouldIncludeFile(filePath, config)) { + if (isSourceFile(filePath)) { files.push(filePath); count++; onProgress?.(count, filePath); @@ -300,7 +268,7 @@ export function scanDirectory( } // Fallback: walk filesystem for non-git projects - return scanDirectoryWalk(rootDir, config, onProgress); + return scanDirectoryWalk(rootDir, onProgress); } /** @@ -309,7 +277,6 @@ export function scanDirectory( */ export async function scanDirectoryAsync( rootDir: string, - config: CodeGraphConfig, onProgress?: (current: number, file: string) => void ): Promise { const gitFiles = getGitVisibleFiles(rootDir); @@ -317,7 +284,7 @@ export async function scanDirectoryAsync( const files: string[] = []; let count = 0; for (const filePath of gitFiles) { - if (shouldIncludeFile(filePath, config)) { + if (isSourceFile(filePath)) { files.push(filePath); count++; onProgress?.(count, filePath); @@ -330,7 +297,7 @@ export async function scanDirectoryAsync( return files; } - return scanDirectoryWalk(rootDir, config, onProgress); + return scanDirectoryWalk(rootDir, onProgress); } /** @@ -338,14 +305,44 @@ export async function scanDirectoryAsync( */ function scanDirectoryWalk( rootDir: string, - config: CodeGraphConfig, onProgress?: (current: number, file: string) => void ): string[] { const files: string[] = []; let count = 0; const visitedDirs = new Set(); - function walk(dir: string): void { + // A .gitignore matcher scoped to the directory that declared it. Patterns in + // a nested .gitignore are relative to that directory, so we keep the dir + // alongside the matcher and test paths relative to it — mirroring how git + // applies .gitignore files at every level. + interface ScopedIgnore { + dir: string; + ig: Ignore; + } + + const loadIgnore = (dir: string): ScopedIgnore | null => { + try { + const giPath = path.join(dir, '.gitignore'); + if (fs.existsSync(giPath)) { + return { dir, ig: ignore().add(fs.readFileSync(giPath, 'utf-8')) }; + } + } catch { + // Unreadable .gitignore — treat as absent. + } + return null; + }; + + const isIgnored = (fullPath: string, isDir: boolean, matchers: ScopedIgnore[]): boolean => { + for (const { dir, ig } of matchers) { + let rel = normalizePath(path.relative(dir, fullPath)); + if (!rel || rel.startsWith('..')) continue; // not under this matcher's dir + if (isDir) rel += '/'; // dir-only rules (e.g. `build/`) only match with the slash + if (ig.ignores(rel)) return true; + } + return false; + }; + + function walk(dir: string, matchers: ScopedIgnore[]): void { let realDir: string; try { realDir = fs.realpathSync(dir); @@ -360,12 +357,9 @@ function scanDirectoryWalk( } visitedDirs.add(realDir); - // Check for .codegraphignore marker file - const ignoreMarker = path.join(dir, CODEGRAPH_IGNORE_MARKER); - if (fs.existsSync(ignoreMarker)) { - logDebug('Skipping directory due to .codegraphignore marker', { dir }); - return; - } + // This directory's own .gitignore (if present) applies to everything below it. + const own = loadIgnore(dir); + const active = own ? [...matchers, own] : matchers; let entries: fs.Dirent[]; try { @@ -376,6 +370,9 @@ function scanDirectoryWalk( } for (const entry of entries) { + // Never descend into git internals or our own data directory. + if (entry.name === '.git' || entry.name === '.codegraph') continue; + const fullPath = path.join(dir, entry.name); const relativePath = normalizePath(path.relative(rootDir, fullPath)); @@ -384,19 +381,11 @@ function scanDirectoryWalk( const realTarget = fs.realpathSync(fullPath); const stat = fs.statSync(realTarget); if (stat.isDirectory()) { - const dirPattern = relativePath + '/'; - let excluded = false; - for (const pattern of config.exclude) { - if (matchesGlob(dirPattern, pattern) || matchesGlob(relativePath, pattern)) { - excluded = true; - break; - } - } - if (!excluded) { - walk(fullPath); + if (!isIgnored(fullPath, true, active)) { + walk(fullPath, active); } } else if (stat.isFile()) { - if (shouldIncludeFile(relativePath, config)) { + if (!isIgnored(fullPath, false, active) && isSourceFile(relativePath)) { files.push(relativePath); count++; onProgress?.(count, relativePath); @@ -409,19 +398,11 @@ function scanDirectoryWalk( } if (entry.isDirectory()) { - const dirPattern = relativePath + '/'; - let excluded = false; - for (const pattern of config.exclude) { - if (matchesGlob(dirPattern, pattern) || matchesGlob(relativePath, pattern)) { - excluded = true; - break; - } - } - if (!excluded) { - walk(fullPath); + if (!isIgnored(fullPath, true, active)) { + walk(fullPath, active); } } else if (entry.isFile()) { - if (shouldIncludeFile(relativePath, config)) { + if (!isIgnored(fullPath, false, active) && isSourceFile(relativePath)) { files.push(relativePath); count++; onProgress?.(count, relativePath); @@ -430,7 +411,7 @@ function scanDirectoryWalk( } } - walk(rootDir); + walk(rootDir, []); return files; } @@ -439,7 +420,6 @@ function scanDirectoryWalk( */ export class ExtractionOrchestrator { private rootDir: string; - private config: CodeGraphConfig; private queries: QueryBuilder; /** * Names of frameworks detected for this project, populated by indexAll(). @@ -449,9 +429,8 @@ export class ExtractionOrchestrator { */ private detectedFrameworkNames: string[] | null = null; - constructor(rootDir: string, config: CodeGraphConfig, queries: QueryBuilder) { + constructor(rootDir: string, queries: QueryBuilder) { this.rootDir = rootDir; - this.config = config; this.queries = queries; } @@ -500,7 +479,7 @@ export class ExtractionOrchestrator { */ private ensureDetectedFrameworks(files?: string[]): string[] { if (this.detectedFrameworkNames !== null) return this.detectedFrameworkNames; - const fileList = files ?? scanDirectory(this.rootDir, this.config); + const fileList = files ?? scanDirectory(this.rootDir); const context = this.buildDetectionContext(fileList); this.detectedFrameworkNames = detectFrameworks(context).map((r) => r.name); return this.detectedFrameworkNames; @@ -534,7 +513,7 @@ export class ExtractionOrchestrator { total: 0, }); - const files = await scanDirectoryAsync(this.rootDir, this.config, (current, file) => { + const files = await scanDirectoryAsync(this.rootDir, (current, file) => { onProgress?.({ phase: 'scanning', current, @@ -802,18 +781,16 @@ export class ExtractionOrchestrator { continue; } - // Honour config.maxFileSize. Without this check, vendored - // generated headers, minified bundles, and other multi-MB - // files get indexed despite the user setting a size cap — - // wasting WASM heap and the worker recycle budget on inputs - // the user explicitly opted out of. The single-file extractFile - // path already enforces this; the bulk path used to silently - // skip the check. - if (stats.size > this.config.maxFileSize) { + // Honour MAX_FILE_SIZE. Without this check, vendored generated + // headers, minified bundles, and other multi-MB files get indexed, + // wasting WASM heap and the worker recycle budget on inputs with no + // useful symbols. The single-file extractFile path already enforces + // this; the bulk path used to silently skip the check. + if (stats.size > MAX_FILE_SIZE) { processed++; filesSkipped++; errors.push({ - message: `File exceeds max size (${stats.size} > ${this.config.maxFileSize})`, + message: `File exceeds max size (${stats.size} > ${MAX_FILE_SIZE})`, filePath, severity: 'warning', code: 'size_exceeded', @@ -1108,14 +1085,14 @@ export class ExtractionOrchestrator { } // Check file size - if (stats.size > this.config.maxFileSize) { + if (stats.size > MAX_FILE_SIZE) { return { nodes: [], edges: [], unresolvedReferences: [], errors: [ { - message: `File exceeds max size (${stats.size} > ${this.config.maxFileSize})`, + message: `File exceeds max size (${stats.size} > ${MAX_FILE_SIZE})`, filePath: relativePath, severity: 'warning', code: 'size_exceeded', @@ -1245,7 +1222,7 @@ export class ExtractionOrchestrator { }); const filesToIndex: string[] = []; - const gitChanges = getGitChangedFiles(this.rootDir, this.config); + const gitChanges = getGitChangedFiles(this.rootDir); if (gitChanges) { // === Git fast path === @@ -1291,7 +1268,7 @@ export class ExtractionOrchestrator { } } else { // === Fallback: full scan (non-git project or git failure) === - const currentFiles = new Set(scanDirectory(this.rootDir, this.config)); + const currentFiles = new Set(scanDirectory(this.rootDir)); filesChecked = currentFiles.size; // Build Map for O(1) lookups instead of .find() per file @@ -1376,7 +1353,7 @@ export class ExtractionOrchestrator { * Uses git status as a fast path when available, falling back to full scan. */ getChangedFiles(): { added: string[]; modified: string[]; removed: string[] } { - const gitChanges = getGitChangedFiles(this.rootDir, this.config); + const gitChanges = getGitChangedFiles(this.rootDir); if (gitChanges) { // === Git fast path === @@ -1420,7 +1397,7 @@ export class ExtractionOrchestrator { } // === Fallback: full scan (non-git project or git failure) === - const currentFiles = new Set(scanDirectory(this.rootDir, this.config)); + const currentFiles = new Set(scanDirectory(this.rootDir)); const trackedFiles = this.queries.getAllFiles(); // Build Map for O(1) lookups @@ -1467,4 +1444,4 @@ export class ExtractionOrchestrator { // Re-export useful types and functions export { extractFromSource } from './tree-sitter'; -export { detectLanguage, isLanguageSupported, isGrammarLoaded, getSupportedLanguages, initGrammars, loadGrammarsForLanguages, loadAllGrammars } from './grammars'; +export { detectLanguage, isSourceFile, isLanguageSupported, isGrammarLoaded, getSupportedLanguages, initGrammars, loadGrammarsForLanguages, loadAllGrammars } from './grammars'; diff --git a/src/index.ts b/src/index.ts index 99b55ad7..b2acf346 100644 --- a/src/index.ts +++ b/src/index.ts @@ -7,7 +7,6 @@ import * as path from 'path'; import { - CodeGraphConfig, Node, Edge, FileRecord, @@ -25,7 +24,6 @@ import { } from './types'; import { DatabaseConnection, getDatabasePath } from './db'; import { QueryBuilder } from './db/queries'; -import { loadConfig, saveConfig, createDefaultConfig } from './config'; import { isInitialized, createDirectory, @@ -53,7 +51,6 @@ import { FileWatcher, WatchOptions } from './sync'; // Re-export types for consumers export * from './types'; export { getDatabasePath } from './db'; -export { getConfigPath } from './config'; export { getCodeGraphDir, isInitialized, @@ -85,9 +82,6 @@ export { MCPServer } from './mcp'; * Options for initializing a new CodeGraph project */ export interface InitOptions { - /** Custom configuration overrides */ - config?: Partial; - /** Whether to run initial indexing after init */ index?: boolean; @@ -128,7 +122,6 @@ export interface IndexOptions { export class CodeGraph { private db: DatabaseConnection; private queries: QueryBuilder; - private config: CodeGraphConfig; private projectRoot: string; private orchestrator: ExtractionOrchestrator; private resolver: ReferenceResolver; @@ -148,17 +141,15 @@ export class CodeGraph { private constructor( db: DatabaseConnection, queries: QueryBuilder, - config: CodeGraphConfig, projectRoot: string ) { this.db = db; this.queries = queries; - this.config = config; this.projectRoot = projectRoot; this.fileLock = new FileLock( path.join(projectRoot, '.codegraph', 'codegraph.lock') ); - this.orchestrator = new ExtractionOrchestrator(projectRoot, config, queries); + this.orchestrator = new ExtractionOrchestrator(projectRoot, queries); this.resolver = createResolver(projectRoot, queries); this.graphManager = new GraphQueryManager(queries); this.traverser = new GraphTraverser(queries); @@ -194,19 +185,12 @@ export class CodeGraph { // Create directory structure createDirectory(resolvedRoot); - // Create and save configuration - const config = createDefaultConfig(resolvedRoot); - if (options.config) { - Object.assign(config, options.config); - } - saveConfig(resolvedRoot, config); - // Initialize database const dbPath = getDatabasePath(resolvedRoot); const db = DatabaseConnection.initialize(dbPath); const queries = new QueryBuilder(db.getDb()); - const instance = new CodeGraph(db, queries, config, resolvedRoot); + const instance = new CodeGraph(db, queries, resolvedRoot); // Run initial indexing if requested if (options.index) { @@ -219,7 +203,7 @@ export class CodeGraph { /** * Initialize synchronously (without indexing) */ - static initSync(projectRoot: string, options: Omit = {}): CodeGraph { + static initSync(projectRoot: string): CodeGraph { const resolvedRoot = path.resolve(projectRoot); // Check if already initialized @@ -230,19 +214,12 @@ export class CodeGraph { // Create directory structure createDirectory(resolvedRoot); - // Create and save configuration - const config = createDefaultConfig(resolvedRoot); - if (options.config) { - Object.assign(config, options.config); - } - saveConfig(resolvedRoot, config); - // Initialize database const dbPath = getDatabasePath(resolvedRoot); const db = DatabaseConnection.initialize(dbPath); const queries = new QueryBuilder(db.getDb()); - return new CodeGraph(db, queries, config, resolvedRoot); + return new CodeGraph(db, queries, resolvedRoot); } /** @@ -267,15 +244,12 @@ export class CodeGraph { throw new Error(`Invalid CodeGraph directory: ${validation.errors.join(', ')}`); } - // Load configuration - const config = loadConfig(resolvedRoot); - // Open database const dbPath = getDatabasePath(resolvedRoot); const db = DatabaseConnection.open(dbPath); const queries = new QueryBuilder(db.getDb()); - const instance = new CodeGraph(db, queries, config, resolvedRoot); + const instance = new CodeGraph(db, queries, resolvedRoot); // Sync if requested if (options.sync) { @@ -302,15 +276,12 @@ export class CodeGraph { throw new Error(`Invalid CodeGraph directory: ${validation.errors.join(', ')}`); } - // Load configuration - const config = loadConfig(resolvedRoot); - // Open database const dbPath = getDatabasePath(resolvedRoot); const db = DatabaseConnection.open(dbPath); const queries = new QueryBuilder(db.getDb()); - return new CodeGraph(db, queries, config, resolvedRoot); + return new CodeGraph(db, queries, resolvedRoot); } /** @@ -330,32 +301,6 @@ export class CodeGraph { this.db.close(); } - // =========================================================================== - // Configuration - // =========================================================================== - - /** - * Get the current configuration - */ - getConfig(): CodeGraphConfig { - return { ...this.config }; - } - - /** - * Update configuration - */ - updateConfig(updates: Partial): void { - Object.assign(this.config, updates); - saveConfig(this.projectRoot, this.config); - // Recreate orchestrator and resolver with new config - this.orchestrator = new ExtractionOrchestrator( - this.projectRoot, - this.config, - this.queries - ); - this.resolver = createResolver(this.projectRoot, this.queries); - } - /** * Get the project root directory */ @@ -515,7 +460,6 @@ export class CodeGraph { this.watcher = new FileWatcher( this.projectRoot, - this.config, async () => { const result = await this.sync(); const filesChanged = result.filesAdded + result.filesModified + result.filesRemoved; diff --git a/src/sync/watcher.ts b/src/sync/watcher.ts index 2c16d82a..68e60fff 100644 --- a/src/sync/watcher.ts +++ b/src/sync/watcher.ts @@ -9,8 +9,7 @@ */ import * as fs from 'fs'; -import { CodeGraphConfig } from '../types'; -import { shouldIncludeFile } from '../extraction'; +import { isSourceFile } from '../extraction'; import { logDebug, logWarn } from '../errors'; import { normalizePath } from '../utils'; import { watchDisabledReason } from './watch-policy'; @@ -44,7 +43,7 @@ export interface WatchOptions { * Design goals: * - Minimal resource usage (native OS file events, no polling) * - Debounced to avoid thrashing on rapid saves - * - Filters against CodeGraph include/exclude patterns + * - Filters to supported source files by extension * - Ignores .codegraph/ directory changes */ export class FileWatcher { @@ -55,7 +54,6 @@ export class FileWatcher { private stopped = false; private readonly projectRoot: string; - private readonly config: CodeGraphConfig; private readonly debounceMs: number; private readonly syncFn: () => Promise<{ filesChanged: number; durationMs: number }>; private readonly onSyncComplete?: WatchOptions['onSyncComplete']; @@ -63,12 +61,10 @@ export class FileWatcher { constructor( projectRoot: string, - config: CodeGraphConfig, syncFn: () => Promise<{ filesChanged: number; durationMs: number }>, options: WatchOptions = {} ) { this.projectRoot = projectRoot; - this.config = config; this.syncFn = syncFn; this.debounceMs = options.debounceMs ?? 2000; this.onSyncComplete = options.onSyncComplete; @@ -112,8 +108,8 @@ export class FileWatcher { return; } - // Filter against include/exclude patterns - if (!shouldIncludeFile(normalized, this.config)) { + // Only sync changes to files we can actually parse. + if (!isSourceFile(normalized)) { return; } diff --git a/src/types.ts b/src/types.ts index 54485ac0..0168665d 100644 --- a/src/types.ts +++ b/src/types.ts @@ -426,297 +426,6 @@ export interface CodeBlock { node?: Node; } -// ============================================================================= -// Configuration Types -// ============================================================================= - -/** - * Framework-specific hints for better extraction - */ -export interface FrameworkHint { - /** Framework name (react, express, django, etc.) */ - name: string; - - /** Version constraint if relevant */ - version?: string; - - /** Custom patterns for this framework */ - patterns?: { - /** Component detection patterns */ - components?: string[]; - /** Route detection patterns */ - routes?: string[]; - /** Model detection patterns */ - models?: string[]; - }; -} - -/** - * Configuration for a CodeGraph project - */ -export interface CodeGraphConfig { - /** Schema version for migrations */ - version: number; - - /** Root directory of the project */ - rootDir: string; - - /** Glob patterns for files to include */ - include: string[]; - - /** Glob patterns for files to exclude */ - exclude: string[]; - - /** Languages to process (auto-detected if empty) */ - languages: Language[]; - - /** Framework hints for better extraction */ - frameworks: FrameworkHint[]; - - /** Maximum file size to process (in bytes) */ - maxFileSize: number; - - /** Whether to extract docstrings */ - extractDocstrings: boolean; - - /** Whether to track call sites */ - trackCallSites: boolean; - - /** Custom symbol patterns to extract */ - customPatterns?: { - /** Name for this pattern group */ - name: string; - /** Regex pattern to match */ - pattern: string; - /** Node kind to assign */ - kind: NodeKind; - }[]; -} - -/** - * Default configuration values - */ -export const DEFAULT_CONFIG: CodeGraphConfig = { - version: 1, - rootDir: '.', - include: [ - // TypeScript/JavaScript - '**/*.ts', - '**/*.tsx', - '**/*.js', - '**/*.jsx', - // Python - '**/*.py', - // Go - '**/*.go', - // Rust - '**/*.rs', - // Java - '**/*.java', - // C/C++ - '**/*.c', - '**/*.h', - '**/*.cpp', - '**/*.hpp', - '**/*.cc', - '**/*.cxx', - // C# - '**/*.cs', - // PHP - '**/*.php', - // Drupal-specific PHP extensions - '**/*.module', - '**/*.install', - '**/*.theme', - '**/*.inc', - // Drupal routing YAML - '**/*.routing.yml', - // Twig templates - '**/*.twig', - // Ruby - '**/*.rb', - // Swift - '**/*.swift', - // Kotlin - '**/*.kt', - '**/*.kts', - // Dart - '**/*.dart', - // Svelte - '**/*.svelte', - // Vue - '**/*.vue', - // Liquid (Shopify themes) - '**/*.liquid', - // Pascal / Delphi - '**/*.pas', - '**/*.dpr', - '**/*.dpk', - '**/*.lpr', - '**/*.dfm', - '**/*.fmx', - // Scala - '**/*.scala', - '**/*.sc', - // Lua - '**/*.lua', - // Luau - '**/*.luau', - ], - exclude: [ - // Version control - '**/.git/**', - - // Dependencies - '**/node_modules/**', - '**/vendor/**', - '**/Pods/**', - - // Generic build outputs - '**/dist/**', - '**/build/**', - '**/out/**', - '**/bin/**', - '**/obj/**', - '**/target/**', - - // JavaScript/TypeScript - '**/*.min.js', - '**/*.bundle.js', - '**/.next/**', - '**/.nuxt/**', - '**/.svelte-kit/**', - '**/.output/**', - '**/.turbo/**', - '**/.cache/**', - '**/.parcel-cache/**', - '**/.vite/**', - '**/.astro/**', - '**/.docusaurus/**', - '**/.gatsby/**', - '**/.webpack/**', - '**/.nx/**', - '**/.yarn/cache/**', - '**/.pnpm-store/**', - '**/storybook-static/**', - - // React Native / Expo - '**/.expo/**', - '**/web-build/**', - '**/ios/Pods/**', - '**/ios/build/**', - '**/android/build/**', - '**/android/.gradle/**', - - // Python - '**/__pycache__/**', - '**/.venv/**', - '**/venv/**', - '**/site-packages/**', - '**/dist-packages/**', - '**/.pytest_cache/**', - '**/.mypy_cache/**', - '**/.ruff_cache/**', - '**/.tox/**', - '**/.nox/**', - '**/*.egg-info/**', - '**/.eggs/**', - - // Go - '**/go/pkg/mod/**', - - // Rust - '**/target/debug/**', - '**/target/release/**', - - // Java/Kotlin/Gradle - '**/.gradle/**', - '**/.m2/**', - '**/generated-sources/**', - '**/.kotlin/**', - - // Dart/Flutter - '**/.dart_tool/**', - - // C#/.NET - '**/.vs/**', - '**/.nuget/**', - '**/artifacts/**', - '**/publish/**', - - // C/C++ - '**/cmake-build-*/**', - '**/CMakeFiles/**', - '**/bazel-*/**', - '**/vcpkg_installed/**', - '**/.conan/**', - '**/Debug/**', - '**/Release/**', - '**/x64/**', - '**/.pio/**', // Platform.io (IoT/embedded build artifacts and library deps) - - // Electron - '**/release/**', - '**/*.app/**', - '**/*.asar', - - // Swift/iOS/Xcode - '**/DerivedData/**', - '**/.build/**', - '**/.swiftpm/**', - '**/xcuserdata/**', - '**/Carthage/Build/**', - '**/SourcePackages/**', - - // Delphi/Pascal - '**/__history/**', - '**/__recovery/**', - '**/*.dcu', - - // PHP - '**/.composer/**', - '**/storage/framework/**', - '**/bootstrap/cache/**', - - // Drupal - core and contrib are rarely customised; index only custom code - '**/web/core/**', - '**/web/modules/contrib/**', - '**/web/themes/contrib/**', - - // Ruby - '**/.bundle/**', - '**/tmp/cache/**', - '**/public/assets/**', - '**/public/packs/**', - '**/.yardoc/**', - - // Testing/Coverage - '**/coverage/**', - '**/htmlcov/**', - '**/.nyc_output/**', - '**/test-results/**', - '**/.coverage/**', - - // IDE/Editor - '**/.idea/**', - - // Logs and temp - '**/logs/**', - '**/tmp/**', - '**/temp/**', - - // Documentation build output - '**/_build/**', - '**/docs/_build/**', - '**/site/**', - ], - languages: [], - frameworks: [], - maxFileSize: 1024 * 1024, // 1MB - extractDocstrings: true, - trackCallSites: true, -}; - // ============================================================================= // Database Types // =============================================================================