diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml new file mode 100644 index 00000000..e606b26f --- /dev/null +++ b/.github/workflows/CI.yml @@ -0,0 +1,33 @@ +name: CI + +on: + push: + branches: [main,julia] + pull_request: + branches: [main,julia] + +jobs: + test: + name: Test (Node ${{ matrix.node-version }}, ${{ matrix.os }}) + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, macos-latest] # windows-latest + node-version: [24, 26] + + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-node@v4 + with: + node-version: ${{ matrix.node-version }} + cache: npm + + - run: npm ci + + - name: Build + run: npm run build + + - name: Run tests + run: npm test diff --git a/__tests__/extraction.test.ts b/__tests__/extraction.test.ts index 99c38345..91fca501 100644 --- a/__tests__/extraction.test.ts +++ b/__tests__/extraction.test.ts @@ -3900,3 +3900,161 @@ local count = 0 }); }); }); + +// ============================================================================= +// Julia +// ============================================================================= + +describe('Julia Extraction', () => { + describe('Language detection', () => { + it('should detect Julia files', () => { + expect(detectLanguage('main.jl')).toBe('julia'); + expect(detectLanguage('src/math/utils.jl')).toBe('julia'); + }); + + it('should report Julia as supported', () => { + expect(isLanguageSupported('julia')).toBe(true); + expect(getSupportedLanguages()).toContain('julia'); + }); + }); + + describe('Function/type/import extraction', () => { + it('should extract function signature with return type and where clause', () => { + const code = ` +function distance(p::Point{T}, q::Point{T})::T where T + return norm([p.x - q.x, p.y - q.y]) +end +`; + const result = extractFromSource('geom.jl', code); + const fn = result.nodes.find((n) => n.kind === 'function' && n.name === 'distance'); + expect(fn).toBeDefined(); + 
expect(fn?.language).toBe('julia'); + expect(fn?.signature).toContain('(p::Point{T}, q::Point{T})'); + expect(fn?.signature).toContain('::T'); + expect(fn?.signature).toContain('where T'); + }); + + it('should extract struct and abstract type names', () => { + const code = ` +abstract type Shape end + +struct Point{T} + x::T + y::T +end +`; + const result = extractFromSource('types.jl', code); + const iface = result.nodes.find((n) => n.kind === 'interface' && n.name === 'Shape'); + const struct = result.nodes.find((n) => n.kind === 'struct' && n.name === 'Point'); + expect(iface).toBeDefined(); + expect(struct).toBeDefined(); + }); + + it('should extract using/import statements and emit import references', () => { + const code = ` +using LinearAlgebra +import Base: show +`; + const result = extractFromSource('imports.jl', code); + const imports = result.nodes.filter((n) => n.kind === 'import').map((n) => n.name); + expect(imports).toContain('LinearAlgebra'); + expect(imports).toContain('Base'); + + const importRef = result.unresolvedReferences.find( + (r) => r.referenceKind === 'imports' && r.referenceName === 'LinearAlgebra' + ); + expect(importRef).toBeDefined(); + }); + + it('should extract function calls as unresolved call references', () => { + const code = ` +helper(x) = x * 2 + +function run(y) + return helper(y) +end +`; + const result = extractFromSource('calls.jl', code); + const call = result.unresolvedReferences.find( + (r) => r.referenceKind === 'calls' && r.referenceName === 'helper' + ); + expect(call).toBeDefined(); + }); + }); + + describe('Short-form function definitions', () => { + it('should extract short assignment-form functions', () => { + const code = ` +add(x, y) = x + y +distance(a::Point, b::Point) = sqrt((a.x - b.x)^2) +`; + const result = extractFromSource('short.jl', code); + const funcs = result.nodes.filter((n) => n.kind === 'function').map((n) => n.name); + expect(funcs).toContain('add'); + expect(funcs).toContain('distance'); + 
const add = result.nodes.find((n) => n.name === 'add'); + expect(add?.signature).toBe('(x, y)'); + }); + }); + + describe('Struct field extraction', () => { + it('should extract typed and untyped fields from struct body', () => { + const code = ` +struct Point + x::Float64 + y::Float64 + label +end +`; + const result = extractFromSource('structs.jl', code); + const fields = result.nodes.filter((n) => n.kind === 'field').map((n) => n.name); + expect(fields).toContain('x'); + expect(fields).toContain('y'); + expect(fields).toContain('label'); + const x = result.nodes.find((n) => n.kind === 'field' && n.name === 'x'); + expect(x?.signature).toBe('x::Float64'); + }); + }); + + describe('include() as relative import', () => { + it('should convert include("file.jl") to an import node', () => { + const code = `include("utils.jl")`; + const result = extractFromSource('app.jl', code); + const imp = result.nodes.find((n) => n.kind === 'import' && n.name === 'utils'); + expect(imp).toBeDefined(); + expect(imp?.signature).toBe('include("utils.jl")'); + const ref = result.unresolvedReferences.find( + (r) => r.referenceKind === 'imports' && r.referenceName === 'utils' + ); + expect(ref).toBeDefined(); + }); + }); + + describe('Module extraction', () => { + it('should extract module_definition as a namespace node', () => { + const code = ` +module MyPkg + function foo() end +end +`; + const result = extractFromSource('pkg.jl', code); + const mod = result.nodes.find((n) => n.kind === 'namespace' && n.name === 'MyPkg'); + expect(mod).toBeDefined(); + const foo = result.nodes.find((n) => n.kind === 'function' && n.name === 'foo'); + expect(foo).toBeDefined(); + }); + }); + + describe('Qualified method names', () => { + it('should extract Base.getindex-style qualified function names', () => { + const code = ` +function Base.getindex(x::Vector{T}, i::Int) where T + return x[i] +end +`; + const result = extractFromSource('ext.jl', code); + const fn = result.nodes.find((n) => n.kind === 
/**
 * Launch the built CLI as an MCP server (`serve --mcp`) with `cwd` as its
 * working directory and all three stdio streams piped back to the test.
 *
 * The options object carries exactly ONE `env` key. A second `env` entry later
 * in the literal would silently replace this one (last key wins), dropping
 * `CODEGRAPH_ALLOW_UNSAFE_NODE` and letting the CLI exit before it serves.
 *
 * @param cwd Directory the server process is started in.
 * @returns The spawned child process.
 */
function spawnServer(cwd: string): ChildProcessWithoutNullStreams {
  return spawn(process.execPath, ['--liftoff-only', BIN, 'serve', '--mcp'], {
    cwd,
    env: {
      ...process.env,
      // Keep the MCP handshake tests exercising the protocol behavior even on
      // environments running Node >=25, where the CLI otherwise exits before
      // serving due to the WASM safety guard.
      CODEGRAPH_ALLOW_UNSAFE_NODE: '1',
      // `--liftoff-only` is already passed explicitly above; disable the CLI
      // self-relaunch so the test controls a single child process PID.
      CODEGRAPH_NO_RELAUNCH: '1',
    },
    stdio: ['pipe', 'pipe', 'pipe'],
  }) as ChildProcessWithoutNullStreams;
}

/**
 * Terminate a spawned server and wait (at most 2 seconds) for it to be gone.
 *
 * Resolves immediately for `null`, for a child that never received a PID, and
 * for a child that has already terminated. Termination by signal is detected
 * through `signalCode`, because `exitCode` stays `null` in that case.
 *
 * NOTE(review): the teardown hook visible next to this helper inlines its own
 * kill/wait sequence instead of calling it; consider routing it through here.
 *
 * @param child Child process to stop, or `null` when none was started.
 */
async function stopChild(child: ChildProcessWithoutNullStreams | null): Promise<void> {
  if (!child) return;
  const proc = child;
  const pid = proc.pid;
  if (pid === undefined) return;

  const hasTerminated = (): boolean => proc.exitCode !== null || proc.signalCode !== null;
  if (hasTerminated()) return;

  if (process.platform === 'win32') {
    // On Windows, ChildProcess.kill('SIGKILL') is not a real SIGKILL and may
    // leave descendants alive. Kill the full process tree deterministically.
    try { spawnSync('taskkill', ['/PID', String(pid), '/T', '/F'], { stdio: 'ignore' }); } catch { /* ignore */ }
  } else if (!proc.killed) {
    try { proc.kill('SIGKILL'); } catch { /* ignore */ }
  }

  await new Promise<void>((resolve) => {
    if (hasTerminated()) {
      resolve();
      return;
    }
    // Bounded wait: never let a stuck child hang the test run.
    const timer = setTimeout(resolve, 2000);
    proc.once('exit', () => {
      clearTimeout(timer);
      resolve();
    });
  });
}
/**
 * Launch the built CLI as an MCP server (`serve --mcp --no-watch`) with `cwd`
 * as its working directory and all three stdio streams piped back to the test.
 *
 * The options object carries exactly ONE `env` key. A second `env` entry later
 * in the literal would silently replace this one (last key wins), dropping
 * `CODEGRAPH_ALLOW_UNSAFE_NODE`, so the server would exit early and the
 * handshake assertions would see a silent stdout.
 *
 * @param cwd Directory the server process is started in.
 * @returns The spawned child process.
 */
function spawnServer(cwd: string): ChildProcessWithoutNullStreams {
  // --no-watch keeps the test deterministic and avoids watcher startup noise.
  return spawn(process.execPath, ['--liftoff-only', BIN, 'serve', '--mcp', '--no-watch'], {
    cwd,
    env: {
      ...process.env,
      // These tests validate MCP handshake/project-resolution behavior. On
      // Node >=25 the CLI exits early unless explicitly overridden, which
      // would make stdout appear "silent" and fail the handshake assertions.
      CODEGRAPH_ALLOW_UNSAFE_NODE: '1',
      // `--liftoff-only` is already passed explicitly above; disable the CLI
      // self-relaunch so the test controls a single child process PID.
      CODEGRAPH_NO_RELAUNCH: '1',
    },
    stdio: ['pipe', 'pipe', 'pipe'],
  }) as ChildProcessWithoutNullStreams;
}

/**
 * Terminate a spawned server and wait (at most 2 seconds) for it to be gone.
 *
 * Resolves immediately for `null`, for a child that never received a PID, and
 * for a child that has already terminated. Termination by signal is detected
 * through `signalCode`, because `exitCode` stays `null` in that case.
 *
 * NOTE(review): the teardown hook visible next to this helper inlines its own
 * kill/wait sequence instead of calling it; consider routing it through here.
 *
 * @param child Child process to stop, or `null` when none was started.
 */
async function stopChild(child: ChildProcessWithoutNullStreams | null): Promise<void> {
  if (!child) return;
  const proc = child;
  const pid = proc.pid;
  if (pid === undefined) return;

  const hasTerminated = (): boolean => proc.exitCode !== null || proc.signalCode !== null;
  if (hasTerminated()) return;

  if (process.platform === 'win32') {
    // On Windows, ChildProcess.kill('SIGKILL') is not a real SIGKILL and may
    // leave descendants alive. Kill the full process tree deterministically.
    try { spawnSync('taskkill', ['/PID', String(pid), '/T', '/F'], { stdio: 'ignore' }); } catch { /* ignore */ }
  } else if (!proc.killed) {
    try { proc.kill('SIGKILL'); } catch { /* ignore */ }
  }

  await new Promise<void>((resolve) => {
    if (hasTerminated()) {
      resolve();
      return;
    }
    // Bounded wait: never let a stuck child hang the test run.
    const timer = setTimeout(resolve, 2000);
    proc.once('exit', () => {
      clearTimeout(timer);
      resolve();
    });
  });
}
/** File-system type names, as they appear in /proc/mounts, treated as network-backed. */
const NETWORK_FS_TYPES: ReadonlySet<string> = new Set([
  'cifs', 'smbfs', 'smb2', 'nfs', 'nfs4', 'nfs3',
  'davfs', 'fuse.sshfs', 'fuse.rclone', 'fuse.s3fs',
  'ncpfs', 'afs', 'coda', 'glusterfs', 'lustre',
]);

/** Decode the three-digit octal escapes /proc/mounts uses for whitespace and backslash (`\040` → space). */
function unescapeMountField(field: string): string {
  return field.replace(/\\([0-7]{3})/g, (_match, octal: string) =>
    String.fromCharCode(parseInt(octal, 8)),
  );
}

/** Return the fs type of the deepest mount point containing `resolved`, or '' when no entry matches. */
function deepestMountFsType(mountsTable: string, resolved: string): string {
  let bestMount = '';
  let bestFsType = '';
  for (const line of mountsTable.split('\n')) {
    const parts = line.trim().split(/\s+/);
    if (parts.length < 3) continue;
    const mountPoint = unescapeMountField(parts[1] as string);
    const fsType = parts[2] as string;
    // The root mount already ends in '/'; appending another would make the
    // prefix '//' and the root file system could never match.
    const prefix = mountPoint.endsWith('/') ? mountPoint : mountPoint + '/';
    if (resolved !== mountPoint && !resolved.startsWith(prefix)) continue;
    // '>=' so that, for a mount point listed twice, the later entry wins.
    if (mountPoint.length >= bestMount.length) {
      bestMount = mountPoint;
      bestFsType = fsType;
    }
  }
  return bestFsType;
}

/**
 * Detect whether a file path lives on a network filesystem (CIFS/NFS/etc.).
 *
 * - Linux: finds the deepest mount point in the mount table that contains the
 *   path and checks its type against {@link NETWORK_FS_TYPES}.
 * - macOS: paths under `/Volumes/` or `/net/` are reported as network.
 *   NOTE(review): `/Volumes` presumably also hosts local external disks, so
 *   this can over-report — confirm whether that is acceptable.
 * - Windows: only UNC paths (`\\server\share\...`) are reported as network.
 *
 * Symlinks are not resolved; the path is only normalised to an absolute path.
 * Returns `false` on any read/parse error so the caller degrades gracefully.
 *
 * @param filePath Path to classify; may be relative and need not exist.
 * @param probe Optional overrides: `platform` replaces `os.platform()` and
 *   `mountsTable` replaces the contents of `/proc/mounts`. Omit both for
 *   normal use.
 * @returns `true` when the path is judged to be on a network filesystem.
 */
function isNetworkFilesystem(
  filePath: string,
  probe: { platform?: string; mountsTable?: string } = {},
): boolean {
  const platform = probe.platform ?? os.platform();
  // Natively `path` already is the matching flavour; selecting it explicitly
  // keeps the result consistent when `probe.platform` is supplied.
  const pathApi = platform === 'win32' ? path.win32 : path.posix;
  const resolved = pathApi.resolve(filePath);

  if (platform === 'linux') {
    try {
      const table = probe.mountsTable ?? fs.readFileSync('/proc/mounts', 'utf-8');
      return NETWORK_FS_TYPES.has(deepestMountFsType(table, resolved).toLowerCase());
    } catch {
      return false;
    }
  }

  if (platform === 'darwin') {
    return resolved.startsWith('/Volumes/') || resolved.startsWith('/net/');
  }

  if (platform === 'win32') {
    // UNC paths: \\server\share\...
    return resolved.startsWith('\\\\');
  }

  return false;
}
MEMORY journal avoids all disk + * I/O for journaling; synchronous=OFF is safe because the index is fully + * rebuildable from source. */ -function configureConnection(db: SqliteDatabase): void { - db.pragma('busy_timeout = 5000'); // MUST be first — see above +function configureConnection(db: SqliteDatabase, dbPath: string): void { + const networkFs = isNetworkFilesystem(dbPath); + + // busy_timeout MUST come first — lets subsequent pragmas wait out any lock + db.pragma(networkFs ? 'busy_timeout = 30000' : 'busy_timeout = 5000'); db.pragma('foreign_keys = ON'); - db.pragma('journal_mode = WAL'); // node:sqlite supports WAL on every platform - db.pragma('synchronous = NORMAL'); // safe with WAL mode + + if (networkFs) { + // WAL needs shared-memory files unsupported on most network mounts. + // MEMORY journal avoids all file I/O for journaling (DELETE journal still + // tries to create a -journal file on disk, which fails on CIFS/NFS). + // synchronous=OFF is safe here: the index is fully rebuildable from source. 
+ db.pragma('journal_mode = MEMORY'); + db.pragma('synchronous = OFF'); + // skip mmap — unreliable on network filesystems + } else { + db.pragma('journal_mode = WAL'); // node:sqlite supports WAL on every platform + db.pragma('synchronous = NORMAL'); // safe with WAL mode + db.pragma('mmap_size = 268435456'); // 256 MB memory-mapped I/O + } + db.pragma('cache_size = -64000'); // 64 MB page cache db.pragma('temp_store = MEMORY'); // temp tables in memory - db.pragma('mmap_size = 268435456'); // 256 MB memory-mapped I/O } /** @@ -61,9 +137,10 @@ export class DatabaseConnection { } // Create and configure database - const { db, backend } = createDatabase(dbPath); + const nolock = isNetworkFilesystem(dbPath); + const { db, backend } = createDatabase(dbPath, { nolock }); - configureConnection(db); + configureConnection(db, dbPath); // Run schema initialization const schemaPath = path.join(__dirname, 'schema.sql'); @@ -89,9 +166,10 @@ export class DatabaseConnection { throw new Error(`Database not found: ${dbPath}`); } - const { db, backend } = createDatabase(dbPath); + const nolock = isNetworkFilesystem(dbPath); + const { db, backend } = createDatabase(dbPath, { nolock }); - configureConnection(db); + configureConnection(db, dbPath); // Check and run migrations if needed const conn = new DatabaseConnection(db, dbPath, backend); diff --git a/src/db/sqlite-adapter.ts b/src/db/sqlite-adapter.ts index 37f0c790..25932fb7 100644 --- a/src/db/sqlite-adapter.ts +++ b/src/db/sqlite-adapter.ts @@ -43,10 +43,16 @@ export type SqliteBackend = 'node-sqlite'; class NodeSqliteAdapter implements SqliteDatabase { private _db: any; - constructor(dbPath: string) { + constructor(dbPath: string, options: { nolock?: boolean } = {}) { // eslint-disable-next-line @typescript-eslint/no-require-imports const { DatabaseSync } = require('node:sqlite'); - this._db = new DatabaseSync(dbPath); + // On network filesystems (CIFS/NFS) fcntl() locks are unreliable. 
+ // nolock=1 bypasses SQLite's file-locking protocol entirely; safe when + // only one process accesses the database at a time (codegraph's typical use). + const openPath = options.nolock + ? `file://${dbPath}?nolock=1&mode=rwc` + : dbPath; + this._db = new DatabaseSync(openPath); } get open(): boolean { @@ -123,10 +129,13 @@ class NodeSqliteAdapter implements SqliteDatabase { * Returns the active backend alongside the db so each `DatabaseConnection` can * report it per-instance — MCP can open multiple project DBs in one process, so * a process-global would race. + * + * Pass `{ nolock: true }` when the database lives on a network filesystem + * (CIFS/NFS) where fcntl() locking is unreliable. */ -export function createDatabase(dbPath: string): { db: SqliteDatabase; backend: SqliteBackend } { +export function createDatabase(dbPath: string, options: { nolock?: boolean } = {}): { db: SqliteDatabase; backend: SqliteBackend } { try { - return { db: new NodeSqliteAdapter(dbPath), backend: 'node-sqlite' }; + return { db: new NodeSqliteAdapter(dbPath, options), backend: 'node-sqlite' }; } catch (error) { const msg = error instanceof Error ? 
error.message : String(error); throw new Error( diff --git a/src/extraction/grammars.ts b/src/extraction/grammars.ts index c167d28b..b0e2c9dc 100644 --- a/src/extraction/grammars.ts +++ b/src/extraction/grammars.ts @@ -37,6 +37,7 @@ const WASM_GRAMMAR_FILES: Record = { scala: 'tree-sitter-scala.wasm', lua: 'tree-sitter-lua.wasm', luau: 'tree-sitter-luau.wasm', + julia: 'tree-sitter-julia.wasm', }; /** @@ -92,6 +93,7 @@ export const EXTENSION_MAP: Record = { '.sc': 'scala', '.lua': 'lua', '.luau': 'luau', + '.jl': 'julia', }; /** @@ -119,6 +121,7 @@ export function isPlayRoutesFile(filePath: string): boolean { ); } + /** * Caches for loaded grammars and parsers */ @@ -169,7 +172,7 @@ export async function loadGrammarsForLanguages(languages: Language[]): Promise> = { typescript: typescriptExtractor, @@ -47,4 +48,5 @@ export const EXTRACTORS: Partial> = { scala: scalaExtractor, lua: luaExtractor, luau: luauExtractor, + julia: juliaExtractor, }; diff --git a/src/extraction/languages/julia.ts b/src/extraction/languages/julia.ts new file mode 100644 index 00000000..f8807aad --- /dev/null +++ b/src/extraction/languages/julia.ts @@ -0,0 +1,401 @@ +import type { Node as SyntaxNode } from 'web-tree-sitter'; +import { getNodeText } from '../tree-sitter-helpers'; +import type { LanguageExtractor } from '../tree-sitter-types'; + +/** + * Extract the name identifier from a Julia function signature node. + * + * The signature rule is one of: + * identifier → `function foo end` + * call_expression → `function foo(args...) 
end` + * typed_expression → `function foo(x)::T end` (return type annotation on sig) + * where_expression → `function foo(x::T) where T end` + * field_expression → `function Base.getindex(x) end` (qualified name) + */ +function extractFunctionName(signatureNode: SyntaxNode, source: string): string | null { + // Unwrap the tree-sitter 'signature' wrapper node + if (signatureNode.type === 'signature') { + const inner = signatureNode.namedChild(0); + if (inner) return extractFunctionName(inner, source); + return getNodeText(signatureNode, source); + } + if (signatureNode.type === 'identifier' || signatureNode.type === 'field_expression') { + return getNodeText(signatureNode, source); + } + if (signatureNode.type === 'call_expression') { + // The first named child is the function name (identifier or field_expression) + const first = signatureNode.namedChild(0); + if (first) return getNodeText(first, source); + } + if (signatureNode.type === 'typed_expression') { + // typed_expression: '::' — recurse on left side + const expr = signatureNode.namedChild(0); + if (expr) return extractFunctionName(expr, source); + } + if (signatureNode.type === 'where_expression') { + // where_expression: 'where' — recurse on left side + const expr = signatureNode.namedChild(0); + if (expr) return extractFunctionName(expr, source); + } + return getNodeText(signatureNode, source); +} + +/** + * Extract a readable signature (parameter list + optional return type) from + * the Julia function signature node. 
+ */ +function extractFunctionSignature(signatureNode: SyntaxNode, source: string): string | undefined { + // Unwrap the tree-sitter 'signature' wrapper node + if (signatureNode.type === 'signature') { + const inner = signatureNode.namedChild(0); + if (!inner) return undefined; + return extractFunctionSignature(inner, source); + } + + // Unwrap where_expression first + let sig = signatureNode; + let whereClause = ''; + if (sig.type === 'where_expression') { + const whereType = sig.namedChild(1); + if (whereType) whereClause = ' where ' + getNodeText(whereType, source); + const left = sig.namedChild(0); + if (left) sig = left; + } + + // Unwrap return type annotation + let returnType = ''; + if (sig.type === 'typed_expression') { + const retNode = sig.namedChild(1); + if (retNode) returnType = '::' + getNodeText(retNode, source); + const left = sig.namedChild(0); + if (left) sig = left; + } + + // Extract argument list from call_expression + if (sig.type === 'call_expression') { + const argsNode = sig.namedChild(1); // argument_list + if (argsNode) { + return getNodeText(argsNode, source) + returnType + whereClause; + } + } + + return undefined; +} + +/** + * Extract the name from a Julia type_head node (used in struct/abstract definitions). + * type_head can be: identifier, parametrized_type_expression (Foo{T}), binary_expression + * (for subtype declarations like `Foo <: Bar`), etc. 
+ */ +function extractTypeName(typeHeadNode: SyntaxNode, source: string): string | null { + if (typeHeadNode.type === 'type_head') { + // Unwrap the type_head wrapper — recurse into its first named child + const inner = typeHeadNode.namedChild(0); + if (inner) return extractTypeName(inner, source); + return getNodeText(typeHeadNode, source); + } + if (typeHeadNode.type === 'identifier') { + return getNodeText(typeHeadNode, source); + } + if (typeHeadNode.type === 'call_expression' || typeHeadNode.type === 'parametrized_type_expression') { + // Parametric type: Foo{T, U} — first named child is the name + const first = typeHeadNode.namedChild(0); + if (first) return getNodeText(first, source); + } + if (typeHeadNode.type === 'binary_expression') { + // Subtype: `Foo <: Bar` — first named child is the name + const first = typeHeadNode.namedChild(0); + if (first) return extractTypeName(first, source); + } + if (typeHeadNode.type === 'where_expression') { + const expr = typeHeadNode.namedChild(0); + if (expr) return extractTypeName(expr, source); + } + // Fallback: use full text + return getNodeText(typeHeadNode, source); +} + +/** + * Extract field name from a struct field node. + * identifier → plain untyped field: `label` + * typed_expression → typed field: `x::Float64` + * assignment → field with default: `x::Int = 1` or `flag = false` + */ +function extractFieldName(node: SyntaxNode): string | null { + if (node.type === 'identifier') return node.text; + if (node.type === 'typed_expression') return node.firstNamedChild?.text ?? null; + if (node.type === 'assignment') { + const lhs = node.firstNamedChild; + if (lhs?.type === 'typed_expression') return lhs.firstNamedChild?.text ?? null; + if (lhs?.type === 'identifier') return lhs.text; + } + return null; +} + +/** + * Extract field type annotation from a struct field node. 
+ */ +function extractFieldType(node: SyntaxNode, source: string): string | undefined { + if (node.type === 'typed_expression') { + const typeNode = node.namedChild(1); + return typeNode ? getNodeText(typeNode, source).trim() : undefined; + } + if (node.type === 'assignment') { + const lhs = node.firstNamedChild; + if (lhs?.type === 'typed_expression') { + const typeNode = lhs.namedChild(1); + return typeNode ? getNodeText(typeNode, source).trim() : undefined; + } + } + return undefined; +} + +/** + * True when node is a direct struct body field (not the type_head). + */ +function isStructField(node: SyntaxNode): boolean { + const parent = node.parent; + if (!parent) return false; + // Fields live inside the block body of a struct_definition + return parent.type === 'block' && parent.parent?.type === 'struct_definition'; +} + +export const juliaExtractor: LanguageExtractor = { + functionTypes: ['function_definition', 'macro_definition'], + classTypes: [], + methodTypes: ['function_definition'], // methods are just multiple-dispatch functions + interfaceTypes: ['abstract_definition'], + structTypes: ['struct_definition'], + enumTypes: [], + typeAliasTypes: [], + importTypes: ['import_statement', 'using_statement'], + callTypes: ['call_expression'], + variableTypes: ['const_statement'], + interfaceKind: 'interface', + + nameField: 'name', // not used directly — overridden in getName below + bodyField: 'body', + paramsField: 'signature', + returnField: undefined, + + /** + * Extract the name from a Julia AST node. + * Falls back to the default field-based approach for nodes without custom handling. 
+ */ + getName: (node, source) => { + if (node.type === 'function_definition' || node.type === 'macro_definition') { + for (let i = 0; i < node.namedChildCount; i++) { + const child = node.namedChild(i); + if (!child) continue; + if (child.type !== 'block') { + return extractFunctionName(child, source); + } + } + return null; + } + + if (node.type === 'struct_definition') { + for (let i = 0; i < node.namedChildCount; i++) { + const child = node.namedChild(i); + if (!child) continue; + if (child.type !== 'block') { + return extractTypeName(child, source); + } + } + return null; + } + + if (node.type === 'abstract_definition') { + const typeHead = node.namedChild(0); + if (typeHead) return extractTypeName(typeHead, source); + return null; + } + + if (node.type === 'module_definition') { + const nameNode = node.childForFieldName('name') ?? node.namedChild(0); + if (nameNode) return getNodeText(nameNode, source); + return null; + } + + return null; + }, + + getSignature: (node, source) => { + if (node.type === 'function_definition' || node.type === 'macro_definition') { + for (let i = 0; i < node.namedChildCount; i++) { + const child = node.namedChild(i); + if (!child || child.type === 'block') continue; + return extractFunctionSignature(child, source); + } + } + return undefined; + }, + + isAsync: (_node) => false, // Julia has @async macro, not a keyword modifier + + /** + * Julia doesn't use `field('body', ...)` in the grammar; bodies are plain + * named `block` children. Find the first `block` child on the node. + */ + resolveBody: (node, _bodyField) => { + for (let i = 0; i < node.namedChildCount; i++) { + const child = node.namedChild(i); + if (child?.type === 'block') return child; + } + return null; + }, + + /** + * Custom visitor to handle: + * 1. Short-form function definitions: `add(x, y) = x + y` + * 2. `include("file.jl")` as relative file import + * 3. Struct field declarations inside struct bodies + * 4. 
`module_definition` as a namespace node
+   *
+   * Returns `true` once a node has been handled here, and `false` to hand the
+   * node back to the default dispatch (which walks its children).
+   */
+  visitNode: (node, ctx) => {
+    const source = ctx.source;
+
+    // ── Struct fields ──────────────────────────────────────────────────────────
+    // Extract typed and untyped fields from struct bodies.
+    // `typed_expression` (x::Float64) and bare `identifier` (label) as direct
+    // children of a struct block; `assignment` handles default values (@with_kw).
+    // This check runs before the short-form function branch below, so an
+    // `assignment` that is a struct field is never treated as a function.
+    if (
+      (node.type === 'typed_expression' || node.type === 'identifier' || node.type === 'assignment') &&
+      isStructField(node)
+    ) {
+      const fieldName = extractFieldName(node);
+      if (fieldName) {
+        const fieldType = extractFieldType(node, source);
+        const sig = fieldType ? `${fieldName}::${fieldType}` : fieldName;
+        ctx.createNode('field', fieldName, node, { signature: sig });
+      }
+      return true;
+    }
+
+    // ── Short-form function definitions ────────────────────────────────────────
+    // `add(x, y) = x + y`       → LHS is call_expression
+    // `f(x::T) where T = x`     → LHS is where_expression wrapping call_expression
+    if (node.type === 'assignment') {
+      const lhs = node.namedChild(0);
+
+      // Unwrap where_expression: `f(x::T) where T = ...`
+      let callExpr = lhs;
+      let whereClause = '';
+      if (callExpr?.type === 'where_expression') {
+        const whereType = callExpr.namedChild(1);
+        if (whereType) whereClause = ' where ' + getNodeText(whereType, source);
+        callExpr = callExpr.namedChild(0) ?? null;
+      }
+
+      if (callExpr?.type === 'call_expression') {
+        const nameNode = callExpr.namedChild(0);
+        const funcName = nameNode ? getNodeText(nameNode, source) : null;
+        if (!funcName) return false; // malformed — let default dispatch walk children
+        const argsNode = callExpr.namedChild(1);
+        const sig = argsNode ? getNodeText(argsNode, source) + whereClause : undefined;
+        const fnNode = ctx.createNode('function', funcName, node, { signature: sig });
+
+        // Visit the RHS (the function body) so calls inside it are extracted.
+        // The child count guards against revisiting the LHS when there is no
+        // separate RHS; comparing node wrappers by reference is unreliable
+        // because accessor calls are not guaranteed to return the same object.
+        if (node.namedChildCount > 1) {
+          const rhs = node.namedChild(node.namedChildCount - 1);
+          if (rhs) {
+            if (fnNode) {
+              // Attribute everything found in the body to the new function
+              // rather than to the enclosing scope.
+              ctx.pushScope(fnNode.id);
+              try {
+                ctx.visitNode(rhs);
+              } finally {
+                ctx.popScope();
+              }
+            } else {
+              ctx.visitNode(rhs);
+            }
+          }
+        }
+        return true;
+      }
+
+      // Plain assignment (x = 42): nothing to extract here. Returning false
+      // hands the node back to the default traversal, which walks its children.
+      return false;
+    }
+
+    // ── include("file.jl") as relative file import ─────────────────────────────
+    // Julia uses include() for relative file composition, not import/using.
+    if (node.type === 'call_expression') {
+      const callee = node.namedChild(0);
+      if (callee?.type === 'identifier' && callee.text === 'include') {
+        const args = node.namedChild(1);
+        const strLit = args?.namedChildren.find((n) => n.type === 'string_literal');
+        const content = strLit?.namedChildren.find((n) => n.type === 'content');
+        const filePath = content?.text?.trim();
+        // Only plain, reasonably sized literal paths are recorded; anything else
+        // falls through to the default call extraction below.
+        if (filePath && filePath.length < 512 && !filePath.includes('\0')) {
+          // Use the basename without extension as the module name (matches how
+          // the file will be indexed). Emit an `imports` reference so the resolver
+          // can wire up cross-file edges via suffix matching.
+          const baseName = filePath.replace(/\.jl$/i, '').replace(/.*[\\/]/, '');
+          ctx.createNode('import', baseName, node, {
+            signature: `include("${filePath}")`,
+          });
+          const parentId = ctx.nodeStack.length > 0
+            ? ctx.nodeStack[ctx.nodeStack.length - 1]
+            : undefined;
+          // Without an enclosing node there is nothing to attach the reference to.
+          if (parentId) {
+            ctx.addUnresolvedReference({
+              fromNodeId: parentId,
+              referenceName: baseName,
+              referenceKind: 'imports',
+              line: node.startPosition.row + 1,
+              column: node.startPosition.column,
+            });
+          }
+          return true;
+        }
+      }
+      // Not include() — fall through to default call extraction
+      return false;
+    }
+
+    // ── module_definition as namespace ─────────────────────────────────────────
+    // Record `module Foo ... end` as a 'namespace' node and visit its contents
+    // with that node as the current scope.
+    if (node.type === 'module_definition') {
+      const nameNode = node.childForFieldName('name') ?? node.namedChild(0);
+      if (!nameNode) return false;
+      const modName = getNodeText(nameNode, source);
+      const modNode = ctx.createNode('namespace', modName, node, {});
+      if (modNode) {
+        ctx.pushScope(modNode.id);
+        try {
+          // Visit every named child except the module's own name. Nodes are
+          // compared by id because wrapper objects are not guaranteed to be
+          // reference-equal across accessor calls.
+          for (let i = 0; i < node.namedChildCount; i++) {
+            const child = node.namedChild(i);
+            if (child && child.id !== nameNode.id) ctx.visitNode(child);
+          }
+        } finally {
+          // Always restore the scope stack, even if a child visit throws.
+          ctx.popScope();
+        }
+      }
+      return true;
+    }
+
+    return false;
+  },
+
+  /**
+   * Derive the module name and signature for a `using` / `import` statement.
+   *
+   * The signature is always the trimmed statement text. Only the first named
+   * child is inspected, so for a statement naming several modules presumably
+   * only the first one is reported (TODO confirm against the grammar).
+   */
+  extractImport: (node, source) => {
+    const importText = source.substring(node.startIndex, node.endIndex).trim();
+
+    const firstChild = node.namedChild(0);
+    if (!firstChild) return { moduleName: importText, signature: importText };
+
+    // selected_import: `using Foo: bar, baz` or `import Foo: bar` → module is `Foo`
+    // import_alias:    `Foo as F`                                 → module is `Foo`
+    // In both shapes the module path is the wrapper's first named child.
+    if (firstChild.type === 'selected_import' || firstChild.type === 'import_alias') {
+      const pathNode = firstChild.namedChild(0);
+      if (pathNode) {
+        return { moduleName: getNodeText(pathNode, source), signature: importText };
+      }
+    }
+
+    // import_path: `.Foo` (relative import) — keep the path text as written
+    if (firstChild.type === 'import_path') {
+      return { moduleName: getNodeText(firstChild, source), signature: importText };
+    }
+
+    // Scoped identifier: `Foo.Bar.Baz` — take first part. `||` (not `??`) so an
+    // empty first segment falls back to the full text instead of yielding ''.
+    const text = getNodeText(firstChild, source);
+    const topModule = text.split('.')[0] || text;
+    return { moduleName: topModule, signature: importText };
+  },
+};
diff --git a/src/extraction/tree-sitter-types.ts b/src/extraction/tree-sitter-types.ts index c3a6b94e..a059a53c 100644 --- a/src/extraction/tree-sitter-types.ts +++ b/src/extraction/tree-sitter-types.ts @@ -120,6 +120,13 @@ export interface LanguageExtractor { // --- Existing hooks --- + /** + * Override name extraction for languages where the name is not a direct field child. + * When provided, replaces the default `nameField`-based lookup in `extractName`. + * Return null to fall back to the default logic. + */ + getName?: (node: SyntaxNode, source: string) => string | null; + /** Extract signature from node */ getSignature?: (node: SyntaxNode, source: string) => string | undefined; /** Extract visibility from node */ diff --git a/src/extraction/tree-sitter.ts b/src/extraction/tree-sitter.ts index 99c7f9aa..10391d48 100644 --- a/src/extraction/tree-sitter.ts +++ b/src/extraction/tree-sitter.ts @@ -35,6 +35,12 @@ export { generateNodeId } from './tree-sitter-helpers'; * Extract the name from a node based on language */ function extractName(node: SyntaxNode, source: string, extractor: LanguageExtractor): string { + // Language-specific name extraction hook + if (extractor.getName) { + const customName = extractor.getName(node, source); + if (customName !== null) return customName || ''; + } + // Try field name first const nameNode = getChildByField(node, extractor.nameField); if (nameNode) { @@ -782,7 +788,8 @@ export class TreeSitterExtractor { if (!this.extractor) return; // Skip forward declarations and type references (no body = not a definition) - const body = getChildByField(node, this.extractor.bodyField); + const body = this.extractor.resolveBody?.(node, this.extractor.bodyField) + ?? 
getChildByField(node, this.extractor.bodyField); if (!body) return; const name = extractName(node, this.source, this.extractor); @@ -1177,6 +1184,23 @@ export class TreeSitterExtractor { const initSignature = initValue ? `= ${initValue}${initValue.length >= 100 ? '...' : ''}` : undefined; this.createNode(kind, name, nameNode, { docstring, signature: initSignature, isExported }); }); + } else if (this.language === 'julia') { + // Julia: const_statement → assignment → identifier (name) [op] value + const assignment = node.namedChild(0); + if (assignment) { + const nameNode = assignment.namedChild(0); + if (nameNode?.type === 'identifier') { + const name = getNodeText(nameNode, this.source); + const valueNode = assignment.namedChildCount > 1 + ? assignment.namedChild(assignment.namedChildCount - 1) + : null; + const initValue = valueNode && valueNode !== nameNode + ? getNodeText(valueNode, this.source).slice(0, 100) + : undefined; + const initSignature = initValue ? `= ${initValue}` : undefined; + this.createNode('constant', name, nameNode, { docstring, signature: initSignature, isExported }); + } + } } else { // Generic fallback for other languages // Try to find identifier children diff --git a/src/extraction/wasm/tree-sitter-julia.wasm b/src/extraction/wasm/tree-sitter-julia.wasm new file mode 100644 index 00000000..904f26a7 Binary files /dev/null and b/src/extraction/wasm/tree-sitter-julia.wasm differ diff --git a/src/types.ts b/src/types.ts index 0168665d..f577afd2 100644 --- a/src/types.ts +++ b/src/types.ts @@ -89,6 +89,7 @@ export const LANGUAGES = [ 'luau', 'yaml', 'twig', + 'julia', 'unknown', ] as const; diff --git a/vitest.config.ts b/vitest.config.ts index 2449a989..814a0723 100644 --- a/vitest.config.ts +++ b/vitest.config.ts @@ -1,10 +1,23 @@ import { defineConfig } from 'vitest/config'; +const NODE_MAJOR = Number.parseInt(process.versions.node.split('.')[0] ?? 
'0', 10); + +// The V8 turboshaft WASM Zone OOM bug that crashes tree-sitter grammar +// compilation exists in Node 22–25.x. Node 26+ fixes it; forcing +// --liftoff-only on Node 26 Windows is empirically tied to a fork-worker +// teardown crash in tinypool, so only apply the flag where it's needed. +const NEEDS_LIFTOFF_ONLY = NODE_MAJOR >= 22 && NODE_MAJOR <= 25; + export default defineConfig({ test: { globals: true, environment: 'node', include: ['__tests__/**/*.test.ts'], + poolOptions: { + forks: { + execArgv: NEEDS_LIFTOFF_ONLY ? ['--liftoff-only'] : [], + }, + }, coverage: { provider: 'v8', reporter: ['text', 'json', 'html'],