Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion __tests__/foundation.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,7 @@ describe('Database Connection', () => {

const version = db.getSchemaVersion();
expect(version).not.toBeNull();
expect(version?.version).toBe(3);
expect(version?.version).toBe(17);

db.close();
});
Expand Down
95 changes: 95 additions & 0 deletions __tests__/migrations-registry.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
/**
* Migration registry: structural invariants.
*
* Guards against the silent-no-op bug class that motivated this
* refactor. If a future PR introduces a duplicate version,
* out-of-order versions, or fails to register a new migration
* file, one of these tests fails loudly.
*/
import { describe, it, expect } from 'vitest';
import * as fs from 'fs';
import * as path from 'path';
import {
ALL_MIGRATIONS,
CURRENT_SCHEMA_VERSION,
} from '../src/db/migrations';

describe('migration registry — structural invariants', () => {
  it('registry is non-empty', () => {
    expect(ALL_MIGRATIONS.length).toBeGreaterThan(0);
  });

  it('versions are unique', () => {
    const versions = ALL_MIGRATIONS.map((m) => m.version);
    // A Set collapses duplicates, so a size mismatch flags the bug class directly.
    expect(new Set(versions).size).toBe(versions.length);
  });

  it('versions are strictly ascending', () => {
    for (let i = 1; i < ALL_MIGRATIONS.length; i++) {
      expect(ALL_MIGRATIONS[i]!.version).toBeGreaterThan(
        ALL_MIGRATIONS[i - 1]!.version
      );
    }
  });

  it('each migration has a non-empty description and a function up()', () => {
    for (const m of ALL_MIGRATIONS) {
      expect(m.description.length).toBeGreaterThan(0);
      expect(typeof m.up).toBe('function');
    }
  });

  it('CURRENT_SCHEMA_VERSION matches the highest registered version', () => {
    // Use Math.max over every version rather than the last array element,
    // so this test does not silently depend on the "strictly ascending"
    // invariant verified above. If ordering ever breaks, both tests fail
    // for their own stated reason instead of one masking the other.
    const max = Math.max(...ALL_MIGRATIONS.map((m) => m.version));
    expect(CURRENT_SCHEMA_VERSION).toBe(max);
  });
});

describe('migration files — filename ↔ version coupling', () => {
  // Compare the filenames actually on disk against the registry. Catches
  // the case where someone drops a new file in src/db/migrations/ but
  // forgets to register it.
  const migrationsDir = path.resolve(__dirname, '../src/db/migrations');
  const SUPPORT_FILES = new Set(['index.ts', 'types.ts']);
  const STRICT_NNN_PATTERN = /^\d{3}-[a-z0-9]+(?:-[a-z0-9]+)*\.ts$/;

  // Every .ts file in the migrations dir except the known support modules.
  const listMigrationFiles = (): string[] =>
    fs
      .readdirSync(migrationsDir)
      .filter((f) => f.endsWith('.ts'))
      .filter((f) => !SUPPORT_FILES.has(f));

  // The NNN prefix is the version; the strict pattern guarantees 3 digits.
  const versionOf = (f: string): number => parseInt(f.slice(0, 3), 10);

  it('every migration file matches the strict `NNN-kebab-name.ts` pattern', () => {
    const offenders = listMigrationFiles().filter(
      (f) => !STRICT_NNN_PATTERN.test(f)
    );
    expect(offenders).toEqual([]);
  });

  it('every src/db/migrations/NNN-*.ts file is registered (no orphan files)', () => {
    const files = listMigrationFiles().filter((f) => STRICT_NNN_PATTERN.test(f));
    expect(files.length).toBeGreaterThan(0);
    const registeredVersions = new Set(ALL_MIGRATIONS.map((m) => m.version));
    for (const f of files) {
      if (!registeredVersions.has(versionOf(f))) {
        throw new Error(
          `Migration file ${f} exists on disk but is not registered in src/db/migrations/index.ts. ` +
            `Add an import + array entry for it.`
        );
      }
    }
  });

  it('every registered version has a matching NNN-*.ts file (no phantom registrations)', () => {
    const files = listMigrationFiles().filter((f) => STRICT_NNN_PATTERN.test(f));
    const filenameVersions = new Set(files.map(versionOf));
    for (const m of ALL_MIGRATIONS) {
      expect(filenameVersions.has(m.version)).toBe(true);
    }
  });
});
2 changes: 1 addition & 1 deletion __tests__/pr19-improvements.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -299,7 +299,7 @@ describe('Best-Candidate Resolution', () => {
describe('Schema v2 Migration', () => {
it.skipIf(!HAS_SQLITE)('should have correct current schema version', async () => {
const { CURRENT_SCHEMA_VERSION } = await import('../src/db/migrations');
expect(CURRENT_SCHEMA_VERSION).toBe(3);
expect(CURRENT_SCHEMA_VERSION).toBe(17);
});

it.skipIf(!HAS_SQLITE)('should have migration for version 2', async () => {
Expand Down
119 changes: 119 additions & 0 deletions scripts/spikes/spike-edge-indexes.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
#!/usr/bin/env node
/**
* Spike: redundant edge indexes
*
* Drops `idx_edges_source` and `idx_edges_target` and measures
* the impact on:
* - DB size
* - Bulk-insert throughput
* - Latency for `WHERE source = ?` and `WHERE target = ?`
* (the two queries that previously hit the dropped indexes)
*
* The hypothesis: SQLite covers source-only / target-only lookups
* via the wider `(source, kind)` and `(target, kind)` composite
* indexes through left-prefix scan, so dropping the narrow ones
* costs nothing on the read side but saves space and write time.
*
* Synthesises 50K nodes / 250K edges so the measurement scales to
* what real users will hit; codegraph's own DB at ~2K nodes is too
* small for index choices to surface.
*/
import Database from 'better-sqlite3';
import fs from 'node:fs';
import os from 'node:os';
import path from 'node:path';

// Synthetic workload size: 50K nodes × 5 edges/node = 250K edges — large
// enough for index choices to surface (see the header note above).
const NODES = 50_000;
const EDGES_PER_NODE = 5;

function ms(start) { return Number(process.hrtime.bigint() - start) / 1_000_000; }
function fmt(n) { return n < 10 ? n.toFixed(2) : n.toFixed(0); }

// Banner + workload summary, printed before the (slow) synthesis starts.
console.log('\n=== Spike: redundant edge indexes ===\n');
console.log(`Synthesizing ${NODES.toLocaleString()} nodes, ${(NODES*EDGES_PER_NODE).toLocaleString()} edges...`);

/**
 * Build a throwaway edges DB in the OS temp dir and bulk-load the
 * synthetic graph.
 *
 * @param {{withRedundant: boolean}} opts - when true, also creates the
 *   narrow `idx_edges_source` / `idx_edges_target` indexes under test.
 * @returns {{db: Database, dbPath: string, size: number, insertMs: number}}
 *   open handle, file path, on-disk size in bytes, and bulk-insert time.
 */
function buildEdgesDb({ withRedundant }) {
  // Unique filename so the two variants can coexist in tmpdir.
  const dbPath = path.join(os.tmpdir(), `spike-edges-${Date.now()}-${Math.random()}.db`);
  const db = new Database(dbPath);
  db.pragma('journal_mode = WAL');
  db.pragma('synchronous = NORMAL');
  db.pragma('cache_size = -64000');
  db.exec(`
    CREATE TABLE nodes (id TEXT PRIMARY KEY, kind TEXT NOT NULL, name TEXT NOT NULL);
    CREATE TABLE edges (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      source TEXT NOT NULL, target TEXT NOT NULL, kind TEXT NOT NULL,
      line INTEGER, col INTEGER
    );
    CREATE INDEX idx_edges_kind ON edges(kind);
    CREATE INDEX idx_edges_source_kind ON edges(source, kind);
    CREATE INDEX idx_edges_target_kind ON edges(target, kind);
  `);
  if (withRedundant) {
    db.exec(`
      CREATE INDEX idx_edges_source ON edges(source);
      CREATE INDEX idx_edges_target ON edges(target);
    `);
  }

  const insNode = db.prepare('INSERT INTO nodes (id, kind, name) VALUES (?, ?, ?)');
  const insEdge = db.prepare('INSERT INTO edges (source, target, kind, line, col) VALUES (?, ?, ?, ?, ?)');
  const KINDS = ['calls', 'imports', 'references', 'type_of', 'extends', 'instantiates'];
  const tStart = process.hrtime.bigint();
  // One big transaction: measures raw insert throughput, not per-row fsync.
  db.transaction(() => {
    for (let i = 0; i < NODES; i++) {
      insNode.run(`n${i}`, 'function', `name${i}`);
    }
    for (let i = 0; i < NODES; i++) {
      for (let j = 0; j < EDGES_PER_NODE; j++) {
        const tgt = `n${(i + j + 1) % NODES}`;
        const kind = KINDS[j % KINDS.length];
        insEdge.run(`n${i}`, tgt, kind, i, j);
      }
    }
  })();
  const insertMs = ms(tStart);
  db.exec('PRAGMA optimize');
  // BUGFIX: in WAL mode the freshly written pages live in the `-wal`
  // side file until a checkpoint, so statSync(dbPath) on the main file
  // alone under-reports the DB size and skews the size comparison this
  // spike exists to make. Checkpoint + truncate the WAL first so the
  // main file reflects the real on-disk footprint.
  db.pragma('wal_checkpoint(TRUNCATE)');

  return { db, dbPath, size: fs.statSync(dbPath).size, insertMs };
}

// Build both variants up front; each call pays the full synthesis cost.
const baseline = buildEdgesDb({ withRedundant: true });
const stripped = buildEdgesDb({ withRedundant: false });

console.log('');
console.log(`  baseline (with redundant): size=${(baseline.size / 1024 / 1024).toFixed(1)} MB · bulk insert=${fmt(baseline.insertMs)}ms`);
console.log(`  stripped : size=${(stripped.size / 1024 / 1024).toFixed(1)} MB · bulk insert=${fmt(stripped.insertMs)}ms`);
// Relative deltas: a positive size delta and a speedup > 1 both favour
// dropping the redundant indexes.
const sizeDelta = ((baseline.size - stripped.size) / baseline.size * 100).toFixed(1);
const insertSpeedup = (baseline.insertMs / stripped.insertMs).toFixed(2);
console.log(`  Δ size: -${sizeDelta}% · Δ bulk insert: ${insertSpeedup}× faster without redundant indexes`);

/**
 * Average per-call latency of the two point lookups that previously hit
 * the dropped indexes, measured over a fixed number of iterations.
 */
function timeQueries(db, label) {
  const ITERATIONS = 500;
  const bySource = db.prepare('SELECT COUNT(*) FROM edges WHERE source = ?');
  const byTarget = db.prepare('SELECT COUNT(*) FROM edges WHERE target = ?');
  // Time one prepared statement across ITERATIONS distinct node ids and
  // return the mean wall-clock milliseconds per call.
  const timeAvg = (stmt) => {
    const started = process.hrtime.bigint();
    for (let i = 0; i < ITERATIONS; i++) stmt.get(`n${i % NODES}`);
    return ms(started) / ITERATIONS;
  };
  const sourceMs = timeAvg(bySource);
  const targetMs = timeAvg(byTarget);
  console.log(`  ${label}: WHERE source=? avg ${fmt(sourceMs)}ms · WHERE target=? avg ${fmt(targetMs)}ms`);
  return { sourceMs, targetMs };
}
console.log('');
// Per-variant timings; the final line normalises stripped against
// baseline, so a ratio > 1 means the stripped DB answered slower.
const baseQ = timeQueries(baseline.db, 'baseline');
const strQ = timeQueries(stripped.db, 'stripped');
console.log(`  query speed delta: source ${(strQ.sourceMs / baseQ.sourceMs).toFixed(2)}× · target ${(strQ.targetMs / baseQ.targetMs).toFixed(2)}× (>1 = stripped slower)`);

// EXPLAIN-confirm that the stripped DB still uses an index for these
// queries — we want to know it's a covering scan, not a table scan.
const plan = stripped.db.prepare('EXPLAIN QUERY PLAN SELECT COUNT(*) FROM edges WHERE source = ?').all('n0');
console.log('');
console.log('  EXPLAIN (stripped, source=?):');
for (const row of plan) console.log(`    ${row.detail}`);

// Close before unlink; NOTE(review): the WAL side files (-wal/-shm)
// should be removed by SQLite on a clean close, so deleting only the
// main file appears sufficient here — confirm if temp files linger.
baseline.db.close(); stripped.db.close();
fs.unlinkSync(baseline.dbPath); fs.unlinkSync(stripped.dbPath);

console.log('\n=== Done ===\n');
93 changes: 31 additions & 62 deletions src/db/migrations.ts
Original file line number Diff line number Diff line change
@@ -1,60 +1,26 @@
/**
* Database Migrations
* Database Migrations — runner + backward-compat surface.
*
* Schema versioning and migration support.
* The migration definitions themselves live in
* `./migrations/<NNN>-<name>.ts`, one file per migration, with
* version derived from the filename prefix. This file is the
* runner (read schema_versions, apply pending in order) and the
* stable API surface that the rest of the codebase imports.
*
* Adding a migration: see `./migrations/index.ts`.
*/

import { SqliteDatabase } from './sqlite-adapter';
import { ALL_MIGRATIONS, CURRENT_SCHEMA_VERSION as REGISTRY_CURRENT } from './migrations/index';
import type { Migration } from './migrations/types';

/**
* Current schema version
* Highest registered migration version. Derived from the
* registry; re-exported here unchanged so existing consumers
* (`import { CURRENT_SCHEMA_VERSION } from './migrations'`) keep
* working.
*/
export const CURRENT_SCHEMA_VERSION = 3;

/**
* Migration definition
*/
interface Migration {
version: number;
description: string;
up: (db: SqliteDatabase) => void;
}

/**
* All migrations in order
*
* Note: Version 1 is the initial schema, handled by schema.sql
* Future migrations go here.
*/
const migrations: Migration[] = [
{
version: 2,
description: 'Add project metadata, provenance tracking, and unresolved ref context',
up: (db) => {
db.exec(`
CREATE TABLE IF NOT EXISTS project_metadata (
key TEXT PRIMARY KEY,
value TEXT NOT NULL,
updated_at INTEGER NOT NULL
);
ALTER TABLE unresolved_refs ADD COLUMN file_path TEXT NOT NULL DEFAULT '';
ALTER TABLE unresolved_refs ADD COLUMN language TEXT NOT NULL DEFAULT 'unknown';
ALTER TABLE edges ADD COLUMN provenance TEXT DEFAULT NULL;
CREATE INDEX IF NOT EXISTS idx_unresolved_file_path ON unresolved_refs(file_path);
CREATE INDEX IF NOT EXISTS idx_edges_provenance ON edges(provenance);
`);
},
},
{
version: 3,
description: 'Add lower(name) expression index for memory-efficient case-insensitive lookups',
up: (db) => {
db.exec(`
CREATE INDEX IF NOT EXISTS idx_nodes_lower_name ON nodes(lower(name));
`);
},
},
];
export const CURRENT_SCHEMA_VERSION: number = REGISTRY_CURRENT;

/**
* Get the current schema version from the database
Expand Down Expand Up @@ -84,17 +50,14 @@ function recordMigration(db: SqliteDatabase, version: number, description: strin
* Run all pending migrations
*/
export function runMigrations(db: SqliteDatabase, fromVersion: number): void {
const pending = migrations.filter((m) => m.version > fromVersion);

if (pending.length === 0) {
return;
}
const pending = ALL_MIGRATIONS.filter((m) => m.version > fromVersion);
if (pending.length === 0) return;

// Sort by version
pending.sort((a, b) => a.version - b.version);
  // ALL_MIGRATIONS is already sorted by version, but re-sorting the
  // filtered copy is cheap insurance against registry drift.
const ordered = [...pending].sort((a, b) => a.version - b.version);

// Run each migration in a transaction
for (const migration of pending) {
for (const migration of ordered) {
db.transaction(() => {
migration.up(db);
recordMigration(db, migration.version, migration.description);
Expand All @@ -111,13 +74,15 @@ export function needsMigration(db: SqliteDatabase): boolean {
}

/**
* Get list of pending migrations
* Get list of pending migrations.
*
* Returned as a fresh mutable array (not the underlying readonly
* registry) so callers that previously assigned the result to a
* `Migration[]`-typed variable keep working unchanged.
*/
export function getPendingMigrations(db: SqliteDatabase): Migration[] {
const current = getCurrentVersion(db);
return migrations
.filter((m) => m.version > current)
.sort((a, b) => a.version - b.version);
return ALL_MIGRATIONS.filter((m) => m.version > current).slice();
}

/**
Expand All @@ -136,3 +101,7 @@ export function getMigrationHistory(
description: row.description,
}));
}

// Re-export the registry surface for callers that want it.
export { ALL_MIGRATIONS } from './migrations/index';
export type { Migration, MigrationModule } from './migrations/types';
19 changes: 19 additions & 0 deletions src/db/migrations/002-project-metadata.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import type { MigrationModule } from './types';

// Migration: project metadata table + provenance/context columns.
// The numeric version is derived from this file's `002-` filename
// prefix by the registry (see src/db/migrations/index.ts).
export const MIGRATION: MigrationModule = {
  description: 'Add project metadata, provenance tracking, and unresolved ref context',
  // Applied inside a transaction by the runner in src/db/migrations.ts.
  // The CREATE statements are idempotent (IF NOT EXISTS) but the ALTER
  // TABLE ... ADD COLUMN statements are not re-runnable in SQLite; the
  // runner's version gate prevents double application.
  up: (db) => {
    db.exec(`
      CREATE TABLE IF NOT EXISTS project_metadata (
        key TEXT PRIMARY KEY,
        value TEXT NOT NULL,
        updated_at INTEGER NOT NULL
      );
      ALTER TABLE unresolved_refs ADD COLUMN file_path TEXT NOT NULL DEFAULT '';
      ALTER TABLE unresolved_refs ADD COLUMN language TEXT NOT NULL DEFAULT 'unknown';
      ALTER TABLE edges ADD COLUMN provenance TEXT DEFAULT NULL;
      CREATE INDEX IF NOT EXISTS idx_unresolved_file_path ON unresolved_refs(file_path);
      CREATE INDEX IF NOT EXISTS idx_edges_provenance ON edges(provenance);
    `);
  },
};
10 changes: 10 additions & 0 deletions src/db/migrations/003-lower-name-index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
import type { MigrationModule } from './types';

export const MIGRATION: MigrationModule = {
description: 'Add lower(name) expression index for memory-efficient case-insensitive lookups',
up: (db) => {
db.exec(`
CREATE INDEX IF NOT EXISTS idx_nodes_lower_name ON nodes(lower(name));
`);
},
};
Loading