diff --git a/docs/design/phase-1b-config-import-audit.md b/docs/design/phase-1b-config-import-audit.md new file mode 100644 index 0000000..f3bdb45 --- /dev/null +++ b/docs/design/phase-1b-config-import-audit.md @@ -0,0 +1,58 @@ +# Phase 1B config import audit + + +Total `import { config } from` sites under `src/`: **51** + +| File | Initial class | +| --- | --- | +| `src/services/memory-audn.ts` | mixed or construction-time | +| `src/services/memory-ingest.ts` | mixed or construction-time | +| `src/services/retrieval-format.ts` | mixed or construction-time | +| `src/services/memory-crud.ts` | mixed or construction-time | +| `src/services/retrieval-policy.ts` | mixed or construction-time | +| `src/services/reranker.ts` | mixed or construction-time | +| `src/services/write-security.ts` | request-time/module-read | +| `src/services/consensus-validation.ts` | request-time/module-read | +| `src/services/embedding.ts` | mixed or construction-time | +| `src/services/agentic-retrieval.ts` | request-time/module-read | +| `src/services/chunked-extraction.ts` | mixed or construction-time | +| `src/services/consensus-extraction.ts` | constant-or-env bootstrap | +| `src/services/conflict-policy.ts` | mixed or construction-time | +| `src/services/__tests__/retrieval-trace.test.ts` | mixed or construction-time | +| `src/services/extraction-cache.ts` | request-time/module-read | +| `src/services/__tests__/current-state-retrieval-regression.test.ts` | constant-or-env bootstrap | +| `src/services/cost-telemetry.ts` | mixed or construction-time | +| `src/services/lesson-service.ts` | request-time/module-read | +| `src/services/deferred-audn.ts` | request-time/module-read | +| `src/services/__tests__/staged-loading.test.ts` | mixed or construction-time | +| `src/__tests__/route-validation.test.ts` | mixed or construction-time | +| `src/services/consolidation-service.ts` | request-time/module-read | +| `src/__tests__/smoke.test.ts` | request-time/module-read | +| `src/services/composite-grouping.ts` | mixed or construction-time | +| `src/services/query-expansion.ts` | mixed or construction-time | +| `src/services/retrieval-trace.ts` | constant-or-env bootstrap | +| `src/services/__tests__/deferred-audn.test.ts` | request-time/module-read | +| `src/services/search-pipeline.ts` | mixed or construction-time | +| `src/services/memory-search.ts` | mixed or construction-time | +| `src/services/__tests__/write-security.test.ts` | request-time/module-read | +| `src/services/llm.ts` | mixed or construction-time | +| `src/services/memory-storage.ts` | mixed or construction-time | +| `src/app/__tests__/composed-boot-parity.test.ts` | mixed or construction-time | +| `src/app/__tests__/runtime-container.test.ts` | request-time/module-read | +| `src/db/repository-lessons.ts` | mixed or construction-time | +| `src/db/migrate.ts` | constant-or-env bootstrap | +| `src/db/repository-entities.ts` | mixed or construction-time | +| `src/db/repository-read.ts` | mixed or construction-time | +| `src/db/repository-links.ts` | mixed or construction-time | +| `src/db/query-helpers.ts` | mixed or construction-time | +| `src/db/repository-vector-search.ts` | mixed or construction-time | +| `src/db/agent-trust-repository.ts` | mixed or construction-time | +| `src/db/__tests__/test-fixtures.ts` | mixed or construction-time | +| `src/db/__tests__/claim-slot-backfill.test.ts` | mixed or construction-time | +| `src/db/__tests__/links.test.ts` | mixed or construction-time | +| `src/db/repository-representations.ts` | mixed or construction-time 
| +| `src/db/__tests__/dual-write-representations.test.ts` | request-time/module-read | +| `src/db/__tests__/mutation-audit.test.ts` | mixed or construction-time | +| `src/db/__tests__/temporal-invalidation.test.ts` | mixed or construction-time | +| `src/db/__tests__/temporal-neighbors.test.ts` | mixed or construction-time | +| `src/db/__tests__/canonical-memory-objects.test.ts` | mixed or construction-time | diff --git a/src/__tests__/config-singleton-audit.test.ts b/src/__tests__/config-singleton-audit.test.ts new file mode 100644 index 0000000..fdf3f03 --- /dev/null +++ b/src/__tests__/config-singleton-audit.test.ts @@ -0,0 +1,106 @@ +/** + * Config singleton import regression gate. + * + * Counts the non-test source files that bind the module-level config + * singleton value from config.js (any import/export pattern). The threshold + * should only move DOWN as config-threading PRs land. Any PR that adds + * a new singleton import must raise the threshold explicitly — that + * friction is the point. + * + * This test does not depend on a live database or runtime — it reads + * source files statically, matching the pattern in + * deployment-config.test.ts. + */ + +import { describe, it, expect } from 'vitest'; +import { readFileSync, readdirSync, statSync } from 'node:fs'; +import { resolve, dirname, extname } from 'node:path'; +import { fileURLToPath } from 'node:url'; + +const __dirname = dirname(fileURLToPath(import.meta.url)); +const SRC = resolve(__dirname, '..'); + +/** + * Maximum allowed non-test source files that bind the runtime config + * singleton value from config.js. Ratchet this DOWN after each + * config-threading PR lands. + * Current baseline: 34 files after the ingest/lineage config-threading + * cleanup removed those last singleton reads from `memory-lineage.ts` + * and `memory-ingest.ts`. + * Includes multi-import forms (e.g. `import { config, updateRuntimeConfig }`) + * and re-exports (e.g. `export { config } from`). + */ +const MAX_SINGLETON_IMPORTS = 34; + +/** + * Matches any import or re-export that binds the `config` value (not + * just a type) from a path ending in `config.js` or `config`. Covers + * single-line and multiline import blocks: + * import { config } from '../config.js' + * import { config, updateRuntimeConfig } from '../config.js' + * import {\n config,\n updateRuntimeConfig,\n} from '../config.js' + * export { config, ... } from './config.js' + * Excludes `import type`-only statements. + */ +const CONFIG_BINDING_RE = /(?:import|export)\s*\{[^}]*\bconfig\b[^}]*\}\s*from\s*['"][^'"]*config/s; +const IMPORT_TYPE_ONLY_RE = /import\s+type\s*\{/; + +function collectTsFiles(dir: string): string[] { + const results: string[] = []; + for (const entry of readdirSync(dir, { withFileTypes: true })) { + const full = resolve(dir, entry.name); + if (entry.isDirectory()) { + if (entry.name === '__tests__' || entry.name === 'node_modules') continue; + results.push(...collectTsFiles(full)); + } else if (entry.isFile() && extname(entry.name) === '.ts') { + results.push(full); + } + } + return results; +} + +function findSingletonImporters(): string[] { + const files = collectTsFiles(SRC); + const matches: string[] = []; + for (const filePath of files) { + const content = readFileSync(filePath, 'utf-8'); + // Find all import/export blocks from a config path that bind `config` + const hits = content.match(new RegExp(CONFIG_BINDING_RE.source, 'gs')) ?? 
[]; + const hasRuntimeBinding = hits.some((hit) => !IMPORT_TYPE_ONLY_RE.test(hit)); + if (hasRuntimeBinding) matches.push(filePath); + } + return matches.sort(); +} + +describe('config singleton regression gate', () => { + it(`non-test source files importing config singleton must not exceed ${MAX_SINGLETON_IMPORTS}`, () => { + const files = findSingletonImporters(); + + expect(files.length).toBeLessThanOrEqual(MAX_SINGLETON_IMPORTS); + + // Print the list on failure so the developer knows exactly which + // files to inspect or thread. + if (files.length > MAX_SINGLETON_IMPORTS) { + console.error( + `Config singleton imports (${files.length}) exceed threshold (${MAX_SINGLETON_IMPORTS}):\n` + + files.map((f) => ` ${f}`).join('\n'), + ); + } + }); + + it('threshold is not stale (count should be close to threshold)', () => { + const files = findSingletonImporters(); + const slack = MAX_SINGLETON_IMPORTS - files.length; + + // If the threshold has more than 5 files of slack, a threading PR + // landed without ratcheting the threshold down. Warn but don't fail + // — the primary gate is the upper-bound test above. + if (slack > 5) { + console.warn( + `Config singleton threshold has ${slack} files of slack ` + + `(threshold=${MAX_SINGLETON_IMPORTS}, actual=${files.length}). ` + + `Consider ratcheting MAX_SINGLETON_IMPORTS down to ${files.length + 2}.`, + ); + } + }); +}); diff --git a/src/__tests__/memory-route-config-seam.test.ts b/src/__tests__/memory-route-config-seam.test.ts new file mode 100644 index 0000000..929579c --- /dev/null +++ b/src/__tests__/memory-route-config-seam.test.ts @@ -0,0 +1,178 @@ +/** + * Route-level config seam tests for createMemoryRouter. + * + * Verifies that read-side route config now comes from the injected adapter + * rather than the module-level singleton for health/config responses and + * search-limit clamping. + */ + +import express from 'express'; +import { afterAll, beforeAll, beforeEach, describe, expect, it, vi } from 'vitest'; +import { createMemoryRouter } from '../routes/memories.js'; +import type { MemoryService } from '../services/memory-service.js'; + +interface BootedApp { + baseUrl: string; + close: () => Promise<void>; +} + +interface MutableRouteConfig { + retrievalProfile: string; + embeddingProvider: 'openai'; + embeddingModel: string; + llmProvider: 'openai'; + llmModel: string; + clarificationConflictThreshold: number; + maxSearchResults: number; + hybridSearchEnabled: boolean; + iterativeRetrievalEnabled: boolean; + entityGraphEnabled: boolean; + crossEncoderEnabled: boolean; + agenticRetrievalEnabled: boolean; + repairLoopEnabled: boolean; +} + +async function bindEphemeral(app: ReturnType<typeof express>): Promise<BootedApp> { + const server = app.listen(0); + await new Promise<void>((resolve) => server.once('listening', () => resolve())); + const addr = server.address(); + const port = typeof addr === 'object' && addr ?
addr.port : 0; + return { + baseUrl: `http://localhost:${port}`, + close: () => new Promise<void>((resolve) => server.close(() => resolve())), + }; +} + +describe('memory route config seam', () => { + let booted: BootedApp; + let routeConfig: MutableRouteConfig; + const search = vi.fn(); + + beforeAll(async () => { + routeConfig = { + retrievalProfile: 'route-adapter-profile', + embeddingProvider: 'openai', + embeddingModel: 'adapter-embedding-model', + llmProvider: 'openai', + llmModel: 'adapter-llm-model', + clarificationConflictThreshold: 0.91, + maxSearchResults: 3, + hybridSearchEnabled: true, + iterativeRetrievalEnabled: false, + entityGraphEnabled: true, + crossEncoderEnabled: true, + agenticRetrievalEnabled: false, + repairLoopEnabled: true, + }; + + search.mockResolvedValue({ + memories: [], + injectionText: '', + citations: [], + retrievalMode: 'flat', + }); + + const service = { + search, + fastSearch: vi.fn(), + workspaceSearch: vi.fn(), + ingest: vi.fn(), + quickIngest: vi.fn(), + storeVerbatim: vi.fn(), + workspaceIngest: vi.fn(), + expand: vi.fn(), + expandInWorkspace: vi.fn(), + list: vi.fn(), + listInWorkspace: vi.fn(), + getStats: vi.fn(), + consolidate: vi.fn(), + executeConsolidation: vi.fn(), + evaluateDecay: vi.fn(), + archiveDecayed: vi.fn(), + checkCap: vi.fn(), + getMutationSummary: vi.fn(), + getRecentMutations: vi.fn(), + getAuditTrail: vi.fn(), + getLessons: vi.fn(), + getLessonStats: vi.fn(), + reportLesson: vi.fn(), + deactivateLesson: vi.fn(), + reconcileDeferred: vi.fn(), + resetBySource: vi.fn(), + get: vi.fn(), + delete: vi.fn(), + } as unknown as MemoryService; + + const configRouteAdapter = { + current: () => ({ ...routeConfig }), + update: (updates: { maxSearchResults?: number }) => { + if (updates.maxSearchResults !== undefined) { + routeConfig.maxSearchResults = updates.maxSearchResults; + } + return Object.keys(updates); + }, + }; + + const app = express(); + app.use(express.json()); + app.use('/memories', createMemoryRouter(service, configRouteAdapter)); + booted = await bindEphemeral(app); + }); + + beforeEach(() => { + search.mockClear(); + routeConfig.maxSearchResults = 3; + }); + + afterAll(async () => { + await booted.close(); + }); + + it('serves health/config payloads from the injected adapter snapshot', async () => { + const healthRes = await fetch(`${booted.baseUrl}/memories/health`); + expect(healthRes.status).toBe(200); + const healthBody = await healthRes.json(); + expect(healthBody.config.retrieval_profile).toBe('route-adapter-profile'); + expect(healthBody.config.max_search_results).toBe(3); + + const putRes = await fetch(`${booted.baseUrl}/memories/config`, { + method: 'PUT', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ max_search_results: 7 }), + }); + expect(putRes.status).toBe(200); + const putBody = await putRes.json(); + expect(putBody.applied).toContain('maxSearchResults'); + expect(putBody.config.max_search_results).toBe(7); + + const updatedHealthRes = await fetch(`${booted.baseUrl}/memories/health`); + const updatedHealthBody = await updatedHealthRes.json(); + expect(updatedHealthBody.config.max_search_results).toBe(7); + }); + + it('clamps search limits using the injected adapter snapshot', async () => { + await fetch(`${booted.baseUrl}/memories/search`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + user_id: 'user-1', + query: 'route seam query', + limit: 50, + }), + }); + + expect(search).toHaveBeenCalledWith( + 'user-1', + 'route seam query',
undefined, + 3, + undefined, + undefined, + undefined, + { + retrievalMode: undefined, + tokenBudget: undefined, + }, + ); + }); +}); diff --git a/src/__tests__/route-validation.test.ts b/src/__tests__/route-validation.test.ts index 30d5b09..fe48f21 100644 --- a/src/__tests__/route-validation.test.ts +++ b/src/__tests__/route-validation.test.ts @@ -21,17 +21,13 @@ vi.mock('../services/embedding.js', async (importOriginal) => { }); import { pool } from '../db/pool.js'; -import { config } from '../config.js'; import { MemoryRepository } from '../db/memory-repository.js'; import { ClaimRepository } from '../db/claim-repository.js'; import { MemoryService } from '../services/memory-service.js'; import { createMemoryRouter } from '../routes/memories.js'; +import { setupTestSchema } from '../db/__tests__/test-fixtures.js'; import express from 'express'; -import { readFileSync } from 'node:fs'; -import { resolve, dirname } from 'node:path'; -import { fileURLToPath } from 'node:url'; -const __dirname = dirname(fileURLToPath(import.meta.url)); const TEST_USER = 'route-validation-test-user'; const VALID_UUID = '00000000-0000-0000-0000-000000000001'; const INVALID_UUID = 'not-a-uuid'; @@ -42,9 +38,7 @@ const app = express(); app.use(express.json()); beforeAll(async () => { - const raw = readFileSync(resolve(__dirname, '../db/schema.sql'), 'utf-8'); - const sql = raw.replace(/\{\{EMBEDDING_DIMENSIONS\}\}/g, String(config.embeddingDimensions)); - await pool.query(sql); + await setupTestSchema(pool); const repo = new MemoryRepository(pool); const claimRepo = new ClaimRepository(pool); @@ -124,7 +118,7 @@ describe('GET /memories/list — source_site filter', () => { }); describe('POST /memories/search — scope and observability contract', () => { - it('returns canonical user scope and only includes observability fields that are actually emitted', async () => { + it('returns canonical user scope and only includes observability sections that the retrieval path actually emitted', async () => { const res = await fetch(`${baseUrl}/memories/search`, { method: 'POST', headers: { 'Content-Type': 'application/json' }, @@ -138,10 +132,9 @@ describe('POST /memories/search — scope and observability contract', () => { expect(res.status).toBe(200); const body = await res.json(); expect(body.scope).toEqual({ kind: 'user', userId: TEST_USER }); - expect(body.observability).toBeDefined(); - expect(body.observability.retrieval ?? 
null).toBe(null); - expect(body.observability.packaging).toBeDefined(); - expect(body.observability.assembly).toBeDefined(); + expect(body.observability?.retrieval).toBeUndefined(); + expect(body.observability?.packaging?.packageType).toBe('subject-pack'); + expect(body.observability?.assembly?.blocks).toEqual(['subject']); }); it('returns canonical workspace scope for workspace searches', async () => { diff --git a/src/app/runtime-container.ts b/src/app/runtime-container.ts index ec2f2bd..7733df7 100644 --- a/src/app/runtime-container.ts +++ b/src/app/runtime-container.ts @@ -11,29 +11,70 @@ */ import pg from 'pg'; -import { config, updateRuntimeConfig } from '../config.js'; +import { config, updateRuntimeConfig, type CrossEncoderDtype } from '../config.js'; import { AgentTrustRepository } from '../db/agent-trust-repository.js'; import { ClaimRepository } from '../db/claim-repository.js'; import { LinkRepository } from '../db/link-repository.js'; import { MemoryRepository } from '../db/memory-repository.js'; import { EntityRepository } from '../db/repository-entities.js'; import { LessonRepository } from '../db/repository-lessons.js'; +import type { RetrievalProfile } from '../services/retrieval-profiles.js'; import { MemoryService } from '../services/memory-service.js'; /** - * Public runtime configuration subset. Phase 1A exposes the full config - * object for compatibility; later phases will split public runtime config - * from internal policy flags. + * Explicit runtime configuration subset currently needed by the runtime + * container, startup checks, search/runtime seams, and MemoryService deps. * - * NOTE (phase 1a.5): `runtime.config` currently references the same - * module-level singleton that routes, services, and the search pipeline - * all read from directly. There is no per-runtime config copy yet — - * consumers cannot construct two runtimes with different configs because - * the deeper service code (25+ `import { config }` sites across - * routes/, services/) reads the module singleton regardless. Phase 1B - * will thread config through properly and reintroduce a genuine override. + * This is intentionally narrower than the module-level config singleton: + * it describes the config surface already threaded through those seams + * today, without claiming full runtime-wide configurability yet. + * + * NOTE (phase 1a.5): `runtime.config` still references the module-level + * singleton. This branch has hardened several explicit seams around that + * singleton, but there is still no per-runtime config copy or override: + * many deeper services and repositories still import `config` directly, + * so constructing two runtimes with different configs would remain + * dishonest until a later phase removes those singleton reads. 
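+ *
+ * Illustrative only (helper name invented for this comment, not part of
+ * the patch): a seam that needs just the search cap should depend on the
+ * narrow surface instead of the singleton, e.g.
+ *   function clampLimit(limit: number, cfg: Pick<CoreRuntimeConfig, 'maxSearchResults'>): number {
+ *     return Math.min(limit, cfg.maxSearchResults);
+ *   }
+ * which mirrors what resolveEffectiveSearchLimit in routes/memories.ts
+ * now does with its route snapshot.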
*/ -export type CoreRuntimeConfig = typeof config; +export interface CoreRuntimeConfig { + adaptiveRetrievalEnabled: boolean; + agenticRetrievalEnabled: boolean; + auditLoggingEnabled: boolean; + consensusMinMemories: number; + consensusValidationEnabled: boolean; + crossEncoderDtype: CrossEncoderDtype; + crossEncoderEnabled: boolean; + crossEncoderModel: string; + embeddingDimensions: number; + entityGraphEnabled: boolean; + entitySearchMinSimilarity: number; + hybridSearchEnabled: boolean; + iterativeRetrievalEnabled: boolean; + lessonsEnabled: boolean; + linkExpansionBeforeMMR: boolean; + linkExpansionEnabled: boolean; + linkExpansionMax: number; + linkSimilarityThreshold: number; + maxSearchResults: number; + mmrEnabled: boolean; + mmrLambda: number; + namespaceClassificationEnabled: boolean; + pprDamping: number; + pprEnabled: boolean; + port: number; + queryAugmentationEnabled: boolean; + queryAugmentationMaxEntities: number; + queryAugmentationMinSimilarity: number; + queryExpansionEnabled: boolean; + queryExpansionMinSimilarity: number; + repairConfidenceFloor: number; + repairDeltaThreshold: number; + repairLoopEnabled: boolean; + repairLoopMinSimilarity: number; + rerankSkipMinGap: number; + rerankSkipTopSimilarity: number; + retrievalProfileSettings: RetrievalProfile; +} /** Repositories constructed by the runtime container. */ export interface CoreRuntimeRepos { @@ -51,6 +92,21 @@ export interface CoreRuntimeServices { } export interface CoreRuntimeConfigRouteAdapter { + current: () => { + retrievalProfile: string; + embeddingProvider: import('../config.js').EmbeddingProviderName; + embeddingModel: string; + llmProvider: import('../config.js').LLMProviderName; + llmModel: string; + clarificationConflictThreshold: number; + maxSearchResults: number; + hybridSearchEnabled: boolean; + iterativeRetrievalEnabled: boolean; + entityGraphEnabled: boolean; + crossEncoderEnabled: boolean; + agenticRetrievalEnabled: boolean; + repairLoopEnabled: boolean; + }; update: (updates: { embeddingProvider?: import('../config.js').EmbeddingProviderName; embeddingModel?: string; @@ -69,12 +125,11 @@ export interface CoreRuntimeConfigRouteAdapter { * `pool` is required — the composition root never reaches around to * import the singleton `pg.Pool` itself. * - * A `config` override is deliberately NOT accepted here. Most downstream - * route and service code still reads config directly from the module - * singleton, so any override passed here would only influence repo - * construction (`entityGraphEnabled`, `lessonsEnabled`) and silently be - * ignored everywhere else — a dishonest contract. Phase 1B will thread - * config through routes and services and reintroduce a genuine override. + * A `config` override is deliberately NOT accepted here. The container + * now owns several explicit config seams, but many downstream services + * and repositories still read the module singleton directly. Accepting + * an override here would therefore apply only partially and misstate the + * current architecture. */ export interface CoreRuntimeDeps { pool: pg.Pool; @@ -92,8 +147,9 @@ export interface CoreRuntime { /** * Compose the core runtime. Instantiates repositories and the memory * service from an explicit pool. Reads the module-level config singleton - * for repo-construction flags so there is a single source of truth - * between the container and the rest of the codebase. No mutation. 
+ * for repo-construction flags and passes that same singleton explicitly + * into MemoryService so the composition root owns the config seam. + * No mutation. */ export function createCoreRuntime(deps: CoreRuntimeDeps): CoreRuntime { const { pool } = deps; @@ -110,11 +166,30 @@ export function createCoreRuntime(deps: CoreRuntimeDeps): CoreRuntime { claims, entities ?? undefined, lessons ?? undefined, + undefined, + config, ); return { config, configRouteAdapter: { + current() { + return { + retrievalProfile: config.retrievalProfile, + embeddingProvider: config.embeddingProvider, + embeddingModel: config.embeddingModel, + llmProvider: config.llmProvider, + llmModel: config.llmModel, + clarificationConflictThreshold: config.clarificationConflictThreshold, + maxSearchResults: config.maxSearchResults, + hybridSearchEnabled: config.hybridSearchEnabled, + iterativeRetrievalEnabled: config.iterativeRetrievalEnabled, + entityGraphEnabled: config.entityGraphEnabled, + crossEncoderEnabled: config.crossEncoderEnabled, + agenticRetrievalEnabled: config.agenticRetrievalEnabled, + repairLoopEnabled: config.repairLoopEnabled, + }; + }, update(updates) { return updateRuntimeConfig(updates); }, diff --git a/src/db/__tests__/consolidation-execution.test.ts b/src/db/__tests__/consolidation-execution.test.ts index e924dcd..db0fe7a 100644 --- a/src/db/__tests__/consolidation-execution.test.ts +++ b/src/db/__tests__/consolidation-execution.test.ts @@ -119,6 +119,32 @@ describe('consolidation execution', () => { expect(version!.actor_model).toBe(config.llmModel); }); + it('preserves the current no-CMO consolidation behavior', async () => { + await seedRelatedMemories(repo); + + const result = await executeConsolidation(repo, claimRepo, TEST_USER, { + affinity: { threshold: 0.5, minClusterSize: 3, beta: 1.0, temporalLambda: 0 }, + }); + + const consolidatedId = result.consolidatedMemoryIds[0]; + const consolidatedMemory = await repo.getMemory(consolidatedId, TEST_USER); + const cmoRows = await pool.query( + `SELECT id + FROM canonical_memory_objects + WHERE user_id = $1 + AND lineage->>'claimVersionId' = ( + SELECT id::text + FROM memory_claim_versions + WHERE user_id = $1 AND memory_id = $2 + )`, + [TEST_USER, consolidatedId], + ); + + expect(consolidatedMemory).not.toBeNull(); + expect(consolidatedMemory!.metadata.cmo_id).toBeUndefined(); + expect(cmoRows.rows).toHaveLength(0); + }); + it('consolidated memory has metadata with source member IDs', async () => { const mem = await seedAndConsolidateFirst(repo, claimRepo); expect(mem!.metadata.consolidated_from).toBeDefined(); diff --git a/src/db/__tests__/test-fixtures.ts b/src/db/__tests__/test-fixtures.ts index d114fe7..cdaad24 100644 --- a/src/db/__tests__/test-fixtures.ts +++ b/src/db/__tests__/test-fixtures.ts @@ -73,8 +73,41 @@ function getSchemaSQL(): string { return raw.replace(/\{\{EMBEDDING_DIMENSIONS\}\}/g, String(config.embeddingDimensions)); } -/** Apply schema to a test database pool. */ +/** + * Return the memories.embedding vector(N) dimension in pgvector's + * atttypmod encoding, or null if the table does not exist or the + * column has no typmod set. Used to detect dim drift before re-running + * the idempotent base schema. 
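+ *
+ * Worked example (assuming stock pgvector typmod behavior, which the
+ * comparison in setupTestSchema also relies on): a column declared
+ * vector(1536) reports pg_attribute.atttypmod = 1536, so the returned
+ * value can be compared directly against config.embeddingDimensions
+ * with no decoding step.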
+ */ +async function readEmbeddingColumnDim(pool: pg.Pool): Promise<number | null> { + const { rows } = await pool.query<{ typmod: number }>( + `SELECT atttypmod AS typmod + FROM pg_attribute a + JOIN pg_class c ON a.attrelid = c.oid + WHERE c.relname = 'memories' AND a.attname = 'embedding'`, + ); + if (rows.length === 0) return null; + return rows[0].typmod > 0 ? rows[0].typmod : null; +} + +/** + * Apply schema to a test database pool. + * + * The base schema.sql is idempotent (CREATE TABLE IF NOT EXISTS), so + * re-running it cannot change the type of a column that already + * exists. When the test DB was previously initialized with a different + * EMBEDDING_DIMENSIONS (for example, left over from a prior run with + * a different .env.test), the memories.embedding column retains the + * old vector(N) dim and subsequent inserts with the new dim fail at + * the DB level — surfacing as opaque 500s in route tests. Detect that + * drift up front and drop+recreate the public schema so schema.sql + * can rebuild it at the configured dim. + */ export async function setupTestSchema(pool: pg.Pool): Promise<void> { + const existingDim = await readEmbeddingColumnDim(pool); + if (existingDim !== null && existingDim !== config.embeddingDimensions) { + await pool.query('DROP SCHEMA public CASCADE; CREATE SCHEMA public;'); + } const sql = getSchemaSQL(); await pool.query(sql); } diff --git a/src/routes/memories.ts b/src/routes/memories.ts index d4acc78..f63cce4 100644 --- a/src/routes/memories.ts +++ b/src/routes/memories.ts @@ -21,9 +21,26 @@ const ALLOWED_ORIGINS = new Set( ); interface RuntimeConfigRouteAdapter { + current(): RuntimeConfigRouteSnapshot; update(updates: RuntimeConfigRouteUpdates): string[]; } +interface RuntimeConfigRouteSnapshot { + retrievalProfile: string; + embeddingProvider: EmbeddingProviderName; + embeddingModel: string; + llmProvider: LLMProviderName; + llmModel: string; + clarificationConflictThreshold: number; + maxSearchResults: number; + hybridSearchEnabled: boolean; + iterativeRetrievalEnabled: boolean; + entityGraphEnabled: boolean; + crossEncoderEnabled: boolean; + agenticRetrievalEnabled: boolean; + repairLoopEnabled: boolean; +} + interface RuntimeConfigRouteUpdates { embeddingProvider?: EmbeddingProviderName; embeddingModel?: string; @@ -36,6 +53,9 @@ const defaultRuntimeConfigRouteAdapter: RuntimeConfigRouteAdapter = { + current() { + return readRuntimeConfigRouteSnapshot(); + }, update(updates) { return updateRuntimeConfig(updates); }, @@ -49,12 +69,12 @@ export function createMemoryRouter( registerCors(router); registerIngestRoute(router, service); registerQuickIngestRoute(router, service); - registerSearchRoute(router, service); - registerFastSearchRoute(router, service); + registerSearchRoute(router, service, configRouteAdapter); + registerFastSearchRoute(router, service, configRouteAdapter); registerExpandRoute(router, service); registerListRoute(router, service); registerStatsRoute(router, service); - registerHealthRoute(router); + registerHealthRoute(router, configRouteAdapter); registerConfigRoute(router, configRouteAdapter); registerConsolidateRoute(router, service); registerDecayRoute(router, service); @@ -124,12 +144,18 @@ function registerIngestHandler( }); } -function registerSearchRoute(router: Router, service: MemoryService): void { +function registerSearchRoute( + router: Router, + service: MemoryService, + configRouteAdapter: RuntimeConfigRouteAdapter, +): void { router.post('/search', async (req: Request, res: Response) => {
try { const body = parseSearchBody(req.body); const scope = toMemoryScope(body.userId, body.workspace, body.agentScope); - const requestLimit = body.limit === undefined ? undefined : resolveEffectiveSearchLimit(body.limit); + const requestLimit = body.limit === undefined + ? undefined + : resolveEffectiveSearchLimit(body.limit, configRouteAdapter.current()); const retrievalOptions: { retrievalMode?: typeof body.retrievalMode; tokenBudget?: typeof body.tokenBudget; skipRepairLoop?: boolean } = { retrievalMode: body.retrievalMode, tokenBudget: body.tokenBudget, @@ -162,12 +188,18 @@ function registerSearchRoute(router: Router, service: MemoryService): void { * Latency-optimized search endpoint for UC1 (memory injection, <200ms target). * Skips the LLM repair loop which accounts for ~88% of search latency. */ -function registerFastSearchRoute(router: Router, service: MemoryService): void { +function registerFastSearchRoute( + router: Router, + service: MemoryService, + configRouteAdapter: RuntimeConfigRouteAdapter, +): void { router.post('/search/fast', async (req: Request, res: Response) => { try { const body = parseSearchBody(req.body); const scope = toMemoryScope(body.userId, body.workspace, body.agentScope); - const requestLimit = body.limit === undefined ? undefined : resolveEffectiveSearchLimit(body.limit); + const requestLimit = body.limit === undefined + ? undefined + : resolveEffectiveSearchLimit(body.limit, configRouteAdapter.current()); const result = scope.kind === 'workspace' ? await service.workspaceSearch(scope.userId, body.query, body.workspace!, { agentScope: scope.agentScope, @@ -235,9 +267,9 @@ function registerStatsRoute(router: Router, service: MemoryService): void { }); } -function registerHealthRoute(router: Router): void { +function registerHealthRoute(router: Router, configRouteAdapter: RuntimeConfigRouteAdapter): void { router.get('/health', (_req: Request, res: Response) => { - res.json({ status: 'ok', config: formatHealthConfig() }); + res.json({ status: 'ok', config: formatHealthConfig(configRouteAdapter.current()) }); }); } @@ -254,7 +286,11 @@ function registerConfigRoute(router: Router, configRouteAdapter: RuntimeConfigRo clarificationConflictThreshold: req.body.clarification_conflict_threshold, maxSearchResults: req.body.max_search_results, }); - res.json({ applied, config: formatHealthConfig(), note: 'Provider/model changes are applied in-memory for local experimentation.' 
}); + res.json({ + applied, + config: formatHealthConfig(configRouteAdapter.current()), + note: 'Provider/model changes are applied in-memory for local experimentation.', + }); } catch (err) { handleRouteError(res, 'PUT /memories/config', err); } @@ -600,8 +636,11 @@ function parseOptionalIsoTimestamp(value: unknown): string | undefined { return value; } -function resolveEffectiveSearchLimit(requestedLimit: number | undefined): number { - const maxLimit = config.maxSearchResults; +function resolveEffectiveSearchLimit( + requestedLimit: number | undefined, + runtimeConfig: Pick<RuntimeConfigRouteSnapshot, 'maxSearchResults'>, +): number { + const maxLimit = runtimeConfig.maxSearchResults; if (requestedLimit === undefined) return maxLimit; return Math.min(requestedLimit, maxLimit); } @@ -633,24 +672,41 @@ function applyCorsHeaders(req: Request, res: Response): void { } -function formatHealthConfig() { +function readRuntimeConfigRouteSnapshot(): RuntimeConfigRouteSnapshot { return { - retrieval_profile: config.retrievalProfile, - embedding_provider: config.embeddingProvider, - embedding_model: config.embeddingModel, - llm_provider: config.llmProvider, - llm_model: config.llmModel, - clarification_conflict_threshold: config.clarificationConflictThreshold, - max_search_results: config.maxSearchResults, - hybrid_search_enabled: config.hybridSearchEnabled, - iterative_retrieval_enabled: config.iterativeRetrievalEnabled, - entity_graph_enabled: config.entityGraphEnabled, - cross_encoder_enabled: config.crossEncoderEnabled, - agentic_retrieval_enabled: config.agenticRetrievalEnabled, - repair_loop_enabled: config.repairLoopEnabled, + retrievalProfile: config.retrievalProfile, + embeddingProvider: config.embeddingProvider, + embeddingModel: config.embeddingModel, + llmProvider: config.llmProvider, + llmModel: config.llmModel, + clarificationConflictThreshold: config.clarificationConflictThreshold, + maxSearchResults: config.maxSearchResults, + hybridSearchEnabled: config.hybridSearchEnabled, + iterativeRetrievalEnabled: config.iterativeRetrievalEnabled, + entityGraphEnabled: config.entityGraphEnabled, + crossEncoderEnabled: config.crossEncoderEnabled, + agenticRetrievalEnabled: config.agenticRetrievalEnabled, + repairLoopEnabled: config.repairLoopEnabled, }; } +function formatHealthConfig(runtimeConfig: RuntimeConfigRouteSnapshot) { + return { + retrieval_profile: runtimeConfig.retrievalProfile, + embedding_provider: runtimeConfig.embeddingProvider, + embedding_model: runtimeConfig.embeddingModel, + llm_provider: runtimeConfig.llmProvider, + llm_model: runtimeConfig.llmModel, + clarification_conflict_threshold: runtimeConfig.clarificationConflictThreshold, + max_search_results: runtimeConfig.maxSearchResults, + hybrid_search_enabled: runtimeConfig.hybridSearchEnabled, + iterative_retrieval_enabled: runtimeConfig.iterativeRetrievalEnabled, + entity_graph_enabled: runtimeConfig.entityGraphEnabled, + cross_encoder_enabled: runtimeConfig.crossEncoderEnabled, + agentic_retrieval_enabled: runtimeConfig.agenticRetrievalEnabled, + repair_loop_enabled: runtimeConfig.repairLoopEnabled, + }; +} function formatSearchResponse(result: RetrievalResult, scope: MemoryScope) { const observability = buildRetrievalObservability(result); return { ...(observability ?
{ observability } : {}), }; } - diff --git a/src/services/__tests__/canonical-memory-lineage.test.ts b/src/services/__tests__/canonical-memory-lineage.test.ts index f0cd8e6..2bd1c9b 100644 --- a/src/services/__tests__/canonical-memory-lineage.test.ts +++ b/src/services/__tests__/canonical-memory-lineage.test.ts @@ -149,6 +149,76 @@ describe('canonical memory lineage', () => { expect(deleteCmoRow.rows[0].lineage.claimVersionId).toBe(claim!.invalidated_by_version_id); }); + it('preserves delete tombstone claim-version invariants', async () => { + const originalConversation = 'original-employer'; + const deleteConversation = 'delete-employer'; + const originalFact = 'User works at OpenAI.'; + const deleteFact = 'User no longer works at OpenAI.'; + const originalAt = new Date('2026-01-02T00:00:00.000Z'); + const deleteAt = new Date('2026-04-02T00:00:00.000Z'); + const baseEmbedding = unitVector(31); + + registerConversation(originalConversation, originalFact, baseEmbedding, 'Works at OpenAI'); + registerConversation(deleteConversation, deleteFact, offsetVector(baseEmbedding, 13, 0.01), 'No longer works at OpenAI'); + + const { memory: originalMemory, version: originalVersion } = await ingestAndCapture(originalConversation, originalAt); + + decisionPlans.set(deleteFact, { + action: 'DELETE', + targetMemoryId: originalMemory!.id, + updatedContent: null, + contradictionConfidence: 0.94, + clarificationNote: null, + }); + + await ctx.service.ingest(TEST_USER, deleteConversation, 'test', 'https://source/delete-employer', deleteAt); + + const claim = await ctx.claimRepo.getClaim(originalVersion!.claim_id, TEST_USER); + const tombstoneVersion = await ctx.claimRepo.getClaimVersion(claim!.invalidated_by_version_id!, TEST_USER); + + expect(tombstoneVersion).not.toBeNull(); + expect(tombstoneVersion!.memory_id).toBeNull(); + expect(tombstoneVersion!.content).toBe(`[DELETED] ${deleteFact}`); + expect(tombstoneVersion!.importance).toBe(0); + expect(tombstoneVersion!.source_site).toBe(''); + expect(tombstoneVersion!.source_url).toBe(''); + expect(tombstoneVersion!.previous_version_id).toBe(originalVersion!.id); + expect(tombstoneVersion!.embedding).toEqual(originalVersion!.embedding); + }); + + it('backfills lineage for a legacy projection without emitting a mutation CMO', async () => { + const { memoryId, target } = await backfillLegacyProjection( + 'Legacy memory without claim lineage.', + unitVector(29), + 0.6, + ); + const claim = await ctx.claimRepo.getClaim(target.claimId, TEST_USER); + const version = await ctx.claimRepo.getClaimVersionByMemoryId(TEST_USER, memoryId); + const cmoRows = await pool.query('SELECT id FROM canonical_memory_objects WHERE user_id = $1', [TEST_USER]); + + expect(target.memoryId).toBe(memoryId); + expect(target.cmoId).toBeNull(); + expect(claim?.current_version_id).toBe(target.versionId); + expect(version?.id).toBe(target.versionId); + expect(cmoRows.rows).toHaveLength(0); + }); + + it('leaves backfilled claim-version provenance fields null', async () => { + const { target } = await backfillLegacyProjection( + 'Legacy fact with no prior claim version.', + unitVector(37), + 0.55, + ); + const version = await ctx.claimRepo.getClaimVersion(target.versionId, TEST_USER); + + expect(version).not.toBeNull(); + expect(version!.mutation_type).toBeNull(); + expect(version!.mutation_reason).toBeNull(); + expect(version!.previous_version_id).toBeNull(); + expect(version!.actor_model).toBeNull(); + expect(version!.contradiction_confidence).toBeNull(); + }); + /** Ingest a conversation and 
return its first memory, version, and raw result. */ async function ingestAndCapture(conversation: string, timestamp: Date, sourceUrl = 'https://source/original') { const result = await ctx.service.ingest(TEST_USER, conversation, 'test', sourceUrl, timestamp); @@ -157,6 +227,21 @@ describe('canonical memory lineage', () => { return { result, memory, version }; } + /** Create a legacy projection and force the claim-version backfill seam to run. */ + async function backfillLegacyProjection(content: string, embedding: number[], importance: number) { + const memoryId = await ctx.repo.storeMemory({ + userId: TEST_USER, + content, + embedding, + memoryType: 'semantic', + importance, + sourceSite: 'test', + }); + const { ensureClaimTarget } = await import('../memory-storage.js'); + const target = await ensureClaimTarget({ repo: ctx.repo, claims: ctx.claimRepo } as any, TEST_USER, memoryId); + return { memoryId, target }; + } + /** Query a canonical_memory_objects row by id. */ async function queryCmoById(cmoId: string) { return pool.query( diff --git a/src/services/__tests__/composite-grouping.test.ts b/src/services/__tests__/composite-grouping.test.ts index 72064b1..683aacc 100644 --- a/src/services/__tests__/composite-grouping.test.ts +++ b/src/services/__tests__/composite-grouping.test.ts @@ -11,6 +11,7 @@ vi.mock('../../config.js', () => ({ config: { compositeGroupingEnabled: true, compositeMinClusterSize: 2, + compositeMaxClusterSize: 3, compositeSimilarityThreshold: 0.55, }, })); @@ -59,20 +60,21 @@ describe('buildComposites', () => { expect(composite.keywords).toContain('strict'); }); - it('produces a non-empty L1 overview for composites with 3+ facts', () => { - const embeddings = similarEmbeddings(2, 4); + it('produces a non-empty L1 overview when joined content exceeds truncation threshold', () => { + const embeddings = similarEmbeddings(2, 3); + // Multi-sentence facts so the joined content has >3 sentences within the + // compositeMaxClusterSize cap (3 facts × 2 sentences = 6 sentences joined). const facts: CompositeInput[] = [ - { memoryId: 'a', content: 'User is building a React application for personal finance tracking.', embedding: embeddings[0], importance: 0.7, keywords: ['React'], headline: 'Finance tracker' }, - { memoryId: 'b', content: 'The finance tracker uses Supabase for the backend database layer.', embedding: embeddings[1], importance: 0.6, keywords: ['Supabase'], headline: 'Supabase backend' }, - { memoryId: 'c', content: 'Tailwind CSS handles all styling in the finance tracker project.', embedding: embeddings[2], importance: 0.5, keywords: ['Tailwind'], headline: 'Tailwind styling' }, - { memoryId: 'd', content: 'User plans to deploy the finance tracker on Vercel hosting platform.', embedding: embeddings[3], importance: 0.5, keywords: ['Vercel'], headline: 'Vercel deployment' }, + { memoryId: 'a', content: 'User is building a React application. It tracks personal finances.', embedding: embeddings[0], importance: 0.7, keywords: ['React'], headline: 'Finance tracker' }, + { memoryId: 'b', content: 'The backend uses Supabase. It provides the database layer.', embedding: embeddings[1], importance: 0.6, keywords: ['Supabase'], headline: 'Supabase backend' }, + { memoryId: 'c', content: 'Tailwind CSS handles styling. 
The project uses utility classes.', embedding: embeddings[2], importance: 0.5, keywords: ['Tailwind'], headline: 'Tailwind styling' }, ]; const composites = buildComposites(facts); expect(composites.length).toBe(1); const composite = composites[0]; - // With 4 sentences joined, generateL1Overview should truncate to first 2-3 + // 6 sentences joined; generateL1Overview truncates to first 3 expect(composite.overview).not.toBe(''); expect(composite.overview.length).toBeLessThan(composite.content.length); }); @@ -114,6 +116,31 @@ describe('buildComposites', () => { expect(composites[0].memberMemoryIds).not.toContain('c'); }); + it('caps cluster size at compositeMaxClusterSize', () => { + // 5 highly similar facts — without the cap all 5 would land in one cluster. + // With compositeMaxClusterSize=3 the first cluster fills to 3 and the + // remaining 2 spill into a second cluster (which meets minClusterSize=2). + const embeddings = similarEmbeddings(7, 5); + const facts: CompositeInput[] = embeddings.map((emb, i) => ({ + memoryId: String.fromCharCode(97 + i), + content: `Similar fact number ${i + 1} about the same topic.`, + embedding: emb, + importance: 0.5, + keywords: ['topic'], + headline: `Fact ${i + 1}`, + })); + + const composites = buildComposites(facts); + + // Every composite must respect the cap + for (const composite of composites) { + expect(composite.memberMemoryIds.length).toBeLessThanOrEqual(3); + } + // All 5 facts should still be accounted for across composites + const allMembers = composites.flatMap((c) => c.memberMemoryIds); + expect(allMembers.sort()).toEqual(['a', 'b', 'c', 'd', 'e']); + }); + it('returns empty array when fewer facts than minClusterSize', () => { const facts: CompositeInput[] = [ { memoryId: 'a', content: 'Single fact.', embedding: fakeEmbedding(1), importance: 0.5, keywords: [], headline: 'Single' }, diff --git a/src/services/__tests__/memory-crud-runtime-config.test.ts b/src/services/__tests__/memory-crud-runtime-config.test.ts new file mode 100644 index 0000000..25e29a4 --- /dev/null +++ b/src/services/__tests__/memory-crud-runtime-config.test.ts @@ -0,0 +1,67 @@ +/** + * Runtime config seam tests for memory-crud consolidation delegation. + * + * Verifies that the service-layer CRUD helper forwards deps.config into the + * consolidation execution seam instead of letting lineage fall back to the + * module singleton. 
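+ *
+ * Call shape pinned by the assertion below (a sketch of the seam, not a
+ * public contract):
+ *   performExecuteConsolidation(deps, userId)
+ *     // delegates to:
+ *     executeConsolidation(deps.repo, deps.claims, userId, options, deps.config)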
+ */ + +import { describe, expect, it, vi } from 'vitest'; + +const { mockExecuteConsolidation } = vi.hoisted(() => ({ + mockExecuteConsolidation: vi.fn().mockResolvedValue({ + clustersConsolidated: 0, + memoriesArchived: 0, + memoriesCreated: 0, + consolidatedMemoryIds: [], + }), +})); + +vi.mock('../../config.js', () => ({ + config: { + auditLoggingEnabled: false, + decayRetentionThreshold: 0.5, + decayMinAgeDays: 30, + }, +})); +vi.mock('../consolidation-service.js', () => ({ + findConsolidationCandidates: vi.fn(), + executeConsolidation: mockExecuteConsolidation, +})); +vi.mock('../memory-lifecycle.js', () => ({ + evaluateDecayCandidates: vi.fn(), + checkMemoryCap: vi.fn(), +})); +vi.mock('../audit-events.js', () => ({ emitAuditEvent: vi.fn() })); +vi.mock('../deferred-audn.js', () => ({ + shouldDeferAudn: vi.fn(), + deferMemoryForReconciliation: vi.fn(), + reconcileUser: vi.fn(), + reconcileAll: vi.fn(), + getReconciliationStatus: vi.fn(), +})); +vi.mock('../claim-slotting.js', () => ({ + buildPersistedRelationClaimSlot: vi.fn(), +})); + +const { performExecuteConsolidation } = await import('../memory-crud.js'); + +describe('memory-crud runtime config seam', () => { + it('passes deps.config into executeConsolidation', async () => { + const deps = { + repo: { kind: 'repo' }, + claims: { kind: 'claims' }, + config: { llmModel: 'runtime-llm' }, + } as any; + + await performExecuteConsolidation(deps, 'user-1'); + + expect(mockExecuteConsolidation).toHaveBeenCalledWith( + deps.repo, + deps.claims, + 'user-1', + undefined, + deps.config, + ); + }); +}); diff --git a/src/services/__tests__/memory-ingest-runtime-config.test.ts b/src/services/__tests__/memory-ingest-runtime-config.test.ts new file mode 100644 index 0000000..ad7f1b6 --- /dev/null +++ b/src/services/__tests__/memory-ingest-runtime-config.test.ts @@ -0,0 +1,276 @@ +/** + * Runtime config seam tests for memory-ingest. + * + * Verifies that memory-ingest uses deps.config for the already-threaded + * quick-ingest, entropy-gate, and composite-grouping seams. 
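+ *
+ * Keys exercised per seam, as asserted below: quick ingest reads
+ * audnCandidateThreshold and fastAudnDuplicateThreshold; the entropy gate
+ * reads entropyGateThreshold and entropyGateAlpha; composite grouping
+ * reads compositeGroupingEnabled and compositeMinClusterSize. All are
+ * read from deps.config rather than the mocked module singleton.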
+ */ + +import { beforeEach, describe, expect, it, vi } from 'vitest'; + +const { + mockGenerateLinks, + mockConsensusExtractFacts, + mockComputeEntropyScore, + mockBuildComposites, + mockFindFilteredCandidates, +} = vi.hoisted(() => ({ + mockGenerateLinks: vi.fn(), + mockConsensusExtractFacts: vi.fn(), + mockComputeEntropyScore: vi.fn(), + mockBuildComposites: vi.fn(), + mockFindFilteredCandidates: vi.fn(), +})); +const { mockStoreCanonicalFact } = vi.hoisted(() => ({ + mockStoreCanonicalFact: vi.fn(), +})); + +const moduleConfig = { + audnCandidateThreshold: 0.7, + compositeGroupingEnabled: false, + compositeMinClusterSize: 99, + entropyGateAlpha: 0.4, + entropyGateEnabled: false, + entropyGateThreshold: 0.9, + fastAudnEnabled: false, + fastAudnDuplicateThreshold: 0.95, +}; + +vi.mock('../../config.js', () => ({ config: moduleConfig })); +vi.mock('../search-pipeline.js', () => ({ generateLinks: mockGenerateLinks })); +vi.mock('../quick-extraction.js', () => ({ + quickExtractFacts: vi.fn(() => [ + { + fact: 'User prefers Rust', + headline: 'Prefers Rust', + importance: 0.8, + type: 'preference', + keywords: ['rust'], + entities: [], + relations: [], + }, + ]), +})); +vi.mock('../embedding.js', () => ({ + embedText: vi.fn().mockResolvedValue([0.1, 0.2]), +})); +vi.mock('../write-security.js', () => ({ + assessWriteSecurity: vi.fn(() => ({ + allowed: true, + trust: { score: 0.9 }, + })), + recordRejectedWrite: vi.fn(), +})); +vi.mock('../memory-storage.js', () => ({ + resolveDeterministicClaimSlot: vi.fn().mockResolvedValue(null), + findSlotConflictCandidates: vi.fn().mockResolvedValue([]), + storeCanonicalFact: mockStoreCanonicalFact, +})); +vi.mock('../conflict-policy.js', () => ({ + mergeCandidates: vi.fn((vectorCandidates: unknown[], slotCandidates: unknown[]) => [ + ...vectorCandidates, + ...slotCandidates, + ]), + applyClarificationOverrides: vi.fn(), +})); +vi.mock('../timing.js', () => ({ + timed: vi.fn(async (_name: string, fn: () => unknown) => fn()), +})); +vi.mock('../consensus-extraction.js', () => ({ + consensusExtractFacts: mockConsensusExtractFacts, +})); +vi.mock('../extraction-cache.js', () => ({ + cachedResolveAUDN: vi.fn(), +})); +vi.mock('../memory-network.js', () => ({ + classifyNetwork: vi.fn(), +})); +vi.mock('../namespace-retrieval.js', () => ({ + inferNamespace: vi.fn(), + deriveMajorityNamespace: vi.fn(), +})); +vi.mock('../entropy-gate.js', () => ({ + computeEntropyScore: mockComputeEntropyScore, +})); +vi.mock('../composite-grouping.js', () => ({ + buildComposites: mockBuildComposites, +})); +vi.mock('../memory-audn.js', () => ({ + findFilteredCandidates: mockFindFilteredCandidates, + resolveAndExecuteAudn: vi.fn(), +})); + +const { performIngest, performQuickIngest } = await import('../memory-ingest.js'); + +describe('memory-ingest runtime config seam', () => { + beforeEach(() => { + vi.clearAllMocks(); + mockGenerateLinks.mockResolvedValue(1); + mockStoreCanonicalFact.mockResolvedValue({ outcome: 'stored', memoryId: 'memory-1' }); + mockConsensusExtractFacts.mockResolvedValue([ + { + fact: 'User prefers Rust', + headline: 'Prefers Rust', + importance: 0.8, + type: 'preference', + keywords: ['rust'], + entities: [], + relations: [], + }, + ]); + mockComputeEntropyScore.mockReturnValue({ accepted: true }); + mockBuildComposites.mockReturnValue([]); + mockFindFilteredCandidates.mockResolvedValue([]); + }); + + it('passes deps.config into generateLinks during quick ingest', async () => { + const runtimeConfig = { + linkExpansionEnabled: true, + 
linkSimilarityThreshold: 0.42, + }; + const repo = { + storeEpisode: vi.fn().mockResolvedValue('episode-1'), + findNearDuplicates: vi.fn().mockResolvedValue([]), + }; + const deps = { + config: runtimeConfig, + repo, + claims: {}, + entities: null, + lessons: null, + observationService: null, + uriResolver: {}, + } as any; + + const result = await performQuickIngest( + deps, + 'user-1', + 'User: I prefer Rust', + 'chat', + ); + + expect(result.linksCreated).toBe(1); + expect(mockGenerateLinks).toHaveBeenCalledWith( + repo, + 'user-1', + ['memory-1'], + new Map([['memory-1', [0.1, 0.2]]]), + runtimeConfig, + ); + }); + + it('uses deps.config for duplicate thresholds in quick ingest', async () => { + const runtimeConfig = { + audnCandidateThreshold: 0.42, + fastAudnEnabled: true, + fastAudnDuplicateThreshold: 0.83, + linkExpansionEnabled: false, + linkSimilarityThreshold: 0.5, + }; + const repo = { + storeEpisode: vi.fn().mockResolvedValue('episode-1'), + findNearDuplicates: vi.fn().mockResolvedValue([ + { id: 'existing-1', content: 'User prefers Rust', similarity: 0.9, importance: 0.8 }, + ]), + }; + const deps = { + config: runtimeConfig, + repo, + claims: {}, + entities: null, + lessons: null, + observationService: null, + uriResolver: {}, + } as any; + + const result = await performQuickIngest(deps, 'user-1', 'User: I prefer Rust', 'chat'); + + expect(repo.findNearDuplicates).toHaveBeenCalledWith('user-1', [0.1, 0.2], 0.42); + expect(result.memoriesSkipped).toBe(1); + expect(result.memoryIds).toEqual(['existing-1']); + expect(mockStoreCanonicalFact).not.toHaveBeenCalled(); + }); + + it('uses deps.config for entropy gate parameters during ingest', async () => { + const runtimeConfig = { + audnCandidateThreshold: 0.42, + auditLoggingEnabled: false, + compositeGroupingEnabled: false, + compositeMinClusterSize: 99, + entityGraphEnabled: false, + entropyGateAlpha: 0.73, + entropyGateEnabled: true, + entropyGateThreshold: 0.21, + fastAudnEnabled: false, + fastAudnDuplicateThreshold: 0.83, + lessonsEnabled: false, + llmModel: 'runtime-llm', + linkExpansionEnabled: false, + linkSimilarityThreshold: 0.5, + }; + const repo = { + storeEpisode: vi.fn().mockResolvedValue('episode-1'), + backdateMemories: vi.fn(), + }; + const deps = { + config: runtimeConfig, + repo, + claims: {}, + entities: null, + lessons: null, + observationService: null, + uriResolver: {}, + } as any; + + await performIngest(deps, 'user-1', 'User: I prefer Rust', 'chat'); + + expect(mockComputeEntropyScore).toHaveBeenCalledWith( + expect.objectContaining({ + windowEntities: ['rust'], + windowEmbedding: [0.1, 0.2], + }), + { threshold: 0.21, alpha: 0.73 }, + ); + }); + + it('uses deps.config for composite grouping gate during ingest', async () => { + const runtimeConfig = { + audnCandidateThreshold: 0.42, + auditLoggingEnabled: false, + compositeGroupingEnabled: true, + compositeMinClusterSize: 1, + entityGraphEnabled: false, + entropyGateAlpha: 0.73, + entropyGateEnabled: false, + entropyGateThreshold: 0.21, + fastAudnEnabled: false, + fastAudnDuplicateThreshold: 0.83, + lessonsEnabled: false, + llmModel: 'runtime-llm', + linkExpansionEnabled: false, + linkSimilarityThreshold: 0.5, + }; + const repo = { + storeEpisode: vi.fn().mockResolvedValue('episode-1'), + backdateMemories: vi.fn(), + }; + const deps = { + config: runtimeConfig, + repo, + claims: {}, + entities: null, + lessons: null, + observationService: null, + uriResolver: {}, + } as any; + + const result = await performIngest(deps, 'user-1', 'User: I prefer Rust', 
'chat'); + + expect(mockBuildComposites).toHaveBeenCalledWith([ + expect.objectContaining({ + memoryId: 'memory-1', + content: 'User prefers Rust', + }), + ]); + expect(result.compositesCreated).toBe(0); + }); +}); diff --git a/src/services/__tests__/memory-lineage-runtime-config.test.ts b/src/services/__tests__/memory-lineage-runtime-config.test.ts new file mode 100644 index 0000000..14ac6da --- /dev/null +++ b/src/services/__tests__/memory-lineage-runtime-config.test.ts @@ -0,0 +1,55 @@ +/** + * Runtime config seam tests for memory-lineage. + * + * Verifies that ingest-side lineage emission uses the explicit runtime + * llmModel when provided, instead of silently pinning actor_model to the + * module singleton. + */ + +import { describe, expect, it, vi } from 'vitest'; + +import { emitLineageEvent } from '../memory-lineage.js'; + +describe('memory-lineage runtime config seam', () => { + it('uses the explicit llmModel for canonical add provenance', async () => { + const claims = { + createClaim: vi.fn().mockResolvedValue('claim-1'), + createClaimVersion: vi.fn().mockResolvedValue('version-1'), + setClaimCurrentVersion: vi.fn().mockResolvedValue(undefined), + addEvidence: vi.fn().mockResolvedValue(undefined), + createUpdateVersion: vi.fn(), + supersedeClaimVersion: vi.fn(), + invalidateClaim: vi.fn(), + }; + const repo = { + storeCanonicalMemoryObject: vi.fn().mockResolvedValue('cmo-1'), + }; + + await emitLineageEvent({ claims, repo, config: { llmModel: 'runtime-llm' } }, { + kind: 'canonical-add', + userId: 'user-1', + fact: { + fact: 'User prefers Rust.', + headline: 'Prefers Rust', + importance: 0.9, + type: 'preference', + keywords: ['rust'], + entities: [], + relations: [], + }, + embedding: [0.1, 0.2], + sourceSite: 'chat', + sourceUrl: 'https://source/test', + episodeId: 'episode-1', + logicalTimestamp: undefined, + claimSlot: null, + createProjection: vi.fn().mockResolvedValue('memory-1'), + }); + + expect(claims.createClaimVersion).toHaveBeenCalledWith( + expect.objectContaining({ + provenance: expect.objectContaining({ actorModel: 'runtime-llm' }), + }), + ); + }); +}); diff --git a/src/services/__tests__/memory-search-runtime-config.test.ts b/src/services/__tests__/memory-search-runtime-config.test.ts new file mode 100644 index 0000000..c8da8bd --- /dev/null +++ b/src/services/__tests__/memory-search-runtime-config.test.ts @@ -0,0 +1,105 @@ +/** + * Runtime config seam tests for memory-search. + * + * Verifies that performSearch threads deps.config into the search pipeline + * and uses that same runtime-owned config to gate request-time lessons, + * consensus validation, and audit side effects. 
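+ *
+ * Wiring pinned by these assertions (a sketch; the argument position is
+ * inferred from the mock call below):
+ *   performSearch(deps, userId, query)
+ *     // eighth pipeline argument carries the runtime config:
+ *     runSearchPipelineWithTrace(..., { runtimeConfig: deps.config })
+ * and deps.config.lessonsEnabled / consensusValidationEnabled /
+ * auditLoggingEnabled gate checkLessons, validateConsensus, and
+ * emitAuditEvent respectively.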
+ */ + +import { beforeEach, describe, expect, it, vi } from 'vitest'; +import { createSearchResult } from './test-fixtures.js'; + +const { + mockCheckLessons, + mockValidateConsensus, + mockEmitAuditEvent, + mockRunSearchPipelineWithTrace, +} = vi.hoisted(() => ({ + mockCheckLessons: vi.fn(), + mockValidateConsensus: vi.fn(), + mockEmitAuditEvent: vi.fn(), + mockRunSearchPipelineWithTrace: vi.fn(), +})); + +vi.mock('../lesson-service.js', () => ({ + checkLessons: mockCheckLessons, + recordContradictionLesson: vi.fn(), +})); +vi.mock('../consensus-validation.js', () => ({ validateConsensus: mockValidateConsensus })); +vi.mock('../audit-events.js', () => ({ emitAuditEvent: mockEmitAuditEvent })); +vi.mock('../retrieval-policy.js', () => ({ + resolveSearchLimitDetailed: vi.fn(() => ({ + limit: 5, + classification: { label: 'simple', matchedMarker: null }, + })), + classifyQueryDetailed: vi.fn(() => ({ label: 'simple' })), +})); +vi.mock('../search-pipeline.js', () => ({ + runSearchPipelineWithTrace: mockRunSearchPipelineWithTrace, +})); +vi.mock('../composite-staleness.js', () => ({ + excludeStaleComposites: vi.fn(async (_repo, _userId, memories) => ({ + filtered: memories, + removedCompositeIds: [], + })), +})); + +const { performSearch } = await import('../memory-search.js'); + +function createTrace() { + return { + event: vi.fn(), + stage: vi.fn(), + finalize: vi.fn(), + setPackagingSummary: vi.fn(), + setAssemblySummary: vi.fn(), + setRetrievalSummary: vi.fn(), + getRetrievalSummary: vi.fn(() => undefined), + }; +} + +function createDeps(runtimeConfig: { + lessonsEnabled: boolean; + consensusValidationEnabled: boolean; + consensusMinMemories: number; + auditLoggingEnabled: boolean; +}) { + return { + config: runtimeConfig, + repo: { touchMemory: vi.fn().mockResolvedValue(undefined) }, + claims: {}, + entities: null, + lessons: {}, + observationService: null, + uriResolver: { resolve: vi.fn().mockResolvedValue(null), format: vi.fn() }, + } as any; +} + +describe('performSearch runtime config seam', () => { + beforeEach(() => { + vi.clearAllMocks(); + mockCheckLessons.mockResolvedValue({ safe: true }); + mockValidateConsensus.mockResolvedValue({ removedMemoryIds: [], judgments: [] }); + mockRunSearchPipelineWithTrace.mockResolvedValue({ + filtered: [createSearchResult({ id: 'memory-1', content: 'alpha result', score: 0.9 })], + trace: createTrace(), + }); + }); + + it('threads deps.config into the pipeline and gates request-time side effects from it', async () => { + const runtimeConfig = { + lessonsEnabled: false, + consensusValidationEnabled: false, + consensusMinMemories: 2, + auditLoggingEnabled: false, + }; + + const result = await performSearch(createDeps(runtimeConfig), 'user-1', 'find alpha'); + + expect(result.memories).toHaveLength(1); + expect(mockRunSearchPipelineWithTrace.mock.calls[0]?.[7]?.runtimeConfig).toBe(runtimeConfig); + expect(mockCheckLessons).not.toHaveBeenCalled(); + expect(mockValidateConsensus).not.toHaveBeenCalled(); + expect(mockEmitAuditEvent).not.toHaveBeenCalled(); + }); +}); diff --git a/src/services/__tests__/memory-service-config.test.ts b/src/services/__tests__/memory-service-config.test.ts new file mode 100644 index 0000000..84d34fb --- /dev/null +++ b/src/services/__tests__/memory-service-config.test.ts @@ -0,0 +1,227 @@ +/** + * MemoryService config seam tests. + * + * Verifies that the service can thread an explicit runtime config into its + * delegated modules while preserving the current singleton default when no + * override is provided. 
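+ *
+ * Constructor seam these tests drive (parameter names assumed from the
+ * runtime container; only the positions are asserted here):
+ *   new MemoryService(repo, claims, entities?, lessons?, observationService?, runtimeConfig?)
+ * where an omitted sixth argument falls back to the module config
+ * singleton.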
+ */ + +import { describe, expect, it, vi } from 'vitest'; + +const { + mockPerformSearch, + mockPerformIngest, + mockPerformQuickIngest, + mockPerformWorkspaceIngest, +} = vi.hoisted(() => ({ + mockPerformSearch: vi.fn(), + mockPerformIngest: vi.fn(), + mockPerformQuickIngest: vi.fn(), + mockPerformWorkspaceIngest: vi.fn(), +})); + +const moduleConfig = { + lessonsEnabled: true, + consensusValidationEnabled: true, + consensusMinMemories: 2, + auditLoggingEnabled: true, +}; + +vi.mock('../../config.js', () => ({ config: moduleConfig })); +vi.mock('../memory-ingest.js', () => ({ + performIngest: mockPerformIngest, + performQuickIngest: mockPerformQuickIngest, + performStoreVerbatim: vi.fn(), + performWorkspaceIngest: mockPerformWorkspaceIngest, +})); +vi.mock('../memory-search.js', () => ({ + performSearch: mockPerformSearch, + performFastSearch: vi.fn(), + performWorkspaceSearch: vi.fn(), +})); +vi.mock('../memory-crud.js', () => ({})); +vi.mock('../atomicmem-uri.js', () => ({ + URIResolver: class { + resolve = vi.fn(); + format = vi.fn(); + }, +})); + +const { MemoryService } = await import('../memory-service.js'); + +describe('MemoryService config seam', () => { + it('threads an explicit runtime config into delegated search deps', async () => { + const runtimeConfig = { + lessonsEnabled: false, + consensusValidationEnabled: false, + consensusMinMemories: 5, + auditLoggingEnabled: false, + }; + mockPerformSearch.mockResolvedValue({ + memories: [], + injectionText: '', + citations: [], + retrievalMode: 'flat', + }); + const service = new MemoryService( + {} as any, + {} as any, + undefined, + undefined, + undefined, + runtimeConfig as any, + ); + + await service.search('user-1', 'config seam query'); + + expect(mockPerformSearch).toHaveBeenCalledWith( + expect.objectContaining({ config: runtimeConfig }), + 'user-1', + 'config seam query', + undefined, + undefined, + undefined, + undefined, + undefined, + undefined, + ); + }); + + it('threads an explicit runtime config into delegated ingest deps', async () => { + const runtimeConfig = { + lessonsEnabled: false, + consensusValidationEnabled: false, + consensusMinMemories: 5, + auditLoggingEnabled: false, + }; + mockPerformIngest.mockResolvedValue({ + episodeId: 'ep-1', + factsExtracted: 0, + stored: 0, + skipped: 0, + linksCreated: 0, + compositesCreated: 0, + }); + const service = new MemoryService( + {} as any, + {} as any, + undefined, + undefined, + undefined, + runtimeConfig as any, + ); + + await service.ingest('user-1', 'text', 'site'); + + expect(mockPerformIngest).toHaveBeenCalledWith( + expect.objectContaining({ config: runtimeConfig }), + 'user-1', + 'text', + 'site', + '', + undefined, + ); + }); + + it('threads an explicit runtime config into delegated quick-ingest deps', async () => { + const runtimeConfig = { + lessonsEnabled: false, + consensusValidationEnabled: false, + consensusMinMemories: 5, + auditLoggingEnabled: false, + }; + mockPerformQuickIngest.mockResolvedValue({ + episodeId: 'ep-1', + factsExtracted: 0, + stored: 0, + skipped: 0, + linksCreated: 0, + compositesCreated: 0, + }); + const service = new MemoryService( + {} as any, + {} as any, + undefined, + undefined, + undefined, + runtimeConfig as any, + ); + + await service.quickIngest('user-1', 'text', 'site'); + + expect(mockPerformQuickIngest).toHaveBeenCalledWith( + expect.objectContaining({ config: runtimeConfig }), + 'user-1', + 'text', + 'site', + '', + undefined, + ); + }); + + it('threads an explicit runtime config into delegated workspace-ingest deps', 
async () => { + const runtimeConfig = { + lessonsEnabled: false, + consensusValidationEnabled: false, + consensusMinMemories: 5, + auditLoggingEnabled: false, + }; + const workspace = { + workspaceId: 'ws-1', + agentId: 'agent-1', + visibility: 'workspace', + }; + mockPerformWorkspaceIngest.mockResolvedValue({ + episodeId: 'ep-1', + factsExtracted: 0, + stored: 0, + skipped: 0, + linksCreated: 0, + compositesCreated: 0, + }); + const service = new MemoryService( + {} as any, + {} as any, + undefined, + undefined, + undefined, + runtimeConfig as any, + ); + + await service.workspaceIngest('user-1', 'text', 'site', '', workspace as any); + + expect(mockPerformWorkspaceIngest).toHaveBeenCalledWith( + expect.objectContaining({ config: runtimeConfig }), + 'user-1', + 'text', + 'site', + '', + workspace, + undefined, + ); + }); + + it('defaults delegated search deps to the module config singleton', async () => { + mockPerformSearch.mockResolvedValue({ + memories: [], + injectionText: '', + citations: [], + retrievalMode: 'flat', + }); + const service = new MemoryService({} as any, {} as any); + + await service.search('user-1', 'default config query'); + + expect(mockPerformSearch).toHaveBeenCalledWith( + expect.objectContaining({ config: moduleConfig }), + 'user-1', + 'default config query', + undefined, + undefined, + undefined, + undefined, + undefined, + undefined, + ); + }); +}); diff --git a/src/services/__tests__/memory-storage-runtime-config.test.ts b/src/services/__tests__/memory-storage-runtime-config.test.ts new file mode 100644 index 0000000..db1a546 --- /dev/null +++ b/src/services/__tests__/memory-storage-runtime-config.test.ts @@ -0,0 +1,76 @@ +/** + * Runtime config seam tests for memory-storage. + * + * Verifies that namespace classification decisions come from the explicit + * runtime config passed through MemoryService deps, not the module singleton. 
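+ *
+ * Decision under test, as a hedged sketch (the exact fallback arguments to
+ * inferNamespace are assumed, not asserted by this file):
+ *
+ *   const namespace = deps.config.namespaceClassificationEnabled
+ *     ? await classifyNamespace(fact.fact, sourceSite, fact.keywords)
+ *     : inferNamespace(fact.fact, sourceSite);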
+ */ + +import { afterEach, describe, expect, it, vi } from 'vitest'; + +vi.mock('../namespace-retrieval.js', () => ({ + classifyNamespace: vi.fn(), + inferNamespace: vi.fn(), +})); + +import { config } from '../../config.js'; +import { storeProjection } from '../memory-storage.js'; +import { classifyNamespace, inferNamespace } from '../namespace-retrieval.js'; + +describe('memory-storage runtime config seam', () => { + afterEach(() => { + vi.restoreAllMocks(); + }); + + it('uses deps.config.namespaceClassificationEnabled instead of the singleton flag', async () => { + const originalNamespaceClassificationEnabled = config.namespaceClassificationEnabled; + config.namespaceClassificationEnabled = false; + + vi.mocked(classifyNamespace).mockResolvedValue('runtime.namespace'); + vi.mocked(inferNamespace).mockReturnValue('singleton.namespace'); + + const deps = { + config: { + namespaceClassificationEnabled: true, + auditLoggingEnabled: false, + }, + repo: { + storeMemory: vi.fn().mockResolvedValue('memory-1'), + storeAtomicFacts: vi.fn().mockResolvedValue(undefined), + storeForesight: vi.fn().mockResolvedValue(undefined), + }, + } as any; + + try { + await storeProjection( + deps, + 'user-1', + { + fact: 'User prefers PostgreSQL.', + headline: 'Prefers PostgreSQL', + importance: 0.8, + type: 'knowledge', + keywords: ['postgresql'], + entities: [], + relations: [], + }, + [0.1, 0.2], + 'chat.openai.com', + 'https://chat.example/test', + 'episode-1', + 0.95, + ); + } finally { + config.namespaceClassificationEnabled = originalNamespaceClassificationEnabled; + } + + expect(classifyNamespace).toHaveBeenCalledWith( + 'User prefers PostgreSQL.', + 'chat.openai.com', + ['postgresql'], + ); + expect(inferNamespace).not.toHaveBeenCalled(); + expect(deps.repo.storeMemory).toHaveBeenCalledWith( + expect.objectContaining({ namespace: 'runtime.namespace' }), + ); + }); +}); diff --git a/src/services/__tests__/query-augmentation.test.ts b/src/services/__tests__/query-augmentation.test.ts index 4b3569b..c172a36 100644 --- a/src/services/__tests__/query-augmentation.test.ts +++ b/src/services/__tests__/query-augmentation.test.ts @@ -89,6 +89,28 @@ describe('augmentQueryWithEntities', () => { ); }); + it('prefers explicit runtime config over module config thresholds', async () => { + mockConfig.queryAugmentationMaxEntities = 1; + mockConfig.queryAugmentationMinSimilarity = 0.95; + const entityRepo = createMockEntityRepo([]); + + await augmentQueryWithEntities( + entityRepo, + 'user-1', + 'override query', + [0.4, 0.4], + { + queryExpansionMinSimilarity: 0.5, + queryAugmentationMaxEntities: 4, + queryAugmentationMinSimilarity: 0.25, + }, + ); + + expect(entityRepo.searchEntities).toHaveBeenCalledWith( + 'user-1', [0.4, 0.4], 4, 0.25, + ); + }); + it('includes entity type and similarity in metadata', async () => { const entityRepo = createMockEntityRepo([ { name: 'Redis', entity_type: 'tool', similarity: 0.85 }, diff --git a/src/services/__tests__/query-expansion.test.ts b/src/services/__tests__/query-expansion.test.ts index d378bec..4464b1e 100644 --- a/src/services/__tests__/query-expansion.test.ts +++ b/src/services/__tests__/query-expansion.test.ts @@ -9,7 +9,8 @@ vi.mock('../../config.js', () => ({ config: { queryExpansionMinSimilarity: 0.5 } vi.mock('../llm.js', () => ({ llm: { chat: vi.fn() } })); vi.mock('../embedding.js', () => ({ embedText: vi.fn().mockResolvedValue([0.1, 0.2]) })); -const { parseQueryTerms } = await import('../query-expansion.js'); +const { parseQueryTerms, expandQueryViaEntities } = 
await import('../query-expansion.js'); +const { llm } = await import('../llm.js'); describe('parseQueryTerms', () => { it('parses valid JSON with entities and concepts', () => { @@ -66,3 +67,25 @@ describe('parseQueryTerms', () => { expect(result.concepts).toEqual(['caching']); }); }); + +describe('expandQueryViaEntities runtime config', () => { + it('prefers explicit runtime config over module config for expansion similarity threshold', async () => { + (llm.chat as any).mockResolvedValue('{"entities":["Acme"],"concepts":[]}'); + const searchEntities = vi.fn().mockResolvedValue([]); + const entityRepo = { searchEntities } as any; + const repo = {} as any; + + await expandQueryViaEntities( + entityRepo, repo, 'user-1', 'Acme question', [0.1, 0.2], new Set(), 20, + { + queryExpansionMinSimilarity: 0.88, + queryAugmentationMaxEntities: 5, + queryAugmentationMinSimilarity: 0.4, + }, + ); + + expect(searchEntities).toHaveBeenCalledWith( + 'user-1', expect.any(Array), expect.any(Number), 0.88, + ); + }); +}); diff --git a/src/services/__tests__/retrieval-format.test.ts b/src/services/__tests__/retrieval-format.test.ts index 6f914ee..8a3a161 100644 --- a/src/services/__tests__/retrieval-format.test.ts +++ b/src/services/__tests__/retrieval-format.test.ts @@ -93,6 +93,31 @@ describe('formatInjection', () => { mockConfig.stagedLoadingEnabled = false; }); + it('prefers explicit staged-loading option over module config', () => { + mockConfig.stagedLoadingEnabled = false; + const result = formatInjection( + [makeResult({ summary: 'short summary' })], + { stagedLoadingEnabled: true }, + ); + + expect(result).toContain('mode="staged"'); + expect(result).toContain('short summary'); + expect(result).toContain('expand_hint'); + }); + + it('prefers explicit full-loading option over enabled module config', () => { + mockConfig.stagedLoadingEnabled = true; + const result = formatInjection( + [makeResult({ content: 'full content', summary: 'short summary' })], + { stagedLoadingEnabled: false }, + ); + + expect(result).not.toContain('mode="staged"'); + expect(result).not.toContain('expand_hint'); + expect(result).toContain('full content'); + mockConfig.stagedLoadingEnabled = false; + }); + it('staged mode truncates content when no summary', () => { mockConfig.stagedLoadingEnabled = true; const longContent = 'A'.repeat(100); diff --git a/src/services/__tests__/retrieval-policy.test.ts b/src/services/__tests__/retrieval-policy.test.ts index 5ee6340..bb20e85 100644 --- a/src/services/__tests__/retrieval-policy.test.ts +++ b/src/services/__tests__/retrieval-policy.test.ts @@ -4,10 +4,36 @@ * repair acceptance decisions, result merging, and rerank depth. 
*/ -import { describe, expect, it, vi, beforeEach } from 'vitest'; +import { describe, expect, it, vi } from 'vitest'; +import type { RetrievalProfile } from '../retrieval-profiles.js'; import type { SearchResult } from '../../db/memory-repository.js'; import { createSearchResult } from './test-fixtures.js'; +const retrievalProfileSettings: RetrievalProfile = { + name: 'balanced', + maxSearchResults: 10, + repairLoopEnabled: true, + adaptiveRetrievalEnabled: true, + hybridSearchEnabled: false, + repairLoopMinSimilarity: 0.3, + repairSkipSimilarity: 0.55, + rerankDepth: 20, + repairPrimaryWeight: 1.0, + repairRewriteWeight: 0.8, + lexicalWeight: 0.8, + mmrEnabled: true, + mmrLambda: 0.85, + linkExpansionEnabled: true, + linkExpansionMax: 3, + linkSimilarityThreshold: 0.5, + scoringWeightSimilarity: 2.0, + scoringWeightImportance: 1.0, + scoringWeightRecency: 1.0, + linkExpansionBeforeMMR: false, + repairDeltaThreshold: 0, + repairConfidenceFloor: 0, +}; + const mockConfig = { adaptiveRetrievalEnabled: true, maxSearchResults: 10, @@ -16,11 +42,7 @@ const mockConfig = { repairSkipSimilarity: 0.55, repairDeltaThreshold: 0, repairConfidenceFloor: 0, - retrievalProfileSettings: { - repairPrimaryWeight: 1.0, - repairRewriteWeight: 0.8, - rerankDepth: 20, - }, + retrievalProfileSettings, }; vi.mock('../../config.js', () => ({ @@ -45,24 +67,24 @@ function makeResult(overrides: Partial<SearchResult> = {}) { describe('resolveSearchLimit', () => { it('uses explicit limit when provided', () => { - expect(resolveSearchLimit('anything', 5)).toBe(5); + expect(resolveSearchLimit('anything', 5, mockConfig)).toBe(5); }); it('clamps explicit limit to maxSearchResults', () => { - expect(resolveSearchLimit('anything', 100)).toBe(10); + expect(resolveSearchLimit('anything', 100, mockConfig)).toBe(10); }); it('clamps explicit limit to minimum 1', () => { - expect(resolveSearchLimit('anything', 0)).toBe(1); + expect(resolveSearchLimit('anything', 0, mockConfig)).toBe(1); }); it('classifies short question queries as simple (5)', () => { - const limit = resolveSearchLimit('what is TypeScript?', undefined); + const limit = resolveSearchLimit('what is TypeScript?', undefined, mockConfig); expect(limit).toBe(5); }); it('classifies complex queries with temporal markers as 8', () => { - const limit = resolveSearchLimit('how did the architecture change over time', undefined); + const limit = resolveSearchLimit('how did the architecture change over time', undefined, mockConfig); expect(limit).toBe(8); }); @@ -70,58 +92,60 @@ describe('resolveSearchLimit', () => { const limit = resolveSearchLimit( 'tell me about the current status of the project deployment process', undefined, + mockConfig, ); - expect(limit).toBe(10); // "current" → multi-hop (12), clamped by maxSearchResults=10 + expect(limit).toBe(10); }); it('classifies medium queries (>9 words, no markers) as 5', () => { const limit = resolveSearchLimit( 'tell me about the overall status of the project deployment process', undefined, + mockConfig, ); expect(limit).toBe(5); }); it('classifies multi-hop queries as 12', () => { - const limit = resolveSearchLimit('compare the old and new authentication approaches', undefined); - expect(limit).toBe(10); // clamped by maxSearchResults=10 + const limit = resolveSearchLimit('compare the old and new authentication approaches', undefined, mockConfig); + expect(limit).toBe(10); }); it('classifies non-question short queries as medium (5)', () => { - const limit = resolveSearchLimit('TypeScript migration plan', undefined); + const limit =
resolveSearchLimit('TypeScript migration plan', undefined, mockConfig); expect(limit).toBe(5); }); it('falls back to maxSearchResults when adaptive disabled', () => { mockConfig.adaptiveRetrievalEnabled = false; - const limit = resolveSearchLimit('how did things change', undefined); + const limit = resolveSearchLimit('how did things change', undefined, mockConfig); expect(limit).toBe(10); mockConfig.adaptiveRetrievalEnabled = true; }); it('classifies aggregation queries above maxSearchResults', () => { - const limit = resolveSearchLimit('How many model kits have I bought?', undefined); + const limit = resolveSearchLimit('How many model kits have I bought?', undefined, mockConfig); expect(limit).toBe(AGGREGATION_QUERY_LIMIT); expect(limit).toBeGreaterThan(mockConfig.maxSearchResults); }); it('detects "how many" as aggregation', () => { - expect(resolveSearchLimit('How many times did I mention yoga?', undefined)) + expect(resolveSearchLimit('How many times did I mention yoga?', undefined, mockConfig)) .toBe(AGGREGATION_QUERY_LIMIT); }); it('detects "total amount" as aggregation', () => { - expect(resolveSearchLimit('What is the total amount I spent on car mods?', undefined)) + expect(resolveSearchLimit('What is the total amount I spent on car mods?', undefined, mockConfig)) .toBe(AGGREGATION_QUERY_LIMIT); }); it('detects "list all" as aggregation', () => { - expect(resolveSearchLimit('list all the restaurants I visited', undefined)) + expect(resolveSearchLimit('list all the restaurants I visited', undefined, mockConfig)) .toBe(AGGREGATION_QUERY_LIMIT); }); it('does not classify simple "how" queries as aggregation', () => { - const limit = resolveSearchLimit('how did the architecture change', undefined); + const limit = resolveSearchLimit('how did the architecture change', undefined, mockConfig); expect(limit).not.toBe(AGGREGATION_QUERY_LIMIT); }); }); @@ -163,46 +187,47 @@ describe('resolveSearchLimitDetailed', () => { const result = resolveSearchLimitDetailed( 'What is the current status of the project?', undefined, + mockConfig, ); expect(result.classification.label).toBe('multi-hop'); expect(result.classification.matchedMarker).toBe('current'); - expect(result.limit).toBe(10); // 12 clamped to maxSearchResults=10 + expect(result.limit).toBe(10); }); }); describe('shouldRunRepairLoop', () => { it('returns false when repair loop disabled', () => { mockConfig.repairLoopEnabled = false; - expect(shouldRunRepairLoop('test query', [makeResult()])).toBe(false); + expect(shouldRunRepairLoop('test query', [makeResult()], mockConfig)).toBe(false); mockConfig.repairLoopEnabled = true; }); it('returns false for ineligible query even with no results', () => { - expect(shouldRunRepairLoop('test query', [])).toBe(false); + expect(shouldRunRepairLoop('test query', [], mockConfig)).toBe(false); }); it('returns true for eligible query with no results', () => { - expect(shouldRunRepairLoop('compare the old and new approaches', [])).toBe(true); + expect(shouldRunRepairLoop('compare the old and new approaches', [], mockConfig)).toBe(true); }); it('returns true when top similarity below threshold for eligible query', () => { const results = [makeResult({ similarity: 0.2 })]; - expect(shouldRunRepairLoop('compare the old and new approaches', results)).toBe(true); + expect(shouldRunRepairLoop('compare the old and new approaches', results, mockConfig)).toBe(true); }); it('returns false for simple query with good similarity', () => { const results = Array.from({ length: 5 }, () => makeResult({ similarity: 0.8 })); 
- expect(shouldRunRepairLoop('what is TypeScript', results)).toBe(false); + expect(shouldRunRepairLoop('what is TypeScript', results, mockConfig)).toBe(false); }); it('runs repair for complex query with good similarity but insufficient results', () => { const results = [makeResult({ similarity: 0.8 })]; - expect(shouldRunRepairLoop('how did the architecture change', results)).toBe(true); + expect(shouldRunRepairLoop('how did the architecture change', results, mockConfig)).toBe(true); }); it('runs repair for complex query with low similarity and insufficient results', () => { const results = [makeResult({ similarity: 0.4 })]; - expect(shouldRunRepairLoop('how did the architecture change', results)).toBe(true); + expect(shouldRunRepairLoop('how did the architecture change', results, mockConfig)).toBe(true); }); }); @@ -210,7 +235,7 @@ describe('shouldAcceptRepair', () => { it('accepts when thresholds are zero (ungated)', () => { const initial = [makeResult({ similarity: 0.5 })]; const repaired = [makeResult({ similarity: 0.51 })]; - const decision = shouldAcceptRepair(initial, repaired); + const decision = shouldAcceptRepair(initial, repaired, mockConfig); expect(decision.accepted).toBe(true); expect(decision.reason).toBe('accepted'); }); @@ -219,7 +244,7 @@ describe('shouldAcceptRepair', () => { mockConfig.repairDeltaThreshold = 0.05; const initial = [makeResult({ similarity: 0.5 })]; const repaired = [makeResult({ similarity: 0.52 })]; - const decision = shouldAcceptRepair(initial, repaired); + const decision = shouldAcceptRepair(initial, repaired, mockConfig); expect(decision.accepted).toBe(false); expect(decision.reason).toBe('delta-below-threshold'); mockConfig.repairDeltaThreshold = 0; @@ -229,7 +254,7 @@ describe('shouldAcceptRepair', () => { mockConfig.repairConfidenceFloor = 0.4; const initial = [makeResult({ similarity: 0.2 })]; const repaired = [makeResult({ similarity: 0.3 })]; - const decision = shouldAcceptRepair(initial, repaired); + const decision = shouldAcceptRepair(initial, repaired, mockConfig); expect(decision.accepted).toBe(false); expect(decision.reason).toBe('below-confidence-floor'); mockConfig.repairConfidenceFloor = 0; @@ -238,7 +263,7 @@ describe('shouldAcceptRepair', () => { it('computes correct simDelta', () => { const initial = [makeResult({ similarity: 0.4 })]; const repaired = [makeResult({ similarity: 0.7 })]; - const decision = shouldAcceptRepair(initial, repaired); + const decision = shouldAcceptRepair(initial, repaired, mockConfig); expect(decision.simDelta).toBeCloseTo(0.3, 5); expect(decision.initialTopSim).toBeCloseTo(0.4, 5); expect(decision.repairedTopSim).toBeCloseTo(0.7, 5); @@ -246,14 +271,14 @@ describe('shouldAcceptRepair', () => { it('handles empty initial results', () => { const repaired = [makeResult({ similarity: 0.5 })]; - const decision = shouldAcceptRepair([], repaired); + const decision = shouldAcceptRepair([], repaired, mockConfig); expect(decision.accepted).toBe(true); expect(decision.initialTopSim).toBe(0); }); it('handles empty repaired results', () => { const initial = [makeResult({ similarity: 0.5 })]; - const decision = shouldAcceptRepair(initial, []); + const decision = shouldAcceptRepair(initial, [], mockConfig); expect(decision.accepted).toBe(false); expect(decision.reason).toBe('delta-below-threshold'); expect(decision.repairedTopSim).toBe(0); @@ -265,7 +290,7 @@ describe('mergeSearchResults', () => { const id = 'shared-id'; const primary = [makeResult({ id, score: 0.9 })]; const repair = [makeResult({ id, score: 0.95 })]; - 
const merged = mergeSearchResults(primary, repair, 10); + const merged = mergeSearchResults(primary, repair, 10, mockConfig); expect(merged).toHaveLength(1); expect(merged[0].id).toBe(id); }); @@ -274,7 +299,7 @@ describe('mergeSearchResults', () => { const a = makeResult({ id: 'a', score: 0.5 }); const b = makeResult({ id: 'b', score: 0.9 }); const c = makeResult({ id: 'c', score: 0.7 }); - const merged = mergeSearchResults([a, b], [c], 10); + const merged = mergeSearchResults([a, b], [c], 10, mockConfig); expect(merged[0].id).toBe('b'); expect(merged[1].id).toBe('c'); expect(merged[2].id).toBe('a'); @@ -287,14 +312,14 @@ describe('mergeSearchResults', () => { const repair = Array.from({ length: 5 }, (_, i) => makeResult({ id: `r-${i}`, score: 0.4 + i * 0.05 }), ); - const merged = mergeSearchResults(primary, repair, 3); + const merged = mergeSearchResults(primary, repair, 3, mockConfig); expect(merged).toHaveLength(3); }); it('applies weight to repair results', () => { const primary = [makeResult({ id: 'p', score: 1.0 })]; const repair = [makeResult({ id: 'r', score: 1.0 })]; - const merged = mergeSearchResults(primary, repair, 10); + const merged = mergeSearchResults(primary, repair, 10, mockConfig); const primaryResult = merged.find((r) => r.id === 'p')!; const repairResult = merged.find((r) => r.id === 'r')!; expect(primaryResult.score).toBe(1.0); @@ -304,44 +329,36 @@ describe('mergeSearchResults', () => { describe('resolveRerankDepth', () => { it('returns rerankDepth when greater than limit', () => { - expect(resolveRerankDepth(5)).toBe(20); + expect(resolveRerankDepth(5, mockConfig)).toBe(20); }); it('returns limit when greater than rerankDepth', () => { - mockConfig.retrievalProfileSettings.rerankDepth = 3; - expect(resolveRerankDepth(5)).toBe(5); - mockConfig.retrievalProfileSettings.rerankDepth = 20; + expect(resolveRerankDepth(30, mockConfig)).toBe(30); }); it('uses aggregation limit without clamping to maxSearchResults', () => { - expect(resolveRerankDepth(AGGREGATION_QUERY_LIMIT)).toBe(AGGREGATION_QUERY_LIMIT); + expect(resolveRerankDepth(AGGREGATION_QUERY_LIMIT, mockConfig)).toBe(25); }); }); describe('isAggregationQuery', () => { it('detects "how many" patterns', () => { - expect(isAggregationQuery('how many times did i visit the gym?')).toBe(true); - expect(isAggregationQuery('how many model kits have i bought?')).toBe(true); - expect(isAggregationQuery('on how many occasions did i mention yoga?')).toBe(true); + expect(isAggregationQuery('how many projects am I working on')).toBe(true); }); it('detects "how much" patterns', () => { - expect(isAggregationQuery('how much did i spend on modifications?')).toBe(true); + expect(isAggregationQuery('how much did I spend')).toBe(true); }); it('detects "total" patterns', () => { - expect(isAggregationQuery('what is the total amount spent on car mods?')).toBe(true); - expect(isAggregationQuery('total cost of all purchases?')).toBe(true); + expect(isAggregationQuery('what is the total cost')).toBe(true); }); it('detects "list all" patterns', () => { - expect(isAggregationQuery('list all the restaurants i visited')).toBe(true); - expect(isAggregationQuery('name all the people i worked with')).toBe(true); + expect(isAggregationQuery('list all my meetings')).toBe(true); }); it('rejects non-aggregation queries', () => { - expect(isAggregationQuery('what is typescript?')).toBe(false); - expect(isAggregationQuery('who is my manager?')).toBe(false); - expect(isAggregationQuery('tell me about the project')).toBe(false); + 
expect(isAggregationQuery('how did the architecture change')).toBe(false); }); }); diff --git a/src/services/__tests__/search-pipeline-runtime-config.test.ts b/src/services/__tests__/search-pipeline-runtime-config.test.ts new file mode 100644 index 0000000..aee5383 --- /dev/null +++ b/src/services/__tests__/search-pipeline-runtime-config.test.ts @@ -0,0 +1,299 @@ +/** + * Runtime config seam tests for search-pipeline. + * + * Verifies that request-time runtime config can override cross-encoder + * reranking even when the module singleton differs. + */ + +import { beforeEach, describe, expect, it, vi } from 'vitest'; +import { createSearchResult } from './test-fixtures.js'; + +const mockConfig = { + rerankSkipTopSimilarity: 0.85, + rerankSkipMinGap: 0.05, + mmrEnabled: false, + queryAugmentationEnabled: false, + entityGraphEnabled: false, + hybridSearchEnabled: false, + iterativeRetrievalEnabled: false, + agenticRetrievalEnabled: false, + queryExpansionEnabled: false, + linkExpansionEnabled: false, + linkExpansionMax: 0, + linkExpansionBeforeMMR: false, + mmrLambda: 0.5, + crossEncoderEnabled: true, + crossEncoderModel: 'module-cross-encoder', + crossEncoderDtype: 'q8', + retrievalProfileSettings: { + repairPrimaryWeight: 1, + repairRewriteWeight: 1, + }, +}; + +const { mockRerankCandidates } = vi.hoisted(() => ({ + mockRerankCandidates: vi.fn(), +})); +const { mockTraceStage } = vi.hoisted(() => ({ + mockTraceStage: vi.fn(), +})); + +vi.mock('../../config.js', () => ({ config: mockConfig })); +vi.mock('../embedding.js', () => ({ embedText: vi.fn().mockResolvedValue([0.1, 0.2]) })); +vi.mock('../extraction.js', () => ({ rewriteQuery: vi.fn() })); +vi.mock('../retrieval-policy.js', () => ({ + resolveRerankDepth: vi.fn((limit: number) => limit), + shouldRunRepairLoop: vi.fn(() => false), + shouldAcceptRepair: vi.fn(), +})); +vi.mock('../query-expansion.js', () => ({ + expandQueryViaEntities: vi.fn(), + augmentQueryWithEntities: vi.fn(), + coRetrieveByEntityNames: vi.fn(), +})); +vi.mock('../reranker.js', () => ({ + rerankCandidates: mockRerankCandidates, +})); +vi.mock('../retrieval-trace.js', () => ({ + TraceCollector: class { + stage = mockTraceStage; + event = vi.fn(); + finalize = vi.fn(); + setRetrievalSummary = vi.fn(); + setPackagingSummary = vi.fn(); + setAssemblySummary = vi.fn(); + getRetrievalSummary = vi.fn(() => undefined); + }, +})); +vi.mock('../abstract-query-policy.js', () => ({ + shouldUseAbstractHybridFallback: vi.fn(() => false), +})); +vi.mock('../agentic-retrieval.js', () => ({ + applyAgenticRetrieval: vi.fn(), +})); +vi.mock('../timing.js', () => ({ + timed: vi.fn(async (_name: string, fn: () => unknown) => fn()), +})); +vi.mock('../temporal-query-expansion.js', () => ({ + expandTemporalQuery: vi.fn(async () => ({ memories: [], keywords: [], anchorIds: [] })), +})); +vi.mock('../literal-query-expansion.js', () => ({ + expandLiteralQuery: vi.fn(async () => ({ memories: [], keywords: [] })), + isLiteralDetailQuery: vi.fn(() => false), +})); +vi.mock('../subject-aware-ranking.js', () => ({ + expandSubjectQuery: vi.fn(async () => ({ memories: [], anchors: [] })), + applySubjectAwareRanking: vi.fn((_query: string, results: unknown[]) => ({ + results, + subjects: [], + keywords: [], + protectedFingerprints: [], + })), +})); +vi.mock('../iterative-retrieval.js', () => ({ + applyIterativeRetrieval: vi.fn(), +})); +vi.mock('../current-state-ranking.js', () => ({ + applyCurrentStateRanking: vi.fn((_query: string, results: unknown[]) => ({ + triggered: false, + results, + })), 
+})); +vi.mock('../conciseness-preference.js', () => ({ + applyConcisenessPenalty: vi.fn((results: unknown[]) => results), +})); + +const { runSearchPipelineWithTrace, generateLinks } = await import('../search-pipeline.js'); + +describe('runSearchPipelineWithTrace runtime config', () => { + beforeEach(() => { + vi.clearAllMocks(); + mockConfig.crossEncoderEnabled = true; + mockConfig.crossEncoderModel = 'module-cross-encoder'; + mockConfig.crossEncoderDtype = 'q8'; + mockRerankCandidates.mockResolvedValue([]); + }); + + it('uses runtime config to disable cross-encoder reranking', async () => { + const initialResults = [ + createSearchResult({ id: 'memory-1', score: 0.4, similarity: 0.4 }), + createSearchResult({ id: 'memory-2', score: 0.39, similarity: 0.39 }), + ]; + const repo = { + searchSimilar: vi.fn().mockResolvedValue(initialResults), + } as any; + + const result = await runSearchPipelineWithTrace( + repo, + null, + 'user-1', + 'runtime config query', + 2, + undefined, + undefined, + { + runtimeConfig: { + ...mockConfig, + crossEncoderEnabled: false, + } as any, + }, + ); + + expect(result.filtered).toHaveLength(2); + expect(mockRerankCandidates).not.toHaveBeenCalled(); + }); + + it('uses runtime config to enable cross-encoder reranking even when module config disables it', async () => { + mockConfig.crossEncoderEnabled = false; + const initialResults = [ + createSearchResult({ id: 'memory-1', score: 0.4, similarity: 0.4 }), + createSearchResult({ id: 'memory-2', score: 0.39, similarity: 0.39 }), + ]; + const rerankedResults = [...initialResults].reverse(); + mockRerankCandidates.mockResolvedValue(rerankedResults); + const repo = { + searchSimilar: vi.fn().mockResolvedValue(initialResults), + } as any; + + const result = await runSearchPipelineWithTrace( + repo, + null, + 'user-1', + 'runtime config rerank query', + 2, + undefined, + undefined, + { + runtimeConfig: { + ...mockConfig, + crossEncoderEnabled: true, + } as any, + }, + ); + + expect(result.filtered).toEqual(rerankedResults); + expect(mockRerankCandidates).toHaveBeenCalledWith( + 'runtime config rerank query', + initialResults, + { + crossEncoderModel: 'module-cross-encoder', + crossEncoderDtype: 'q8', + }, + ); + }); + + it('uses runtime config to enable agentic retrieval even when module config disables it', async () => { + const initialResults = [ + createSearchResult({ id: 'memory-1', score: 0.4, similarity: 0.4 }), + createSearchResult({ id: 'memory-2', score: 0.39, similarity: 0.39 }), + ]; + const repo = { + searchSimilar: vi.fn().mockResolvedValue(initialResults), + } as any; + const agentic = await import('../agentic-retrieval.js'); + vi.mocked(agentic.applyAgenticRetrieval).mockResolvedValue({ + memories: initialResults, + triggered: false, + subQueries: [], + reason: 'strong-initial-results', + }); + + await runSearchPipelineWithTrace( + repo, + null, + 'user-1', + 'runtime config agentic query', + 2, + undefined, + undefined, + { + runtimeConfig: { + ...mockConfig, + agenticRetrievalEnabled: true, + crossEncoderEnabled: false, + } as any, + }, + ); + + expect(agentic.applyAgenticRetrieval).toHaveBeenCalled(); + }); + + it('threads runtime reranker model and dtype through rerank and trace metadata', async () => { + const initialResults = [ + createSearchResult({ id: 'memory-1', score: 0.4, similarity: 0.4 }), + createSearchResult({ id: 'memory-2', score: 0.39, similarity: 0.39 }), + ]; + const rerankedResults = [...initialResults].reverse(); + mockRerankCandidates.mockResolvedValue(rerankedResults); + const 
repo = { + searchSimilar: vi.fn().mockResolvedValue(initialResults), + } as any; + + const runtimeConfig = { + ...mockConfig, + crossEncoderModel: 'runtime-cross-encoder', + crossEncoderDtype: 'fp16', + } as any; + + await runSearchPipelineWithTrace( + repo, + null, + 'user-1', + 'runtime config query', + 2, + undefined, + undefined, + { runtimeConfig }, + ); + + expect(mockRerankCandidates).toHaveBeenCalledWith( + 'runtime config query', + initialResults, + { + crossEncoderModel: 'runtime-cross-encoder', + crossEncoderDtype: 'fp16', + }, + ); + expect(mockTraceStage).toHaveBeenCalledWith( + 'cross-encoder', + rerankedResults, + { + model: 'runtime-cross-encoder', + dtype: 'fp16', + }, + ); + }); + + it('uses runtime config to enable link generation even when module config disables it', async () => { + mockConfig.linkExpansionEnabled = false; + const repo = { + getMemory: vi.fn().mockResolvedValue({ id: 'memory-1' }), + findLinkCandidates: vi.fn().mockResolvedValue([ + { id: 'linked-1', similarity: 0.77 }, + ]), + createLinks: vi.fn().mockResolvedValue(1), + } as any; + + const created = await generateLinks( + repo, + 'user-1', + ['memory-1'], + new Map([['memory-1', [0.1, 0.2]]]), + { + linkExpansionEnabled: true, + linkSimilarityThreshold: 0.42, + }, + ); + + expect(created).toBe(1); + expect(repo.findLinkCandidates).toHaveBeenCalledWith( + 'user-1', + [0.1, 0.2], + 0.42, + 'memory-1', + ); + expect(repo.createLinks).toHaveBeenCalledWith([ + { sourceId: 'memory-1', targetId: 'linked-1', similarity: 0.77 }, + ]); + }); +}); diff --git a/src/services/agentic-retrieval.ts b/src/services/agentic-retrieval.ts index e503c46..ca0f341 100644 --- a/src/services/agentic-retrieval.ts +++ b/src/services/agentic-retrieval.ts @@ -19,6 +19,7 @@ import { llm } from './llm.js'; import { embedText } from './embedding.js'; import { mergeSearchResults } from './retrieval-policy.js'; +import type { CoreRuntimeConfig } from '../app/runtime-container.js'; import type { MemoryRepository, SearchResult } from '../db/memory-repository.js'; import { config } from '../config.js'; @@ -45,6 +46,11 @@ interface SufficiencyResult { subQueries: string[]; } +type AgenticRetrievalRuntimeConfig = Pick< + CoreRuntimeConfig, + 'hybridSearchEnabled' | 'retrievalProfileSettings' | 'maxSearchResults' +>; + /** * Check if retrieved memories are sufficient and decompose if not. * Returns null if sufficient (no additional retrieval needed). 
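// Editorial sketch (not part of the diff): the seam introduced above — a
// narrow Pick<> of CoreRuntimeConfig accepted as a trailing parameter that
// defaults to the module singleton — keeps existing call sites compiling
// while composed callers inject request-scoped config. With hypothetical
// names:
//
//   type FooRuntimeConfig = Pick<CoreRuntimeConfig, 'hybridSearchEnabled'>;
//
//   export async function foo(
//     query: string,
//     policyConfig: FooRuntimeConfig = config, // module singleton fallback
//   ): Promise<void> {
//     if (policyConfig.hybridSearchEnabled) { /* hybrid path */ }
//   }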
@@ -94,12 +100,13 @@ async function retrieveSubQueries( userId: string, subQueries: string[], candidateDepth: number, + policyConfig: AgenticRetrievalRuntimeConfig, sourceSite?: string, referenceTime?: Date, ): Promise<SearchResult[]> { const retrievalPromises = subQueries.map(async (subQuery) => { const embedding = await embedText(subQuery, 'query'); - if (config.hybridSearchEnabled) { + if (policyConfig.hybridSearchEnabled) { return repo.searchHybrid(userId, subQuery, embedding, candidateDepth, sourceSite, referenceTime); } return repo.searchSimilar(userId, embedding, candidateDepth, sourceSite, referenceTime); @@ -110,7 +117,7 @@ async function retrieveSubQueries( // Fuse all sub-query results via weighted merge let fused: SearchResult[] = []; for (const subResult of results) { - fused = mergeSearchResults(fused, subResult, candidateDepth); + fused = mergeSearchResults(fused, subResult, candidateDepth, policyConfig); } return fused; } @@ -139,6 +146,7 @@ export async function applyAgenticRetrieval( candidateDepth: number, sourceSite?: string, referenceTime?: Date, + policyConfig: AgenticRetrievalRuntimeConfig = config, ): Promise { // Quick gate: skip for queries that already have strong results if (initialResults.length >= 3 && initialResults[0].similarity >= 0.85) { @@ -162,11 +170,11 @@ } const subQueryResults = await retrieveSubQueries( - repo, userId, sufficiency.subQueries, candidateDepth, sourceSite, referenceTime, + repo, userId, sufficiency.subQueries, candidateDepth, policyConfig, sourceSite, referenceTime, ); // Merge initial + sub-query results - const merged = mergeSearchResults(initialResults, subQueryResults, candidateDepth); + const merged = mergeSearchResults(initialResults, subQueryResults, candidateDepth, policyConfig); console.log(`[agentic-retrieval] Merged: ${initialResults.length} initial + ${subQueryResults.length} sub-query → ${merged.length} total`); diff --git a/src/services/consolidation-service.ts b/src/services/consolidation-service.ts index 913548d..1ba2e9a 100644 --- a/src/services/consolidation-service.ts +++ b/src/services/consolidation-service.ts @@ -17,6 +17,7 @@ import { config } from '../config.js'; import { MemoryRepository } from '../db/memory-repository.js'; import { ClaimRepository } from '../db/repository-claims.js'; import type { MemoryRow } from '../db/repository-types.js'; +import type { IngestRuntimeConfig } from './memory-service-types.js'; import { formClusters, type AffinityConfig, @@ -26,8 +27,10 @@ import { import { llm } from './llm.js'; import { embedText } from './embedding.js'; import { emitAuditEvent } from './audit-events.js'; +import { emitLineageEvent } from './memory-lineage.js'; const DEFAULT_CONSOLIDATION_BATCH_SIZE = 200; +type ConsolidationRuntimeConfig = Pick<IngestRuntimeConfig, 'llmModel'>; export interface ConsolidationConfig { /** Max memories to scan per consolidation run. */ @@ -95,8 +98,10 @@ export async function executeConsolidation( claims: ClaimRepository, userId: string, consolidationConfig?: Partial<ConsolidationConfig>, + runtimeConfig?: ConsolidationRuntimeConfig, ): Promise { const candidates = await findConsolidationCandidates(repo, userId, consolidationConfig); + const lineageConfig = runtimeConfig ??
config; let memoriesArchived = 0; let clustersConsolidated = 0; @@ -117,6 +122,7 @@ export async function executeConsolidation( if (validMembers.length < 2) continue; const importance = Math.max(...validMembers.map((m) => m.importance)); + const consolidatedImportance = Math.min(1.0, importance + 0.05); const sourceSite = validMembers[0].source_site; const embedding = await embedText(synthesized); @@ -125,7 +131,7 @@ export async function executeConsolidation( content: synthesized, embedding, memoryType: 'semantic', - importance: Math.min(1.0, importance + 0.05), + importance: consolidatedImportance, sourceSite, metadata: { consolidated_from: cluster.memberIds, @@ -134,22 +140,16 @@ export async function executeConsolidation( }, }); - const claimId = await claims.createClaim(userId, 'consolidated'); - const versionId = await claims.createClaimVersion({ - claimId, + await emitLineageEvent({ claims, config: lineageConfig }, { + kind: 'consolidation-add', userId, memoryId: consolidatedId, content: synthesized, embedding, - importance: Math.min(1.0, importance + 0.05), + importance: consolidatedImportance, sourceSite, - provenance: { - mutationType: 'add', - mutationReason: `Consolidated ${cluster.memberCount} memories (avg affinity: ${cluster.avgAffinity.toFixed(2)})`, - actorModel: config.llmModel, - }, + mutationReason: `Consolidated ${cluster.memberCount} memories (avg affinity: ${cluster.avgAffinity.toFixed(2)})`, }); - await claims.setClaimCurrentVersion(claimId, versionId); for (const member of validMembers) { await repo.softDeleteMemory(userId, member.id); diff --git a/src/services/memory-audn.ts b/src/services/memory-audn.ts index d3b1155..306aed6 100644 --- a/src/services/memory-audn.ts +++ b/src/services/memory-audn.ts @@ -4,7 +4,6 @@ * and the full mutation pipeline (update, supersede, delete canonical facts). 
*/ -import { config } from '../config.js'; import { type ClaimSlotInput } from '../db/claim-repository.js'; import { embedText } from './embedding.js'; import { type AUDNDecision } from './extraction.js'; @@ -20,7 +19,8 @@ import { emitAuditEvent } from './audit-events.js'; import { recordContradictionLesson } from './lesson-service.js'; import { shouldDeferAudn, deferMemoryForReconciliation } from './deferred-audn.js'; import { timed } from './timing.js'; -import { storeCanonicalFact, createMutationCanonicalObject, storeProjection, applyEntityScopedDedup, ensureClaimTarget, findConflictCandidates, findSlotConflictCandidates } from './memory-storage.js'; +import { emitLineageEvent } from './memory-lineage.js'; +import { storeCanonicalFact, storeProjection, applyEntityScopedDedup, ensureClaimTarget, findConflictCandidates, findSlotConflictCandidates } from './memory-storage.js'; import type { AudnFactContext, ClaimTarget, @@ -65,7 +65,7 @@ export async function resolveAndExecuteAudn( const candidateIds = new Set(filteredCandidates.map((c) => c.id)); const ctx: AudnFactContext = { userId, fact, embedding, sourceSite, sourceUrl, episodeId, trustScore, claimSlot, logicalTimestamp }; - const fastDecision = tryFastAUDN(fact.fact, filteredCandidates); + const fastDecision = tryFastAUDN(fact.fact, filteredCandidates, deps.config); if (fastDecision) { return executeAndTrackSupersede(deps, fastDecision, candidateIds, ctx, supersededTargets); } @@ -81,7 +81,7 @@ export async function resolveAndExecuteAudn( const rawDecision = await timed('ingest.fact.audn', () => cachedResolveAUDN(fact.fact, filteredCandidates)); let decision = applyClarificationOverrides(rawDecision, fact.fact, filteredCandidates, fact.keywords, fact.type); - if (config.entityGraphEnabled && deps.entities) { + if (deps.config.entityGraphEnabled && deps.entities) { decision = await applyEntityScopedDedup(deps, decision, userId, fact.entities); } return executeAndTrackSupersede(deps, decision, candidateIds, ctx, supersededTargets); @@ -221,20 +221,23 @@ async function updateCanonicalFact( metadata: entry.metadata, validFrom: entry.validFrom, validTo: entry.validTo, })), ); - const mutationReason = `Updated from: "${fact.fact.slice(0, 100)}"`; - const newVersionId = await deps.claims.createUpdateVersion({ - oldVersionId: target.versionId, claimId: target.claimId, - userId, memoryId: target.memoryId, content: decision.updatedContent, embedding: updatedEmbedding, - importance: fact.importance, sourceSite, sourceUrl, episodeId, - validFrom: logicalTimestamp, mutationReason, actorModel: config.llmModel, - }); - await deps.claims.addEvidence({ claimVersionId: newVersionId, episodeId, memoryId: target.memoryId, quoteText: fact.fact }); - const cmoId = await createMutationCanonicalObject(deps, userId, { ...fact, fact: decision.updatedContent }, sourceSite, sourceUrl, episodeId, logicalTimestamp, { - mutationType: 'update', previousObjectId: target.cmoId, claimId: target.claimId, - claimVersionId: newVersionId, previousVersionId: target.versionId, mutationReason, + const lineage = await emitLineageEvent({ claims: deps.claims, repo: deps.repo, config: deps.config }, { + kind: 'canonical-update', + userId, + fact, + updatedContent: decision.updatedContent, + updatedEmbedding, + sourceSite, + sourceUrl, + episodeId, + logicalTimestamp, + target, contradictionConfidence: decision.contradictionConfidence, }); - await deps.repo.updateMemoryMetadata(userId, target.memoryId, { cmo_id: cmoId }); + if (!lineage?.cmoId) { + throw new Error(`AUDN UPDATE 
failed: missing successor canonical object for "${target.memoryId}"`); + } + await deps.repo.updateMemoryMetadata(userId, target.memoryId, { cmo_id: lineage.cmoId }); return { outcome: 'updated', memoryId: target.memoryId }; } @@ -255,25 +258,24 @@ async function supersedeCanonicalFact( await deps.repo.expireMemory(userId, target.memoryId); const newMemoryId = await storeProjection(deps, userId, fact, embedding, sourceSite, sourceUrl, episodeId, trustScore ?? 1.0); if (!newMemoryId) return { outcome: 'skipped', memoryId: null }; - const mutationReason = `Superseded memory "${target.memoryId}" with new fact`; - const newVersionId = await deps.claims.createClaimVersion({ - claimId: target.claimId, userId, memoryId: newMemoryId, content: fact.fact, embedding, - importance: fact.importance, sourceSite, sourceUrl, episodeId, - validFrom: logicalTimestamp, - provenance: { - mutationType: 'supersede', mutationReason, previousVersionId: target.versionId, - actorModel: config.llmModel, contradictionConfidence: contradictionConfidence ?? undefined, - }, + const lineage = await emitLineageEvent({ claims: deps.claims, repo: deps.repo, config: deps.config }, { + kind: 'canonical-supersede', + userId, + fact, + embedding, + sourceSite, + sourceUrl, + episodeId, + logicalTimestamp, + target, + newMemoryId, + contradictionConfidence, }); - await deps.claims.supersedeClaimVersion(userId, target.versionId, newVersionId, logicalTimestamp ?? new Date()); - await deps.claims.setClaimCurrentVersion(target.claimId, newVersionId, 'active', logicalTimestamp); - await deps.claims.addEvidence({ claimVersionId: newVersionId, episodeId, memoryId: newMemoryId, quoteText: fact.fact }); - const cmoId = await createMutationCanonicalObject(deps, userId, fact, sourceSite, sourceUrl, episodeId, logicalTimestamp, { - mutationType: 'supersede', previousObjectId: target.cmoId, claimId: target.claimId, - claimVersionId: newVersionId, previousVersionId: target.versionId, mutationReason, contradictionConfidence, - }); - await deps.repo.updateMemoryMetadata(userId, newMemoryId, { cmo_id: cmoId }); - if (config.lessonsEnabled && deps.lessons && contradictionConfidence) { + if (!lineage?.cmoId) { + throw new Error(`AUDN SUPERSEDE failed: missing successor canonical object for "${target.memoryId}"`); + } + await deps.repo.updateMemoryMetadata(userId, newMemoryId, { cmo_id: lineage.cmoId }); + if (deps.config.lessonsEnabled && deps.lessons && contradictionConfidence) { recordContradictionLesson(deps.lessons, { userId, content: fact.fact, sourceSite, contradictionConfidence, supersededMemoryId: target.memoryId, @@ -298,23 +300,19 @@ async function deleteCanonicalFact( const targetMemory = await deps.repo.getMemoryIncludingDeleted(target.memoryId, userId); if (!targetMemory) return { outcome: 'skipped', memoryId: null }; await deps.repo.softDeleteMemory(userId, target.memoryId); - const mutationReason = `Deleted memory "${target.memoryId}" — fact: "${fact.fact.slice(0, 100)}"`; - const deleteVersionId = await deps.claims.createClaimVersion({ - claimId: target.claimId, userId, memoryId: undefined, content: `[DELETED] ${fact.fact}`, embedding: targetMemory.embedding, - importance: 0, sourceSite: '', sourceUrl: '', episodeId, - validFrom: logicalTimestamp, - provenance: { - mutationType: 'delete', mutationReason, previousVersionId: target.versionId, - actorModel: config.llmModel, contradictionConfidence: contradictionConfidence ?? 
undefined, - }, - }); - await deps.claims.supersedeClaimVersion(userId, target.versionId, deleteVersionId, logicalTimestamp ?? new Date()); - await deps.claims.invalidateClaim(userId, target.claimId, logicalTimestamp ?? new Date(), deleteVersionId); - await createMutationCanonicalObject(deps, userId, fact, sourceSite, sourceUrl, episodeId, logicalTimestamp, { - mutationType: 'delete', previousObjectId: target.cmoId, claimId: target.claimId, - claimVersionId: deleteVersionId, previousVersionId: target.versionId, mutationReason, contradictionConfidence, + await emitLineageEvent({ claims: deps.claims, repo: deps.repo, config: deps.config }, { + kind: 'canonical-delete', + userId, + fact, + sourceSite, + sourceUrl, + episodeId, + logicalTimestamp, + target, + targetEmbedding: targetMemory.embedding, + contradictionConfidence, }); - if (config.auditLoggingEnabled) { + if (deps.config.auditLoggingEnabled) { emitAuditEvent('memory:delete', userId, { reason: 'audn-delete', targetMemoryId: target.memoryId, contradictionConfidence, }, { memoryId: target.memoryId }); @@ -364,8 +362,12 @@ function extractQuotedLiterals(text: string): string[] { * sim >= 0.95: near-duplicate -> NOOP (skip storing). * Returns null when the case is ambiguous and needs full LLM AUDN. */ -function tryFastAUDN(factText: string, candidates: CandidateMemory[]): AUDNDecision | null { - if (!config.fastAudnEnabled) return null; +function tryFastAUDN( + factText: string, + candidates: CandidateMemory[], + runtimeConfig: Pick<MemoryServiceDeps['config'], 'fastAudnEnabled' | 'fastAudnDuplicateThreshold'>, +): AUDNDecision | null { + if (!runtimeConfig.fastAudnEnabled) return null; const topCandidate = candidates.reduce( (best, c) => (c.similarity > best.similarity ? c : best), @@ -376,8 +378,8 @@ function tryFastAUDN(factText: string, candidates: CandidateMemory[]): AUDNDecis return null; } - if (topCandidate.similarity >= config.fastAudnDuplicateThreshold) { - console.log(`[fast-audn] NOOP: sim=${topCandidate.similarity.toFixed(4)} >= ${config.fastAudnDuplicateThreshold} (near-duplicate of ${topCandidate.id})`); + if (topCandidate.similarity >= runtimeConfig.fastAudnDuplicateThreshold) { + console.log(`[fast-audn] NOOP: sim=${topCandidate.similarity.toFixed(4)} >= ${runtimeConfig.fastAudnDuplicateThreshold} (near-duplicate of ${topCandidate.id})`); return { action: 'NOOP', targetMemoryId: topCandidate.id, @@ -388,5 +390,3 @@ function tryFastAUDN(factText: string, candidates: CandidateMemory[]): AUDNDecis return null; } - - diff --git a/src/services/memory-crud.ts b/src/services/memory-crud.ts index 3bc38b0..4491d2c 100644 --- a/src/services/memory-crud.ts +++ b/src/services/memory-crud.ts @@ -97,7 +97,7 @@ export async function consolidate(deps: MemoryServiceDeps, userId: string): Prom /** Execute consolidation: synthesize clusters via LLM and archive originals. */ export async function performExecuteConsolidation(deps: MemoryServiceDeps, userId: string): Promise { - return executeConsolidation(deps.repo, deps.claims, userId); + return executeConsolidation(deps.repo, deps.claims, userId, undefined, deps.config); } /** Run deferred AUDN reconciliation for a user (background pass). */ diff --git a/src/services/memory-ingest.ts b/src/services/memory-ingest.ts index 029d086..2c60e62 100644 --- a/src/services/memory-ingest.ts +++ b/src/services/memory-ingest.ts @@ -3,7 +3,6 @@ * Delegates AUDN resolution to memory-audn.ts and storage to memory-storage.ts.
*/ -import { config } from '../config.js'; import { embedText } from './embedding.js'; import { cachedResolveAUDN } from './extraction-cache.js'; import { consensusExtractFacts } from './consensus-extraction.js'; @@ -90,10 +89,13 @@ export async function performIngest( await timed('ingest.backdate', () => deps.repo.backdateMemories(acc.memoryIds, sessionTimestamp)); } - const linksCreated = await timed('ingest.links', () => generateLinks(deps.repo, userId, acc.memoryIds, acc.embeddingCache)); + const linksCreated = await timed( + 'ingest.links', + () => generateLinks(deps.repo, userId, acc.memoryIds, acc.embeddingCache, deps.config), + ); let compositesCreated = 0; - if (config.compositeGroupingEnabled && storedFacts.length >= config.compositeMinClusterSize) { + if (deps.config.compositeGroupingEnabled && storedFacts.length >= deps.config.compositeMinClusterSize) { compositesCreated = await timed('ingest.composites', () => generateAndStoreComposites(deps, userId, storedFacts, acc.embeddingCache, sourceSite, sourceUrl, episodeId), ); @@ -132,7 +134,10 @@ export async function performQuickIngest( await deps.repo.backdateMemories(acc.memoryIds, sessionTimestamp); } - const linksCreated = await timed('quick-ingest.links', () => generateLinks(deps.repo, userId, acc.memoryIds, acc.embeddingCache)); + const linksCreated = await timed( + 'quick-ingest.links', + () => generateLinks(deps.repo, userId, acc.memoryIds, acc.embeddingCache, deps.config), + ); console.log(`[timing] quick-ingest.total: ${(performance.now() - ingestStart).toFixed(1)}ms (${extractedFacts.length} facts, ${acc.counters.stored} stored, ${acc.counters.skipped} skipped)`); return buildIngestResult(episodeId, extractedFacts.length, acc, linksCreated, 0); } @@ -215,7 +220,10 @@ export async function performWorkspaceIngest( await timed('ws-ingest.backdate', () => deps.repo.backdateMemories(acc.memoryIds, sessionTimestamp)); } - const linksCreated = await timed('ws-ingest.links', () => generateLinks(deps.repo, userId, acc.memoryIds, acc.embeddingCache)); + const linksCreated = await timed( + 'ws-ingest.links', + () => generateLinks(deps.repo, userId, acc.memoryIds, acc.embeddingCache, deps.config), + ); console.log(`[timing] ws-ingest.total: ${(performance.now() - ingestStart).toFixed(1)}ms (${facts.length} facts, workspace=${workspace.workspaceId})`); return buildIngestResult(episodeId, facts.length, acc, linksCreated, 0); } @@ -236,14 +244,14 @@ async function quickIngestFact( const claimSlot = await resolveDeterministicClaimSlot(deps, userId, fact); const [vectorCandidates, slotCandidates] = await timed('quick-ingest.fact.find-dupes', async () => Promise.all([ - deps.repo.findNearDuplicates(userId, embedding, config.audnCandidateThreshold), + deps.repo.findNearDuplicates(userId, embedding, deps.config.audnCandidateThreshold), findSlotConflictCandidates(deps, userId, claimSlot), ])); const candidates = mergeCandidates(vectorCandidates, slotCandidates); if (candidates.length > 0) { const topCandidate = candidates.reduce((a, b) => a.similarity > b.similarity ? a : b); - if (topCandidate.similarity >= config.fastAudnDuplicateThreshold) { + if (topCandidate.similarity >= deps.config.fastAudnDuplicateThreshold) { // Near-duplicate: skip but return the existing memory ID so callers // can link to the canonical memory (e.g. integration sync pointer rows). 
return { outcome: 'skipped', memoryId: topCandidate.id }; @@ -274,7 +282,7 @@ async function ingestFact( return { outcome: 'skipped', memoryId: null }; } - if (!passesEntropyGate(fact, embedding, entropyCtx)) { + if (!passesEntropyGate(fact, embedding, entropyCtx, deps.config)) { return { outcome: 'skipped', memoryId: null }; } @@ -318,7 +326,7 @@ async function workspaceIngestFact( const networkResult = classifyNetwork(fact as any); const candidates = await deps.repo.findNearDuplicatesInWorkspace( - workspace.workspaceId, embedding, config.audnCandidateThreshold, 10, 'all', workspace.agentId, + workspace.workspaceId, embedding, deps.config.audnCandidateThreshold, 10, 'all', workspace.agentId, ); if (candidates.length === 0) { @@ -387,8 +395,16 @@ async function storeWorkspaceMemory( } /** Check entropy gate; returns false if the fact should be skipped. */ -function passesEntropyGate(fact: FactInput, embedding: number[], entropyCtx: EntropyContext): boolean { - if (!config.entropyGateEnabled) return true; +function passesEntropyGate( + fact: FactInput, + embedding: number[], + entropyCtx: EntropyContext, + runtimeConfig: Pick< + MemoryServiceDeps['config'], + 'entropyGateEnabled' | 'entropyGateThreshold' | 'entropyGateAlpha' + >, +): boolean { + if (!runtimeConfig.entropyGateEnabled) return true; const entropyResult = computeEntropyScore( { windowEntities: fact.keywords, @@ -396,7 +412,7 @@ function passesEntropyGate(fact: FactInput, embedding: number[], entropyCtx: Ent windowEmbedding: embedding, previousEmbedding: entropyCtx.previousEmbedding, }, - { threshold: config.entropyGateThreshold, alpha: config.entropyGateAlpha }, + { threshold: runtimeConfig.entropyGateThreshold, alpha: runtimeConfig.entropyGateAlpha }, ); entropyCtx.previousEmbedding = embedding; for (const kw of fact.keywords) entropyCtx.seenEntities.add(kw); diff --git a/src/services/memory-lineage.ts b/src/services/memory-lineage.ts new file mode 100644 index 0000000..98ed47f --- /dev/null +++ b/src/services/memory-lineage.ts @@ -0,0 +1,374 @@ +/** + * Internal claim-lineage emission seam for the existing lineage-producing write + * paths only. + * + * This module centralizes the current claim/version/evidence/canonical-object + * write sequences without changing their semantics. It deliberately models the + * current consolidation anomaly as its own variant: consolidation creates a + * claim/version pair but does not emit a mutation canonical memory object. 
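+ *
+ * Call shape (sketch drawn from the AUDN delete path; the exact field list
+ * per event kind is defined by the LineageEvent union below):
+ *
+ *   await emitLineageEvent({ claims, repo, config }, {
+ *     kind: 'canonical-delete',
+ *     userId, fact, sourceSite, sourceUrl, episodeId, logicalTimestamp,
+ *     target, targetEmbedding, contradictionConfidence,
+ *   });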
+ * + * Out of scope: + * - schema changes + * - new mutation types + * - workspace/scope behavior changes + * - routing lineage-bypassing paths through claim versions + */ + +import type { ClaimSlotInput } from '../db/claim-repository.js'; +import type { IngestRuntimeConfig } from './memory-service-types.js'; +import type { ClaimTarget, FactInput } from './memory-service-types.js'; + +type MutationType = 'add' | 'update' | 'supersede' | 'delete'; +type MutationProvenanceType = MutationType | 'clarify'; + +type MutationCanonicalObjectRepo = { + storeCanonicalMemoryObject(input: { + userId: string; + objectFamily: 'ingested_fact'; + canonicalPayload: ReturnType<typeof buildCanonicalPayload>; + provenance: { episodeId: string; sourceSite: string; sourceUrl: string }; + observedAt: Date | undefined; + lineage: { + mutationType: MutationType; + previousObjectId: string | null; + claimId?: string; + claimVersionId?: string; + previousVersionId?: string; + mutationReason?: string; + contradictionConfidence?: number | null; + actorModel?: string | null; + }; + }): Promise<string>; +}; + +type LineageClaimsPort = { + createClaim(userId: string, claimType: string, validAt?: Date, claimSlot?: ClaimSlotInput | null): Promise<string>; + createClaimVersion(input: { + claimId: string; + userId: string; + memoryId?: string; + content: string; + embedding: number[]; + importance: number; + sourceSite: string; + sourceUrl?: string; + episodeId?: string; + validFrom?: Date; + provenance?: { + mutationType?: MutationProvenanceType; + mutationReason?: string; + previousVersionId?: string; + actorModel?: string; + contradictionConfidence?: number; + }; + }): Promise<string>; + setClaimCurrentVersion(claimId: string, versionId: string | null, status?: string, validAt?: Date): Promise<void>; + addEvidence(input: { claimVersionId: string; episodeId?: string; memoryId?: string; quoteText?: string; speaker?: string }): Promise<void>; + createUpdateVersion(input: { + oldVersionId: string; + claimId: string; + userId: string; + memoryId: string; + content: string; + embedding: number[]; + importance: number; + sourceSite: string; + sourceUrl?: string; + episodeId?: string; + validFrom?: Date; + mutationReason?: string; + actorModel?: string; + }): Promise<string>; + supersedeClaimVersion(userId: string, versionId: string, supersededByVersionId: string | null, validTo?: Date): Promise<void>; + invalidateClaim(userId: string, claimId: string, invalidAt?: Date, invalidatedByVersionId?: string | null, status?: string): Promise<void>; +}; + +type LineageDeps = { + claims: LineageClaimsPort; + repo?: MutationCanonicalObjectRepo; + config: Pick<IngestRuntimeConfig, 'llmModel'>; +}; + +type BackfillMemory = { + id: string; + content: string; + embedding: number[]; + importance: number; + sourceSite: string; + sourceUrl: string; + episodeId?: string; + createdAt: Date; + memoryType: string; + cmoId: string | null; +}; + +type LineageEvent = + | { kind: 'canonical-add'; userId: string; fact: FactInput; embedding: number[]; sourceSite: string; sourceUrl: string; episodeId: string; logicalTimestamp: Date | undefined; claimSlot: ClaimSlotInput | null; createProjection: (cmoId: string) => Promise<string | null> } + | { kind: 'claim-backfill'; userId: string; memory: BackfillMemory } + | { kind: 'consolidation-add'; userId: string; memoryId: string; content: string; embedding: number[]; importance: number; sourceSite: string; mutationReason: string } + | { kind: 'canonical-update'; userId: string; fact: FactInput; updatedContent: string; updatedEmbedding: number[]; sourceSite: string; sourceUrl: string; episodeId: string; logicalTimestamp: Date | undefined;
target: ClaimTarget; contradictionConfidence?: number | null } + | { kind: 'canonical-supersede'; userId: string; fact: FactInput; embedding: number[]; sourceSite: string; sourceUrl: string; episodeId: string; logicalTimestamp: Date | undefined; target: ClaimTarget; newMemoryId: string; contradictionConfidence?: number | null } + | { kind: 'canonical-delete'; userId: string; fact: FactInput; sourceSite: string; sourceUrl: string; episodeId: string; logicalTimestamp: Date | undefined; target: ClaimTarget; targetEmbedding: number[]; contradictionConfidence?: number | null }; + +export type LineageEmission = { claimId: string; versionId: string; memoryId: string | null; cmoId: string | null }; + +export async function emitLineageEvent( + deps: LineageDeps, + event: LineageEvent, +): Promise { + switch (event.kind) { + case 'canonical-add': + return emitCanonicalAdd(deps, event); + case 'claim-backfill': + return emitBackfill(deps, event); + case 'consolidation-add': + return emitConsolidationAdd(deps, event); + case 'canonical-update': + return emitCanonicalUpdate(deps, event); + case 'canonical-supersede': + return emitCanonicalSupersede(deps, event); + case 'canonical-delete': + return emitCanonicalDelete(deps, event); + } +} + +function buildCanonicalPayload(fact: FactInput) { + return { + factText: fact.fact, + factType: fact.type, + headline: fact.headline, + keywords: fact.keywords, + }; +} + +async function emitCanonicalAdd( + deps: LineageDeps, + event: Extract, +): Promise { + const cmoId = await requireRepo(deps).storeCanonicalMemoryObject({ + userId: event.userId, + objectFamily: 'ingested_fact', + canonicalPayload: buildCanonicalPayload(event.fact), + provenance: { episodeId: event.episodeId, sourceSite: event.sourceSite, sourceUrl: event.sourceUrl }, + observedAt: event.logicalTimestamp, + lineage: { mutationType: 'add', previousObjectId: null }, + }); + const memoryId = await event.createProjection(cmoId); + if (!memoryId) return null; + + const claimId = await deps.claims.createClaim( + event.userId, + event.fact.type, + event.logicalTimestamp, + event.claimSlot, + ); + const versionId = await deps.claims.createClaimVersion({ + claimId, + userId: event.userId, + memoryId, + content: event.fact.fact, + embedding: event.embedding, + importance: event.fact.importance, + sourceSite: event.sourceSite, + sourceUrl: event.sourceUrl, + episodeId: event.episodeId, + validFrom: event.logicalTimestamp, + provenance: { mutationType: 'add', actorModel: lineageActorModel(deps) }, + }); + await deps.claims.setClaimCurrentVersion(claimId, versionId, 'active', event.logicalTimestamp); + await deps.claims.addEvidence({ claimVersionId: versionId, episodeId: event.episodeId, memoryId, quoteText: event.fact.fact }); + return { claimId, versionId, memoryId, cmoId }; +} + +async function emitBackfill( + deps: LineageDeps, + event: Extract, +): Promise { + const claimId = await deps.claims.createClaim(event.userId, event.memory.memoryType, event.memory.createdAt); + const versionId = await deps.claims.createClaimVersion({ + claimId, + userId: event.userId, + memoryId: event.memory.id, + content: event.memory.content, + embedding: event.memory.embedding, + importance: event.memory.importance, + sourceSite: event.memory.sourceSite, + sourceUrl: event.memory.sourceUrl, + episodeId: event.memory.episodeId, + validFrom: event.memory.createdAt, + }); + await deps.claims.setClaimCurrentVersion(claimId, versionId, 'active', event.memory.createdAt); + await deps.claims.addEvidence({ + claimVersionId: 
versionId, + episodeId: event.memory.episodeId, + memoryId: event.memory.id, + quoteText: event.memory.content, + }); + return { claimId, versionId, memoryId: event.memory.id, cmoId: event.memory.cmoId }; +} + +async function emitConsolidationAdd( + deps: LineageDeps, + event: Extract, +): Promise { + const claimId = await deps.claims.createClaim(event.userId, 'consolidated'); + const versionId = await deps.claims.createClaimVersion({ + claimId, + userId: event.userId, + memoryId: event.memoryId, + content: event.content, + embedding: event.embedding, + importance: event.importance, + sourceSite: event.sourceSite, + provenance: { + mutationType: 'add', + mutationReason: event.mutationReason, + actorModel: lineageActorModel(deps), + }, + }); + await deps.claims.setClaimCurrentVersion(claimId, versionId); + return { claimId, versionId, memoryId: event.memoryId, cmoId: null }; +} + +async function emitCanonicalUpdate( + deps: LineageDeps, + event: Extract, +): Promise { + const mutationReason = `Updated from: "${event.fact.fact.slice(0, 100)}"`; + const versionId = await deps.claims.createUpdateVersion({ + oldVersionId: event.target.versionId, + claimId: event.target.claimId, + userId: event.userId, + memoryId: event.target.memoryId, + content: event.updatedContent, + embedding: event.updatedEmbedding, + importance: event.fact.importance, + sourceSite: event.sourceSite, + sourceUrl: event.sourceUrl, + episodeId: event.episodeId, + validFrom: event.logicalTimestamp, + mutationReason, + actorModel: lineageActorModel(deps), + }); + await deps.claims.addEvidence({ + claimVersionId: versionId, + episodeId: event.episodeId, + memoryId: event.target.memoryId, + quoteText: event.fact.fact, + }); + const cmoId = await createMutationCanonicalObject(deps, event, versionId, mutationReason, { + ...event.fact, + fact: event.updatedContent, + }); + return { claimId: event.target.claimId, versionId, memoryId: event.target.memoryId, cmoId }; +} + +async function emitCanonicalSupersede( + deps: LineageDeps, + event: Extract, +): Promise { + const mutationReason = `Superseded memory "${event.target.memoryId}" with new fact`; + const versionId = await deps.claims.createClaimVersion({ + claimId: event.target.claimId, + userId: event.userId, + memoryId: event.newMemoryId, + content: event.fact.fact, + embedding: event.embedding, + importance: event.fact.importance, + sourceSite: event.sourceSite, + sourceUrl: event.sourceUrl, + episodeId: event.episodeId, + validFrom: event.logicalTimestamp, + provenance: { + mutationType: 'supersede', + mutationReason, + previousVersionId: event.target.versionId, + actorModel: lineageActorModel(deps), + contradictionConfidence: event.contradictionConfidence ?? undefined, + }, + }); + await deps.claims.supersedeClaimVersion(event.userId, event.target.versionId, versionId, event.logicalTimestamp ?? 
new Date()); + await deps.claims.setClaimCurrentVersion(event.target.claimId, versionId, 'active', event.logicalTimestamp); + await deps.claims.addEvidence({ + claimVersionId: versionId, + episodeId: event.episodeId, + memoryId: event.newMemoryId, + quoteText: event.fact.fact, + }); + const cmoId = await createMutationCanonicalObject(deps, event, versionId, mutationReason, event.fact); + return { claimId: event.target.claimId, versionId, memoryId: event.newMemoryId, cmoId }; +} + +async function emitCanonicalDelete( + deps: LineageDeps, + event: Extract, +): Promise { + const mutationReason = `Deleted memory "${event.target.memoryId}" — fact: "${event.fact.fact.slice(0, 100)}"`; + const versionId = await deps.claims.createClaimVersion({ + claimId: event.target.claimId, + userId: event.userId, + content: `[DELETED] ${event.fact.fact}`, + embedding: event.targetEmbedding, + importance: 0, + sourceSite: '', + sourceUrl: '', + episodeId: event.episodeId, + validFrom: event.logicalTimestamp, + provenance: { + mutationType: 'delete', + mutationReason, + previousVersionId: event.target.versionId, + actorModel: lineageActorModel(deps), + contradictionConfidence: event.contradictionConfidence ?? undefined, + }, + }); + await deps.claims.supersedeClaimVersion(event.userId, event.target.versionId, versionId, event.logicalTimestamp ?? new Date()); + await deps.claims.invalidateClaim(event.userId, event.target.claimId, event.logicalTimestamp ?? new Date(), versionId); + const cmoId = await createMutationCanonicalObject(deps, event, versionId, mutationReason, event.fact); + return { claimId: event.target.claimId, versionId, memoryId: null, cmoId }; +} + +async function createMutationCanonicalObject( + deps: LineageDeps, + event: Extract, + claimVersionId: string, + mutationReason: string, + fact: FactInput, +): Promise { + return requireRepo(deps).storeCanonicalMemoryObject({ + userId: event.userId, + objectFamily: 'ingested_fact', + canonicalPayload: buildCanonicalPayload(fact), + provenance: { episodeId: event.episodeId, sourceSite: event.sourceSite, sourceUrl: event.sourceUrl }, + observedAt: event.logicalTimestamp, + lineage: { + mutationType: mutationTypeFor(event.kind), + previousObjectId: event.target.cmoId, + claimId: event.target.claimId, + claimVersionId, + previousVersionId: event.target.versionId, + mutationReason, + contradictionConfidence: event.contradictionConfidence ?? undefined, + actorModel: lineageActorModel(deps), + }, + }); +} + +function mutationTypeFor( + kind: 'canonical-update' | 'canonical-supersede' | 'canonical-delete', +): 'update' | 'supersede' | 'delete' { + if (kind === 'canonical-update') return 'update'; + if (kind === 'canonical-supersede') return 'supersede'; + return 'delete'; +} + +function requireRepo(deps: LineageDeps): MutationCanonicalObjectRepo { + if (!deps.repo) { + throw new Error('Lineage event requires canonical object repository access'); + } + return deps.repo; +} + +function lineageActorModel(deps: LineageDeps): string { + return deps.config.llmModel; +} diff --git a/src/services/memory-search.ts b/src/services/memory-search.ts index d9152c1..c8ef8e5 100644 --- a/src/services/memory-search.ts +++ b/src/services/memory-search.ts @@ -3,7 +3,6 @@ * Contains search, fastSearch, workspaceSearch, and all private search helpers. 
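+ *
+ * As of this change every gate in this module reads the runtime-owned
+ * deps.config seam rather than the static config import; in sketch form:
+ *   if (!deps.config.lessonsEnabled || !deps.lessons) return undefined;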
  */
-import { config } from '../config.js';
 import { type SearchResult } from '../db/memory-repository.js';
 import { checkLessons, recordContradictionLesson, type LessonCheckResult } from './lesson-service.js';
 import { validateConsensus, type ConsensusResult } from './consensus-validation.js';
@@ -24,7 +23,7 @@ import type { MemoryServiceDeps, RetrievalMode, RetrievalOptions, RetrievalResul
 
 /** Check lessons safety gate; returns undefined if lessons disabled. */
 async function checkSearchLessons(deps: MemoryServiceDeps, userId: string, query: string): Promise<LessonCheckResult | undefined> {
-  if (!config.lessonsEnabled || !deps.lessons) return undefined;
+  if (!deps.config.lessonsEnabled || !deps.lessons) return undefined;
   return checkLessons(deps.lessons, userId, query);
 }
 
@@ -76,6 +75,7 @@ async function executeSearchStep(
     searchStrategy: retrievalOptions?.searchStrategy,
     skipRepairLoop: retrievalOptions?.skipRepairLoop,
     skipReranking: retrievalOptions?.skipReranking,
+    runtimeConfig: deps.config,
   });
   return { memories: pipelineResult.filtered, activeTrace: pipelineResult.trace };
 }
 
@@ -102,7 +102,7 @@ async function postProcessResults(
     }
   }
 
-  if (!config.consensusValidationEnabled || memories.length < config.consensusMinMemories) {
+  if (!deps.config.consensusValidationEnabled || memories.length < deps.config.consensusMinMemories) {
     return { memories };
   }
 
@@ -114,7 +114,7 @@ async function postProcessResults(
     removedCount: consensusResult.removedMemoryIds.length,
     removedIds: consensusResult.removedMemoryIds,
   });
-  if (config.lessonsEnabled && deps.lessons) {
+  if (deps.config.lessonsEnabled && deps.lessons) {
     recordConsensusLessons(deps, userId, consensusResult, memories).catch(
       (err) => console.error('Consensus lesson recording failed:', err),
     );
@@ -243,7 +243,7 @@ function recordSearchSideEffects(
   if (!asOf) {
     for (const memory of outputMemories) deps.repo.touchMemory(memory.id).catch(() => {});
   }
-  if (config.auditLoggingEnabled) {
+  if (deps.config.auditLoggingEnabled) {
     emitAuditEvent('memory:retrieve', userId, {
       query: query.slice(0, 200),
       resultCount: outputMemories.length,
@@ -291,7 +291,7 @@ export async function performSearch(
     return { memories: [], injectionText: '', citations: [], retrievalMode: retrievalOptions?.retrievalMode ?? 'flat', lessonCheck };
   }
 
-  const { limit: effectiveLimit, classification } = resolveSearchLimitDetailed(query, limit);
+  const { limit: effectiveLimit, classification } = resolveSearchLimitDetailed(query, limit, deps.config);
 
   const trace = new TraceCollector(query, userId);
   trace.event('query-classification', { label: classification.label, limit: effectiveLimit, matchedMarker: classification.matchedMarker });
@@ -340,7 +340,7 @@ export async function performWorkspaceSearch(
     retrievalOptions?: RetrievalOptions;
   } = {},
 ): Promise<RetrievalResult> {
-  const { limit: effectiveLimit } = resolveSearchLimitDetailed(query, options.limit);
+  const { limit: effectiveLimit } = resolveSearchLimitDetailed(query, options.limit, deps.config);
 
   const queryEmbedding = await embedText(query, 'query');
   const memories = await deps.repo.searchSimilarInWorkspace(
diff --git a/src/services/memory-service-types.ts b/src/services/memory-service-types.ts
index f571705..56fa387 100644
--- a/src/services/memory-service-types.ts
+++ b/src/services/memory-service-types.ts
@@ -142,6 +142,7 @@ export interface RetrievalObservability {
  * Exposes the repositories and optional services needed by ingest, search, and CRUD.
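+ *
+ * @example
+ * // Hypothetical test wiring (constructor shape from memory-service.ts below;
+ * // `testRuntimeConfig` is an assumed fixture, not part of this change):
+ * const service = new MemoryService(repo, claims, undefined, undefined, undefined, testRuntimeConfig);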
*/ export interface MemoryServiceDeps { + config: import('../app/runtime-container.js').CoreRuntimeConfig & IngestRuntimeConfig; repo: import('../db/memory-repository.js').MemoryRepository; claims: import('../db/claim-repository.js').ClaimRepository; entities: import('../db/repository-entities.js').EntityRepository | null; @@ -149,3 +150,19 @@ export interface MemoryServiceDeps { observationService: import('./observation-service.js').ObservationService | null; uriResolver: import('./atomicmem-uri.js').URIResolver; } + +/** Explicit ingest/runtime config subset threaded through current ingest seams. */ +export interface IngestRuntimeConfig { + audnCandidateThreshold: number; + auditLoggingEnabled: boolean; + compositeGroupingEnabled: boolean; + compositeMinClusterSize: number; + entityGraphEnabled: boolean; + entropyGateAlpha: number; + entropyGateEnabled: boolean; + entropyGateThreshold: number; + fastAudnDuplicateThreshold: number; + fastAudnEnabled: boolean; + lessonsEnabled: boolean; + llmModel: string; +} diff --git a/src/services/memory-service.ts b/src/services/memory-service.ts index a8803be..188a347 100644 --- a/src/services/memory-service.ts +++ b/src/services/memory-service.ts @@ -4,6 +4,7 @@ * while keeping each concern in a focused, testable module. */ +import { config } from '../config.js'; import { MemoryRepository } from '../db/memory-repository.js'; import { ClaimRepository } from '../db/claim-repository.js'; import { EntityRepository } from '../db/repository-entities.js'; @@ -36,8 +37,10 @@ export class MemoryService { entities?: EntityRepository, lessons?: LessonRepository, observationService?: ObservationService, + runtimeConfig?: MemoryServiceDeps['config'], ) { this.deps = { + config: runtimeConfig ?? config, repo, claims, entities: entities ?? null, diff --git a/src/services/memory-storage.ts b/src/services/memory-storage.ts index 44d0a23..056acb3 100644 --- a/src/services/memory-storage.ts +++ b/src/services/memory-storage.ts @@ -3,7 +3,6 @@ * These helpers are used by both the ingest pipeline and the AUDN decision executor. */ -import { config } from '../config.js'; import { type ClaimSlotInput } from '../db/claim-repository.js'; import { embedTexts } from './embedding.js'; import { type ExtractedEntity, type ExtractedRelation } from './extraction.js'; @@ -15,6 +14,7 @@ import { inferNamespace, classifyNamespace } from './namespace-retrieval.js'; import { generateL1Overview } from './tiered-context.js'; import { emitAuditEvent } from './audit-events.js'; import { derivePersistedClaimSlot } from './memory-crud.js'; +import { emitLineageEvent } from './memory-lineage.js'; import type { AudnFactContext, ClaimTarget, @@ -29,31 +29,27 @@ export async function storeCanonicalFact( ctx: AudnFactContext, ): Promise<{ outcome: Outcome; memoryId: string | null }> { const { userId, fact, embedding, sourceSite, sourceUrl, episodeId, trustScore, claimSlot, logicalTimestamp } = ctx; - const cmoId = await deps.repo.storeCanonicalMemoryObject({ + const lineage = await emitLineageEvent({ claims: deps.claims, repo: deps.repo, config: deps.config }, { + kind: 'canonical-add', userId, - objectFamily: 'ingested_fact', - canonicalPayload: buildCanonicalPayload(fact), - provenance: { episodeId, sourceSite, sourceUrl }, - observedAt: logicalTimestamp, - lineage: { mutationType: 'add', previousObjectId: null }, + fact, + embedding, + sourceSite, + sourceUrl, + episodeId, + logicalTimestamp, + claimSlot: claimSlot ?? 
null,
+    createProjection: async (cmoId) =>
+      storeProjection(deps, userId, fact, embedding, sourceSite, sourceUrl, episodeId, trustScore, cmoId),
   });
-  const memoryId = await storeProjection(deps, userId, fact, embedding, sourceSite, sourceUrl, episodeId, trustScore, cmoId);
-  if (!memoryId) return { outcome: 'skipped', memoryId: null };
-  const claimId = await deps.claims.createClaim(userId, fact.type, logicalTimestamp, claimSlot);
-  const versionId = await deps.claims.createClaimVersion({
-    claimId, userId, memoryId, content: fact.fact, embedding,
-    importance: fact.importance, sourceSite, sourceUrl, episodeId,
-    validFrom: logicalTimestamp,
-    provenance: { mutationType: 'add', actorModel: config.llmModel },
-  });
-  await deps.claims.setClaimCurrentVersion(claimId, versionId, 'active', logicalTimestamp);
-  await deps.claims.addEvidence({ claimVersionId: versionId, episodeId, memoryId, quoteText: fact.fact });
-  if (config.entityGraphEnabled && deps.entities) {
+  if (!lineage?.memoryId) return { outcome: 'skipped', memoryId: null };
+  const memoryId = lineage.memoryId;
+  if (deps.config.entityGraphEnabled && deps.entities) {
     await resolveAndLinkEntities(deps, userId, memoryId, fact.entities, fact.relations, embedding);
     if (!claimSlot) {
       const persistedSlot = await derivePersistedClaimSlot(deps, userId, memoryId);
       if (persistedSlot) {
-        await deps.claims.updateClaimSlot(userId, claimId, persistedSlot);
+        await deps.claims.updateClaimSlot(userId, lineage.claimId, persistedSlot);
       }
     }
   }
@@ -66,44 +62,6 @@ export async function storeCanonicalFact(
   return { outcome: 'stored', memoryId };
 }
 
-function buildCanonicalPayload(fact: FactInput) {
-  return {
-    factText: fact.fact,
-    factType: fact.type,
-    headline: fact.headline,
-    keywords: fact.keywords,
-  };
-}
-
-export async function createMutationCanonicalObject(
-  deps: MemoryServiceDeps,
-  userId: string,
-  fact: FactInput,
-  sourceSite: string,
-  sourceUrl: string,
-  episodeId: string,
-  logicalTimestamp: Date | undefined,
-  lineage: {
-    mutationType: 'update' | 'supersede' | 'delete';
-    previousObjectId: string | null;
-    claimId: string;
-    claimVersionId: string;
-    previousVersionId: string;
-    mutationReason: string;
-    contradictionConfidence?: number | null;
-    actorModel?: string | null;
-  },
-): Promise<string> {
-  return deps.repo.storeCanonicalMemoryObject({
-    userId,
-    objectFamily: 'ingested_fact',
-    canonicalPayload: buildCanonicalPayload(fact),
-    provenance: { episodeId, sourceSite, sourceUrl },
-    observedAt: logicalTimestamp,
-    lineage: { ...lineage, actorModel: config.llmModel },
-  });
-}
-
 export async function storeProjection(
   deps: MemoryServiceDeps,
   userId: string,
@@ -115,7 +73,7 @@ export async function storeProjection(
   trustScore: number,
   cmoId?: string,
 ): Promise<string | null> {
-  const namespace = config.namespaceClassificationEnabled
+  const namespace = deps.config.namespaceClassificationEnabled
     ? await classifyNamespace(fact.fact, sourceSite, fact.keywords)
     : inferNamespace(fact.fact, sourceSite, fact.keywords);
 
@@ -153,7 +111,7 @@ export async function storeProjection(
     })));
   }
 
-  if (config.auditLoggingEnabled) {
+  if (deps.config.auditLoggingEnabled) {
     emitAuditEvent('memory:ingest', userId, {
       factType: fact.type, importance: fact.importance, trustScore,
     }, { memoryId, sourceSite });
@@ -302,20 +260,29 @@ export async function ensureClaimTarget(deps: MemoryServiceDeps, userId: string,
   const version = await deps.claims.getClaimVersionByMemoryId(userId, memoryId);
   if (version) return { claimId: version.claim_id, versionId: version.id, memoryId, cmoId };
 
-  const claimId = await deps.claims.createClaim(userId, memory.memory_type, memory.created_at);
-  const versionId = await deps.claims.createClaimVersion({
-    claimId, userId, memoryId: memory.id, content: memory.content, embedding: memory.embedding,
-    importance: memory.importance, sourceSite: memory.source_site, sourceUrl: memory.source_url,
-    episodeId: memory.episode_id ?? undefined, validFrom: memory.created_at,
+  const lineage = await emitLineageEvent({ claims: deps.claims, config: deps.config }, {
+    kind: 'claim-backfill',
+    userId,
+    memory: {
+      id: memory.id,
+      content: memory.content,
+      embedding: memory.embedding,
+      importance: memory.importance,
+      sourceSite: memory.source_site,
+      sourceUrl: memory.source_url,
+      episodeId: memory.episode_id ?? undefined,
+      createdAt: memory.created_at,
+      memoryType: memory.memory_type,
+      cmoId,
+    },
   });
-  await deps.claims.setClaimCurrentVersion(claimId, versionId, 'active', memory.created_at);
-  await deps.claims.addEvidence({ claimVersionId: versionId, episodeId: memory.episode_id ?? undefined, memoryId: memory.id, quoteText: memory.content });
-  return { claimId, versionId, memoryId: memory.id, cmoId };
+  if (!lineage) throw new Error(`Claim backfill unexpectedly skipped for memory: ${memory.id}`);
+  return { claimId: lineage.claimId, versionId: lineage.versionId, memoryId: memory.id, cmoId };
 }
 
 export async function findConflictCandidates(deps: MemoryServiceDeps, userId: string, factText: string, embedding: number[]): Promise<SearchResult[]> {
   const [vectorCandidates, keywordCandidates] = await Promise.all([
-    deps.repo.findNearDuplicates(userId, embedding, config.audnCandidateThreshold),
+    deps.repo.findNearDuplicates(userId, embedding, deps.config.audnCandidateThreshold),
     deps.repo.findKeywordCandidates(userId, extractConflictKeywords(factText)),
   ]);
   return mergeCandidates(vectorCandidates, keywordCandidates);
diff --git a/src/services/query-expansion.ts b/src/services/query-expansion.ts
index 0d3b47b..a8a7b11 100644
--- a/src/services/query-expansion.ts
+++ b/src/services/query-expansion.ts
@@ -18,11 +18,17 @@
  */
 
 import { config } from '../config.js';
+import type { CoreRuntimeConfig } from '../app/runtime-container.js';
 import type { EntityRepository } from '../db/repository-entities.js';
 import type { MemoryRepository, SearchResult } from '../db/memory-repository.js';
 import { llm } from './llm.js';
 import { embedText } from './embedding.js';
 
+type SearchExpansionRuntimeConfig = Pick<
+  CoreRuntimeConfig,
+  'queryExpansionMinSimilarity' | 'queryAugmentationMaxEntities' | 'queryAugmentationMinSimilarity'
+>;
+
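+// The runtime-config parameters added below default to the static config
+// import, so existing callers keep their behavior; a caller that owns its
+// config can pass the subset explicitly. Sketch (caller-side names assumed):
+//   await augmentQueryWithEntities(entityRepo, userId, query, queryEmbedding, deps.config);
+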
 const ENTITY_EXTRACTION_PROMPT = 'Extract entity names and conceptual topics from this search query. ' +
   'Return a JSON object with two arrays: ' +
@@ -82,6 +88,7 @@ async function findEntitiesByTerms(
   userId: string,
   terms: string[],
   limit: number,
+  runtimeConfig: SearchExpansionRuntimeConfig = config,
 ): Promise<string[]> {
   if (terms.length === 0) return [];
 
@@ -89,7 +96,7 @@ async function findEntitiesByTerms(
   for (const term of terms) {
     const embedding = await embedText(term);
     const matches = await entityRepo.searchEntities(
-      userId, embedding, limit, config.queryExpansionMinSimilarity,
+      userId, embedding, limit, runtimeConfig.queryExpansionMinSimilarity,
     );
     for (const match of matches) {
       allIds.add(match.id);
@@ -110,6 +117,7 @@ export async function expandQueryViaEntities(
   queryEmbedding: number[],
   excludeIds: Set<string>,
   budget: number,
+  runtimeConfig: SearchExpansionRuntimeConfig = config,
 ): Promise<{ memories: SearchResult[]; expansion: QueryExpansionResult }> {
   const { entities, concepts } = await extractQueryTerms(query);
   const allTerms = [...entities, ...concepts];
@@ -121,7 +129,13 @@ export async function expandQueryViaEntities(
     };
   }
 
-  const matchedEntityIds = await findEntitiesByTerms(entityRepo, userId, allTerms, 10);
+  const matchedEntityIds = await findEntitiesByTerms(
+    entityRepo,
+    userId,
+    allTerms,
+    10,
+    runtimeConfig,
+  );
 
   if (matchedEntityIds.length === 0) {
     return {
@@ -183,12 +197,13 @@ export async function augmentQueryWithEntities(
   userId: string,
   query: string,
   queryEmbedding: number[],
+  runtimeConfig: SearchExpansionRuntimeConfig = config,
 ): Promise {
   const matches = await entityRepo.searchEntities(
     userId,
     queryEmbedding,
-    config.queryAugmentationMaxEntities,
-    config.queryAugmentationMinSimilarity,
+    runtimeConfig.queryAugmentationMaxEntities,
+    runtimeConfig.queryAugmentationMinSimilarity,
   );
 
   const matchedEntities = matches.map((e) => ({
diff --git a/src/services/reranker.ts b/src/services/reranker.ts
index 907b7b4..f38d5d5 100644
--- a/src/services/reranker.ts
+++ b/src/services/reranker.ts
@@ -9,37 +9,47 @@
  */
 
 import type { SearchResult } from '../db/memory-repository.js';
-import { config } from '../config.js';
+import { config, type CrossEncoderDtype } from '../config.js';
 
 let tokenizer: Awaited<ReturnType<typeof loadTokenizer>> | null = null;
 let model: Awaited<ReturnType<typeof loadModel>> | null = null;
-let loadedModelId: string | null = null;
+let loadedModelKey: string | null = null;
 let loadPromise: Promise<void> | null = null;
 
 /** Serialize ONNX inference to prevent mutex corruption (see onnx-stability-issue.md).
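+ * The pattern, in sketch form (the queue's call sites are outside this hunk,
+ * so the exact shape here is assumed):
+ *   inferenceQueue = inferenceQueue.then(() => runOnce()).catch(() => {});
+ *   await inferenceQueue;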
  */
 let inferenceQueue: Promise<unknown> = Promise.resolve();
 
+export interface RerankerRuntimeConfig {
+  crossEncoderModel: string;
+  crossEncoderDtype: CrossEncoderDtype;
+}
+
 async function loadTokenizer(modelId: string) {
   const { AutoTokenizer } = await import('@huggingface/transformers');
   return AutoTokenizer.from_pretrained(modelId);
 }
 
-async function loadModel(modelId: string) {
+async function loadModel(modelId: string, runtimeConfig: RerankerRuntimeConfig) {
   const { AutoModelForSequenceClassification } = await import('@huggingface/transformers');
   return AutoModelForSequenceClassification.from_pretrained(modelId, {
-    dtype: config.crossEncoderDtype,
+    dtype: runtimeConfig.crossEncoderDtype,
   });
 }
 
-async function ensureLoaded(): Promise<void> {
-  const modelId = config.crossEncoderModel;
-  if (tokenizer && model && loadedModelId === modelId) return;
+function buildRerankerConfigKey(runtimeConfig: RerankerRuntimeConfig): string {
+  return `${runtimeConfig.crossEncoderModel}:${runtimeConfig.crossEncoderDtype}`;
+}
+
+async function ensureLoaded(runtimeConfig: RerankerRuntimeConfig = config): Promise<void> {
+  const modelId = runtimeConfig.crossEncoderModel;
+  const modelKey = buildRerankerConfigKey(runtimeConfig);
+  if (tokenizer && model && loadedModelKey === modelKey) return;
   if (loadPromise) {
     await loadPromise;
     return;
   }
   loadPromise = (async () => {
-    console.log(`[reranker] Loading ${modelId}...`);
+    console.log(`[reranker] Loading ${modelId} (${runtimeConfig.crossEncoderDtype})...`);
     const start = Date.now();
-    [tokenizer, model] = await Promise.all([loadTokenizer(modelId), loadModel(modelId)]);
-    loadedModelId = modelId;
-    console.log(`[reranker] Loaded ${modelId} in ${Date.now() - start}ms`);
+    [tokenizer, model] = await Promise.all([loadTokenizer(modelId), loadModel(modelId, runtimeConfig)]);
+    loadedModelKey = modelKey;
+    console.log(`[reranker] Loaded ${modelId} (${runtimeConfig.crossEncoderDtype}) in ${Date.now() - start}ms`);
   })();
   try {
     await loadPromise;
@@ -59,10 +69,11 @@ function sigmoid(x: number): number {
 export async function rerankCandidates(
   query: string,
   candidates: SearchResult[],
+  runtimeConfig: RerankerRuntimeConfig = config,
 ): Promise<SearchResult[]> {
   if (candidates.length === 0) return candidates;
 
-  await ensureLoaded();
+  await ensureLoaded(runtimeConfig);
 
   const start = Date.now();
   const queries = candidates.map(() => query);
@@ -102,7 +113,7 @@ export async function rerankCandidates(
 
   const ms = Date.now() - start;
   console.log(
-    `[reranker] Scored ${candidates.length} candidates with ${loadedModelId} in ${ms}ms (top: ${reranked[0]?.score.toFixed(3)})`,
+    `[reranker] Scored ${candidates.length} candidates with ${runtimeConfig.crossEncoderModel} (${runtimeConfig.crossEncoderDtype}) in ${ms}ms (top: ${reranked[0]?.score.toFixed(3)})`,
   );
 
   return reranked;
diff --git a/src/services/retrieval-format.ts b/src/services/retrieval-format.ts
index 9ce6619..ef45763 100644
--- a/src/services/retrieval-format.ts
+++ b/src/services/retrieval-format.ts
@@ -76,6 +76,10 @@ export interface RetrievalCitation {
   importance: number;
 }
 
+export interface RetrievalFormatOptions {
+  stagedLoadingEnabled?: boolean;
+}
+
 export function buildCitations(memories: SearchResult[]): RetrievalCitation[] {
   return memories.map((memory) => ({
     memory_id: memory.id,
@@ -203,9 +207,13 @@ function formatDuration(days: number): string {
   return `~${months} month${months !== 1 ? 's' : ''} (${days} days)`;
 }
 
-export function formatInjection(memories: SearchResult[]): string {
+export function formatInjection(
+  memories: SearchResult[],
+  options: RetrievalFormatOptions = {},
+): string {
   if (memories.length === 0) return '';
-  if (config.stagedLoadingEnabled) return formatStagedInjection(memories);
+  const stagedLoadingEnabled = options.stagedLoadingEnabled ?? config.stagedLoadingEnabled;
+  if (stagedLoadingEnabled) return formatStagedInjection(memories);
   return formatFullInjection(memories);
 }
 
diff --git a/src/services/retrieval-policy.ts b/src/services/retrieval-policy.ts
index 835f6e5..a853151 100644
--- a/src/services/retrieval-policy.ts
+++ b/src/services/retrieval-policy.ts
@@ -2,7 +2,7 @@
  * Adaptive retrieval and repair-loop policy helpers.
  */
 
-import { config } from '../config.js';
+import type { CoreRuntimeConfig } from '../app/runtime-container.js';
 import type { SearchResult } from '../db/memory-repository.js';
 import { isTemporalOrderingQuery } from './temporal-query-expansion.js';
 
@@ -61,28 +61,40 @@ export interface ResolvedLimit {
   classification: QueryClassification;
 }
 
-export function resolveSearchLimit(query: string, requestedLimit: number | undefined): number {
-  return resolveSearchLimitDetailed(query, requestedLimit).limit;
+export function resolveSearchLimit(
+  query: string,
+  requestedLimit: number | undefined,
+  runtimeConfig: Pick<CoreRuntimeConfig, 'adaptiveRetrievalEnabled' | 'maxSearchResults'>,
+): number {
+  return resolveSearchLimitDetailed(query, requestedLimit, runtimeConfig).limit;
 }
 
-export function resolveSearchLimitDetailed(query: string, requestedLimit: number | undefined): ResolvedLimit {
+export function resolveSearchLimitDetailed(
+  query: string,
+  requestedLimit: number | undefined,
+  runtimeConfig: Pick<CoreRuntimeConfig, 'adaptiveRetrievalEnabled' | 'maxSearchResults'>,
+): ResolvedLimit {
   if (requestedLimit !== undefined) {
-    return { limit: clampLimit(requestedLimit), classification: { limit: requestedLimit, label: 'medium' } };
+    return { limit: clampLimit(requestedLimit, runtimeConfig.maxSearchResults), classification: { limit: requestedLimit, label: 'medium' } };
   }
-  if (!config.adaptiveRetrievalEnabled) {
-    return { limit: clampLimit(config.maxSearchResults), classification: { limit: config.maxSearchResults, label: 'medium' } };
+  if (!runtimeConfig.adaptiveRetrievalEnabled) {
+    return { limit: clampLimit(runtimeConfig.maxSearchResults, runtimeConfig.maxSearchResults), classification: { limit: runtimeConfig.maxSearchResults, label: 'medium' } };
   }
   const classification = classifyQueryDetailed(query);
   // Aggregation queries bypass the normal maxSearchResults clamp to improve
   // recall for count/sum/list-all questions spanning many sessions.
   const limit = classification.label === 'aggregation'
     ? Math.max(1, Math.min(AGGREGATION_HARD_CAP, classification.limit))
-    : clampLimit(classification.limit);
+    : clampLimit(classification.limit, runtimeConfig.maxSearchResults);
   return { limit, classification };
 }
 
-export function shouldRunRepairLoop(query: string, memories: SearchResult[]): boolean {
-  if (!config.repairLoopEnabled) return false;
+export function shouldRunRepairLoop(
+  query: string,
+  memories: SearchResult[],
+  runtimeConfig: Pick<CoreRuntimeConfig, 'adaptiveRetrievalEnabled' | 'maxSearchResults' | 'repairLoopEnabled' | 'repairLoopMinSimilarity'>,
+): boolean {
+  if (!runtimeConfig.repairLoopEnabled) return false;
   // Selective repair: only escalate queries where the rewrite improves retrieval.
   // Multi-hop and aggregation always benefit. Complex queries benefit unless they
   // are temporal-ordering (the rewrite strips time-specific phrasing and hurts
@@ -93,8 +105,8 @@ export function shouldRunRepairLoop(query: string, memories: SearchResult[]): bo
     || (classification.label === 'complex' && !isTemporalOrderingQuery(query));
   if (!isEligible) return false;
   if (memories.length === 0) return true;
-  if (memories[0].similarity < config.repairLoopMinSimilarity) return true;
-  return isComplexQuery(query.toLowerCase()) && memories.length < resolveSearchLimit(query, undefined);
+  if (memories[0].similarity < runtimeConfig.repairLoopMinSimilarity) return true;
+  return isComplexQuery(query.toLowerCase()) && memories.length < resolveSearchLimit(query, undefined, runtimeConfig);
 }
 
 export interface RepairDecision {
@@ -121,6 +133,7 @@ export interface RepairDecision {
 export function shouldAcceptRepair(
   initial: SearchResult[],
   repaired: SearchResult[],
+  runtimeConfig: Pick<CoreRuntimeConfig, 'repairDeltaThreshold' | 'repairConfidenceFloor'>,
 ): RepairDecision {
   const initialTopSim = initial.length > 0 ? initial[0].similarity : 0;
   const repairedTopSim = repaired.length > 0 ? repaired[0].similarity : 0;
@@ -133,27 +146,35 @@ export function shouldAcceptRepair(
     return { ...base, accepted: false, reason: 'sabotage-detected' };
   }
 
-  const deltaThreshold = config.repairDeltaThreshold || 0.01;
+  const deltaThreshold = runtimeConfig.repairDeltaThreshold || 0.01;
   if (simDelta < deltaThreshold) {
     return { ...base, accepted: false, reason: 'delta-below-threshold' };
   }
 
-  if (config.repairConfidenceFloor > 0 && repairedTopSim < config.repairConfidenceFloor) {
+  if (runtimeConfig.repairConfidenceFloor > 0 && repairedTopSim < runtimeConfig.repairConfidenceFloor) {
     return { ...base, accepted: false, reason: 'below-confidence-floor' };
   }
 
   return { ...base, accepted: true, reason: 'accepted' };
 }
 
-export function mergeSearchResults(primary: SearchResult[], repair: SearchResult[], limit: number): SearchResult[] {
+export function mergeSearchResults(
+  primary: SearchResult[],
+  repair: SearchResult[],
+  limit: number,
+  runtimeConfig: Pick<CoreRuntimeConfig, 'retrievalProfileSettings'>,
+): SearchResult[] {
   const merged = new Map<string, SearchResult>();
-  mergeWeightedResults(merged, primary, config.retrievalProfileSettings.repairPrimaryWeight);
-  mergeWeightedResults(merged, repair, config.retrievalProfileSettings.repairRewriteWeight);
+  mergeWeightedResults(merged, primary, runtimeConfig.retrievalProfileSettings.repairPrimaryWeight);
+  mergeWeightedResults(merged, repair, runtimeConfig.retrievalProfileSettings.repairRewriteWeight);
   return [...merged.values()].sort((left, right) => right.score - left.score).slice(0, clampLimitWide(limit));
 }
 
-export function resolveRerankDepth(limit: number): number {
-  return Math.max(clampLimitWide(limit), config.retrievalProfileSettings.rerankDepth);
+export function resolveRerankDepth(
+  limit: number,
+  runtimeConfig: Pick<CoreRuntimeConfig, 'retrievalProfileSettings'>,
+): number {
+  return Math.max(clampLimitWide(limit), runtimeConfig.retrievalProfileSettings.rerankDepth);
 }
 
 export type QueryComplexityLabel = 'simple' | 'medium' | 'complex' | 'multi-hop' | 'aggregation';
@@ -196,8 +217,8 @@ export function isAggregationQuery(lowerQuery: string): boolean {
   return AGGREGATION_MARKERS.some((marker) => lowerQuery.includes(marker));
 }
 
-function clampLimit(limit: number): number {
-  return Math.max(1, Math.min(config.maxSearchResults, Math.floor(limit)));
+function clampLimit(limit: number, maxSearchResults: number): number {
+  return Math.max(1, Math.min(maxSearchResults, Math.floor(limit)));
 }
 
 /** Wider clamp for pipeline internals — respects aggregation ceiling, not profile cap.
  */
diff --git a/src/services/search-pipeline.ts b/src/services/search-pipeline.ts
index 91e954f..351881a 100644
--- a/src/services/search-pipeline.ts
+++ b/src/services/search-pipeline.ts
@@ -6,6 +6,7 @@
  */
 
 import { config } from '../config.js';
+import type { CoreRuntimeConfig } from '../app/runtime-container.js';
 import { MemoryRepository, type SearchResult } from '../db/memory-repository.js';
 import { EntityRepository } from '../db/repository-entities.js';
 import { embedText } from './embedding.js';
@@ -39,6 +40,40 @@ const TEMPORAL_NEIGHBOR_WINDOW_MINUTES = 30;
 const SEMANTIC_RRF_WEIGHT = 1.2;
 const ENTITY_RRF_WEIGHT = 1.3;
 const KEYWORD_RRF_WEIGHT = 1.0;
+
+export type SearchPipelineRuntimeConfig = Pick<
+  CoreRuntimeConfig,
+  | 'adaptiveRetrievalEnabled'
+  | 'agenticRetrievalEnabled'
+  | 'crossEncoderDtype'
+  | 'crossEncoderEnabled'
+  | 'crossEncoderModel'
+  | 'entityGraphEnabled'
+  | 'entitySearchMinSimilarity'
+  | 'hybridSearchEnabled'
+  | 'iterativeRetrievalEnabled'
+  | 'linkExpansionBeforeMMR'
+  | 'linkExpansionEnabled'
+  | 'linkExpansionMax'
+  | 'linkSimilarityThreshold'
+  | 'maxSearchResults'
+  | 'mmrEnabled'
+  | 'mmrLambda'
+  | 'pprDamping'
+  | 'pprEnabled'
+  | 'queryAugmentationEnabled'
+  | 'queryAugmentationMaxEntities'
+  | 'queryAugmentationMinSimilarity'
+  | 'queryExpansionEnabled'
+  | 'queryExpansionMinSimilarity'
+  | 'repairConfidenceFloor'
+  | 'repairDeltaThreshold'
+  | 'repairLoopEnabled'
+  | 'repairLoopMinSimilarity'
+  | 'rerankSkipMinGap'
+  | 'rerankSkipTopSimilarity'
+  | 'retrievalProfileSettings'
+>;
 
 /**
  * Decide whether to auto-skip cross-encoder reranking.
  * Skip when the top vector result is high-confidence and well-separated
@@ -46,11 +81,15 @@ const KEYWORD_RRF_WEIGHT = 1.0;
  * Thresholds are configurable via RERANK_SKIP_TOP_SIMILARITY (default 0.85)
  * and RERANK_SKIP_MIN_GAP (default 0.05). Saves ~150ms per query on CPU.
  */
-function shouldAutoSkipReranking(results: SearchResult[]): boolean {
+function shouldAutoSkipReranking(
+  results: SearchResult[],
+  policyConfig: Pick<SearchPipelineRuntimeConfig, 'rerankSkipTopSimilarity' | 'rerankSkipMinGap'> = config,
+): boolean {
   if (results.length < 2) return true;
   const topSim = results[0]?.score ?? 0;
   const secondSim = results[1]?.score ?? 0;
-  return topSim >= config.rerankSkipTopSimilarity && (topSim - secondSim) >= config.rerankSkipMinGap;
+  return topSim >= policyConfig.rerankSkipTopSimilarity
    && (topSim - secondSim) >= policyConfig.rerankSkipMinGap;
 }
 
 export interface SearchPipelineOptions {
@@ -61,6 +100,13 @@ export interface SearchPipelineOptions {
   skipRepairLoop?: boolean;
   /** Skip cross-encoder reranking for latency-critical paths. */
   skipReranking?: boolean;
+  /**
+   * Runtime-owned config threaded through all search-pipeline helpers.
+   * When present, gates and thresholds across the entire retrieval path
+   * read from this instead of the static module-level config singleton.
+   * Falls back to the static config import if omitted.
+   */
+  runtimeConfig?: SearchPipelineRuntimeConfig;
 }
 
 /**
@@ -77,32 +123,33 @@ export async function runSearchPipelineWithTrace(
   options: SearchPipelineOptions = {},
 ): Promise<{ filtered: SearchResult[]; trace: TraceCollector }> {
   const trace = new TraceCollector(query, userId);
-  const mmrPoolMultiplier = config.mmrEnabled ? 3 : 1;
-  const candidateDepth = resolveRerankDepth(limit) * mmrPoolMultiplier;
+  const policyConfig: SearchPipelineRuntimeConfig = options.runtimeConfig ?? config;
+  const mmrPoolMultiplier = policyConfig.mmrEnabled ?
3 : 1; + const candidateDepth = resolveRerankDepth(limit, policyConfig) * mmrPoolMultiplier; // Phase 1: Embed the raw query to use for entity matching const rawQueryEmbedding = await timed('search.embed', () => embedText(query, 'query')); // Phase 2: Entity-grounded query augmentation (zero-LLM) const augmentation = await timed('search.augmentation', () => applyQueryAugmentation( - entityRepo, userId, query, rawQueryEmbedding, trace, + entityRepo, userId, query, rawQueryEmbedding, trace, policyConfig, )); const queryEmbedding = augmentation.augmentedEmbedding; const searchQuery = augmentation.searchQuery; const initialResults = await timed('search.vector', () => runInitialRetrieval( - repo, entityRepo, userId, searchQuery, queryEmbedding, candidateDepth, sourceSite, referenceTime, options.searchStrategy, + repo, entityRepo, userId, searchQuery, queryEmbedding, candidateDepth, sourceSite, referenceTime, options.searchStrategy, policyConfig, )); const seededResults = await timed('search.hybrid-fallback', () => maybeApplyAbstractHybridFallback( repo, entityRepo, userId, query, searchQuery, queryEmbedding, candidateDepth, sourceSite, referenceTime, - options.retrievalMode, options.searchStrategy, initialResults, trace, + options.retrievalMode, options.searchStrategy, initialResults, trace, policyConfig, )); console.log(`[search] Query: "${query}", Results: ${seededResults.length}`); trace.stage('initial', seededResults, { candidateDepth, - hybrid: config.hybridSearchEnabled, + hybrid: policyConfig.hybridSearchEnabled, augmentation: { searchQuery, matched: searchQuery !== query, @@ -111,7 +158,7 @@ export async function runSearchPipelineWithTrace( // Entity name co-retrieval const withCoRetrieval = await timed('search.co-retrieval', () => applyEntityNameCoRetrieval( - repo, entityRepo, userId, query, queryEmbedding, seededResults, candidateDepth, trace, + repo, entityRepo, userId, query, queryEmbedding, seededResults, candidateDepth, trace, policyConfig, )); const withSubjectExpansion = await timed('search.subject-query-expansion', () => applySubjectQueryExpansion( @@ -128,7 +175,7 @@ export async function runSearchPipelineWithTrace( // Query expansion const withExpansion = await timed('search.query-expansion', () => applyQueryExpansion( - repo, entityRepo, userId, query, queryEmbedding, temporalExpansion.memories, candidateDepth, trace, + repo, entityRepo, userId, query, queryEmbedding, temporalExpansion.memories, candidateDepth, trace, policyConfig, )); const repaired = options.skipRepairLoop @@ -144,12 +191,13 @@ export async function runSearchPipelineWithTrace( sourceSite, referenceTime, trace, + policyConfig, options.searchStrategy, temporalExpansion.temporalAnchorFingerprints, )); const iterated = await timed('search.iterative-retrieval', async () => { - if (!config.iterativeRetrievalEnabled) return repaired.memories; + if (!policyConfig.iterativeRetrievalEnabled) return repaired.memories; const iterative = await applyIterativeRetrieval( repo, userId, @@ -172,9 +220,9 @@ export async function runSearchPipelineWithTrace( // Agentic multi-round retrieval const results = await timed('search.agentic-retrieval', async () => { - if (!config.agenticRetrievalEnabled) return iterated; + if (!policyConfig.agenticRetrievalEnabled) return iterated; const agenticResult = await applyAgenticRetrieval( - repo, userId, query, iterated, candidateDepth, sourceSite, referenceTime, + repo, userId, query, iterated, candidateDepth, sourceSite, referenceTime, policyConfig, ); if (agenticResult.triggered) { 
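+      // Stage the agentic results so the retrieval trace records that the
+      // extra rounds fired and which memories they contributed.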
       trace.stage('agentic-retrieval', agenticResult.memories, {
@@ -197,6 +245,7 @@ export async function runSearchPipelineWithTrace(
     temporalExpansion.temporalAnchorFingerprints,
     trace,
     options.skipReranking,
+    policyConfig,
   ));
 
   const namespaceScope = options.namespaceScope ?? null;
@@ -226,6 +275,7 @@ async function runInitialRetrieval(
   sourceSite?: string,
   referenceTime?: Date,
   searchStrategy: SearchStrategy = 'memory',
+  policyConfig: SearchPipelineRuntimeConfig = config,
 ): Promise<SearchResult[]> {
   if (searchStrategy === 'fact-hybrid') {
     return repo.searchAtomicFactsHybrid(
@@ -246,7 +296,8 @@ async function runInitialRetrieval(
     candidateDepth,
     sourceSite,
     referenceTime,
-    config.hybridSearchEnabled,
+    policyConfig.hybridSearchEnabled,
+    policyConfig,
   );
 }
 
@@ -264,9 +315,10 @@ async function maybeApplyAbstractHybridFallback(
   searchStrategy: SearchStrategy | undefined,
   initialResults: SearchResult[],
   trace: TraceCollector,
+  policyConfig: SearchPipelineRuntimeConfig = config,
 ): Promise<SearchResult[]> {
   if (searchStrategy === 'fact-hybrid') return initialResults;
-  if (config.hybridSearchEnabled || config.entityGraphEnabled) return initialResults;
+  if (policyConfig.hybridSearchEnabled || policyConfig.entityGraphEnabled) return initialResults;
   if (!shouldUseAbstractHybridFallback(retrievalMode, rawQuery, initialResults.length)) {
     return initialResults;
   }
@@ -280,6 +332,7 @@ async function maybeApplyAbstractHybridFallback(
     sourceSite,
     referenceTime,
     true,
+    policyConfig,
   );
   trace.stage('abstract-hybrid-fallback', fallbackResults, { candidateDepth });
   return fallbackResults;
@@ -299,10 +352,11 @@ async function applyRepairLoop(
   sourceSite: string | undefined,
   referenceTime: Date | undefined,
   trace: TraceCollector,
+  policyConfig: SearchPipelineRuntimeConfig,
   searchStrategy: SearchStrategy = 'memory',
   protectedIds: string[] = [],
 ): Promise<{ memories: SearchResult[]; queryText: string }> {
-  if (!shouldRunRepairLoop(query, initialResults)) {
+  if (!shouldRunRepairLoop(query, initialResults, policyConfig)) {
     return { memories: initialResults, queryText: query };
   }
 
@@ -324,18 +378,19 @@ async function applyRepairLoop(
     candidateDepth,
     sourceSite,
     referenceTime,
-    config.hybridSearchEnabled,
+    policyConfig.hybridSearchEnabled,
+    policyConfig,
   );
 
-  const decision = shouldAcceptRepair(initialResults, repairedResults);
+  const decision = shouldAcceptRepair(initialResults, repairedResults, policyConfig);
   if (decision.accepted) {
-    const mergedPool = mergeStageResults(
-      initialResults,
-      repairedResults,
-      initialResults.length + repairedResults.length,
-      config.retrievalProfileSettings.repairPrimaryWeight,
-      config.retrievalProfileSettings.repairRewriteWeight,
-    );
+    const mergedPool = mergeStageResults(
+      initialResults,
+      repairedResults,
+      initialResults.length + repairedResults.length,
+      policyConfig.retrievalProfileSettings.repairPrimaryWeight,
+      policyConfig.retrievalProfileSettings.repairRewriteWeight,
+    );
     const merged = preserveProtectedResults(
       mergedPool.slice(0, candidateDepth),
       mergedPool,
@@ -372,14 +427,15 @@ async function applyQueryExpansion(
   initialResults: SearchResult[],
   candidateDepth: number,
   trace: TraceCollector,
+  policyConfig: SearchPipelineRuntimeConfig = config,
 ): Promise<SearchResult[]> {
-  if (!config.queryExpansionEnabled || !config.entityGraphEnabled || !entityRepo) {
+  if (!policyConfig.queryExpansionEnabled || !policyConfig.entityGraphEnabled || !entityRepo) {
     return initialResults;
   }
 
   const excludeIds = new Set(initialResults.map((r) => r.id));
   const { memories, expansion } = await expandQueryViaEntities(
-    entityRepo, repo, userId, query, queryEmbedding, excludeIds, config.linkExpansionMax,
+    entityRepo, repo, userId, query, queryEmbedding, excludeIds, policyConfig.linkExpansionMax, policyConfig,
   );
 
   if (memories.length === 0) {
@@ -413,13 +469,14 @@ async function applyQueryAugmentation(
   query: string,
   queryEmbedding: number[],
   trace: TraceCollector,
+  policyConfig: SearchPipelineRuntimeConfig = config,
 ): Promise<{ searchQuery: string; augmentedEmbedding: number[] }> {
-  if (!config.queryAugmentationEnabled || !config.entityGraphEnabled || !entityRepo) {
+  if (!policyConfig.queryAugmentationEnabled || !policyConfig.entityGraphEnabled || !entityRepo) {
     return { searchQuery: query, augmentedEmbedding: queryEmbedding };
   }
 
   const result = await augmentQueryWithEntities(
-    entityRepo, userId, query, queryEmbedding,
+    entityRepo, userId, query, queryEmbedding, policyConfig,
   );
 
   if (result.augmentedQuery === query) {
@@ -454,12 +511,13 @@ async function applyEntityNameCoRetrieval(
   initialResults: SearchResult[],
   candidateDepth: number,
   trace: TraceCollector,
+  policyConfig: SearchPipelineRuntimeConfig = config,
 ): Promise<SearchResult[]> {
-  if (!config.entityGraphEnabled || !entityRepo) return initialResults;
+  if (!policyConfig.entityGraphEnabled || !entityRepo) return initialResults;
 
   const excludeIds = new Set(initialResults.map((r) => r.id));
   const { memories, matchedNames } = await coRetrieveByEntityNames(
-    entityRepo, repo, userId, query, queryEmbedding, excludeIds, config.linkExpansionMax,
+    entityRepo, repo, userId, query, queryEmbedding, excludeIds, policyConfig.linkExpansionMax,
   );
 
   if (memories.length === 0) {
@@ -618,18 +676,23 @@ async function applyExpansionAndReranking(
   temporalAnchorFingerprints: string[],
   trace: TraceCollector,
   skipReranking?: boolean,
+  policyConfig: SearchPipelineRuntimeConfig = config,
 ): Promise<SearchResult[]> {
   // Cross-encoder reranking: re-score candidates before MMR
   let candidates = results;
   let protectedFingerprints = [...temporalAnchorFingerprints];
-  const shouldSkipRerank = skipReranking || shouldAutoSkipReranking(results);
-  if (config.crossEncoderEnabled && !shouldSkipRerank) {
-    candidates = await rerankCandidates(query, results);
+  const shouldSkipRerank = skipReranking || shouldAutoSkipReranking(results, policyConfig);
+  if (policyConfig.crossEncoderEnabled && !shouldSkipRerank) {
+    const rerankerConfig = {
+      crossEncoderModel: policyConfig.crossEncoderModel,
+      crossEncoderDtype: policyConfig.crossEncoderDtype,
+    };
+    candidates = await rerankCandidates(query, results, rerankerConfig);
     trace.stage('cross-encoder', candidates, {
-      model: config.crossEncoderModel,
-      dtype: config.crossEncoderDtype,
+      model: rerankerConfig.crossEncoderModel,
+      dtype: rerankerConfig.crossEncoderDtype,
     });
-  } else if (config.crossEncoderEnabled && shouldSkipRerank) {
+  } else if (policyConfig.crossEncoderEnabled && shouldSkipRerank) {
     console.log(`[reranker] Skipped: ${skipReranking ? 'explicit' : 'auto-skip (high-confidence results)'}`);
   }
   const subjectRanked = applySubjectAwareRanking(query, candidates);
@@ -650,34 +713,58 @@ async function applyExpansionAndReranking(
 
   candidates = applyConcisenessPenalty(candidates);
 
-  if (config.linkExpansionBeforeMMR && config.linkExpansionEnabled && config.mmrEnabled) {
-    const preExpanded = await expandWithLinks(repo, entityRepo, userId, candidates.slice(0, limit), queryEmbedding, referenceTime);
+  if (policyConfig.linkExpansionBeforeMMR && policyConfig.linkExpansionEnabled && policyConfig.mmrEnabled) {
+    const preExpanded = await expandWithLinks(
+      repo,
+      entityRepo,
+      userId,
+      candidates.slice(0, limit),
+      queryEmbedding,
+      referenceTime,
+      policyConfig,
+    );
     trace.stage('link-expansion', preExpanded, { order: 'before-mmr' });
     const selected = preserveProtectedResults(
-      applyMMR(preExpanded, queryEmbedding, limit, config.mmrLambda),
+      applyMMR(preExpanded, queryEmbedding, limit, policyConfig.mmrLambda),
       preExpanded,
       protectedFingerprints,
       limit,
     );
-    trace.stage('mmr', selected, { lambda: config.mmrLambda });
+    trace.stage('mmr', selected, { lambda: policyConfig.mmrLambda });
    return selected;
   }
 
-  if (config.mmrEnabled) {
+  if (policyConfig.mmrEnabled) {
    const mmrResults = preserveProtectedResults(
-      applyMMR(candidates, queryEmbedding, limit, config.mmrLambda),
+      applyMMR(candidates, queryEmbedding, limit, policyConfig.mmrLambda),
       candidates,
       protectedFingerprints,
       limit,
     );
-    trace.stage('mmr', mmrResults, { lambda: config.mmrLambda });
-    const expanded = await expandWithLinks(repo, entityRepo, userId, mmrResults, queryEmbedding, referenceTime);
+    trace.stage('mmr', mmrResults, { lambda: policyConfig.mmrLambda });
+    const expanded = await expandWithLinks(
+      repo,
+      entityRepo,
+      userId,
+      mmrResults,
+      queryEmbedding,
+      referenceTime,
+      policyConfig,
+    );
     trace.stage('link-expansion', expanded, { order: 'after-mmr' });
     return expanded;
   }
 
   const sliced = preserveProtectedResults(candidates.slice(0, limit), candidates, protectedFingerprints, limit);
-  const expanded = await expandWithLinks(repo, entityRepo, userId, sliced, queryEmbedding, referenceTime);
+  const expanded = await expandWithLinks(
+    repo,
+    entityRepo,
+    userId,
+    sliced,
+    queryEmbedding,
+    referenceTime,
+    policyConfig,
+  );
   trace.stage('link-expansion', expanded, { order: 'no-mmr' });
   return expanded;
 }
@@ -693,15 +780,16 @@ async function expandWithLinks(
   results: SearchResult[],
   queryEmbedding: number[],
   referenceTime?: Date,
+  policyConfig: SearchPipelineRuntimeConfig = config,
 ): Promise<SearchResult[]> {
-  if (!config.linkExpansionEnabled || config.linkExpansionMax <= 0) return results;
+  if (!policyConfig.linkExpansionEnabled || policyConfig.linkExpansionMax <= 0) return results;
 
   const resultIds = results.map((r) => r.id);
   const excludeIds = new Set(resultIds);
-  const budget = config.linkExpansionMax;
+  const budget = policyConfig.linkExpansionMax;
 
-  const linkedIds = config.pprEnabled
-    ? await expandViaPPR(repo, results, excludeIds, budget)
+  const linkedIds = policyConfig.pprEnabled
+    ? await expandViaPPR(repo, results, excludeIds, budget, policyConfig)
     : await repo.findLinkedMemoryIds(resultIds, excludeIds, budget);
 
   const temporalNeighbors = await repo.findTemporalNeighbors(
@@ -722,7 +810,15 @@ async function expandWithLinks(
   const dedupedTemporal = temporalNeighbors.filter((m) => !seen.has(m.id));
 
   // Entity graph expansion: find entities matching the query and pull in their linked memories
-  const entityMemories = await expandViaEntities(repo, entityRepo, userId, queryEmbedding, seen, budget);
+  const entityMemories = await expandViaEntities(
+    repo,
+    entityRepo,
+    userId,
+    queryEmbedding,
+    seen,
+    budget,
+    policyConfig,
+  );
 
   const expansions = [...linkedMemories, ...dedupedTemporal, ...entityMemories]
     .sort((a, b) => b.score - a.score)
@@ -741,6 +837,7 @@ async function runMemoryRrfRetrieval(
   sourceSite: string | undefined,
   referenceTime: Date | undefined,
   includeKeywordChannel: boolean,
+  policyConfig: SearchPipelineRuntimeConfig = config,
 ): Promise<SearchResult[]> {
   const semanticResults = await repo.searchSimilar(
     userId,
@@ -753,8 +850,16 @@ async function runMemoryRrfRetrieval(
     { name: 'semantic', weight: SEMANTIC_RRF_WEIGHT, results: semanticResults },
   ];
 
-  if (config.entityGraphEnabled && entityRepo) {
-    const entityResults = await expandViaEntities(repo, entityRepo, userId, queryEmbedding, new Set(), limit);
+  if (policyConfig.entityGraphEnabled && entityRepo) {
+    const entityResults = await expandViaEntities(
+      repo,
+      entityRepo,
+      userId,
+      queryEmbedding,
+      new Set<string>(),
+      limit,
+      policyConfig,
+    );
     if (entityResults.length > 0) {
       channels.push({ name: 'entity', weight: ENTITY_RRF_WEIGHT, results: entityResults });
     }
@@ -782,6 +887,7 @@ async function expandViaPPR(
   results: SearchResult[],
   excludeIds: Set<string>,
   budget: number,
+  policyConfig: Pick<SearchPipelineRuntimeConfig, 'pprDamping'> = config,
 ): Promise<string[]> {
   const seedScores = new Map<string, number>();
   for (const r of results) {
@@ -791,7 +897,7 @@ async function expandViaPPR(
   const { scores } = await personalizedPageRank(
     repo.getPool(),
     seedScores,
-    { damping: config.pprDamping },
+    { damping: policyConfig.pprDamping },
   );
 
   return [...scores.entries()]
@@ -839,8 +945,9 @@ export async function generateLinks(
   userId: string,
   memoryIds: string[],
   embeddingCache: Map<string, number[]>,
+  runtimeConfig: Pick<SearchPipelineRuntimeConfig, 'linkExpansionEnabled' | 'linkSimilarityThreshold'> = config,
 ): Promise<number> {
-  if (!config.linkExpansionEnabled || memoryIds.length === 0) return 0;
+  if (!runtimeConfig.linkExpansionEnabled || memoryIds.length === 0) return 0;
 
   const activeMemoryIds: string[] = [];
   for (const id of memoryIds) {
@@ -856,7 +963,7 @@ export async function generateLinks(
     if (!embedding) continue;
 
     const candidates = await repo.findLinkCandidates(
-      userId, embedding, config.linkSimilarityThreshold, memoryId,
+      userId, embedding, runtimeConfig.linkSimilarityThreshold, memoryId,
     );
     for (const candidate of candidates) {
       allLinks.push({ sourceId: memoryId, targetId: candidate.id, similarity: candidate.similarity });
@@ -878,11 +985,12 @@ async function expandViaEntities(
   queryEmbedding: number[],
   excludeIds: Set<string>,
   budget: number,
+  policyConfig: Pick<SearchPipelineRuntimeConfig, 'entityGraphEnabled' | 'entitySearchMinSimilarity'> = config,
 ): Promise<SearchResult[]> {
-  if (!config.entityGraphEnabled || !entityRepo) return [];
+  if (!policyConfig.entityGraphEnabled || !entityRepo) return [];
 
   const matchingEntities = await entityRepo.searchEntities(
-    userId, queryEmbedding, 5, config.entitySearchMinSimilarity,
+    userId, queryEmbedding, 5, policyConfig.entitySearchMinSimilarity,
   );
 
   if (matchingEntities.length === 0) return [];
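+  // The matched entities (at most 5, gated by entitySearchMinSimilarity above)
+  // presumably seed the entity-linked memory lookup in the rest of this function.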