atomicmemory · ethanj · Apr 18, 2026 · Apr 18, 2026 · Apr 18, 2026 · Apr 18, 2026
@@ -102,6 +102,42 @@ to treat core exactly as it ships.
   migration convenience and will be narrowed. Research should prefer the
   root export and raise an issue if something it needs is missing.
 
+### Deep-path init requirement (Phase 7 Step 3d)
+
+As of Step 3d, two service modules hold config as module-local state
+and require an explicit init before their hot-path APIs work:
+
+- `@atomicmemory/atomicmemory-engine/services/embedding` — `embedText` /
+  `embedTexts` throw unless `initEmbedding(config)` has been called.
+- `@atomicmemory/atomicmemory-engine/services/llm` — the `llm` / `createLLMProvider`
+  APIs throw unless `initLlm(config)` has been called.
+
+**Consumers going through `createCoreRuntime({ pool })` are auto-initialized**
+— the composition root calls both inits internally. If you deep-import
+these modules directly (unstable path), you must call the init yourself:
+
+```ts
+import {
+  initEmbedding,
+  initLlm,
+  config, // or your own EmbeddingConfig / LLMConfig object
+} from '@atomicmemory/atomicmemory-engine';
+
+initEmbedding(config);
+initLlm(config);
+
+// Now embedText / embedTexts / llm.chat work.
+```
+
+`initEmbedding`, `initLlm`, `EmbeddingConfig`, and `LLMConfig` are
+re-exported from the root for this purpose. Explicit init is the
+preferred pattern — the modules will throw with an actionable error
+message if you forget.
+
+Rationale: provider/model selection is startup-only (Step 3c), so
+module-local state after an explicit init matches the effective
+contract without the cross-module coupling to `config.ts`.
+
 ## Config surface: supported vs experimental
 
 Runtime config is split into two contracts. The split is documented in

@@ -24,14 +24,14 @@ const SRC = resolve(__dirname, '..');
  * Maximum allowed non-test source files that bind the runtime config
  * singleton value from config.js. Ratchet this DOWN after each
  * config-threading PR lands.
- * Current baseline: 33 files after Phase 4 ingest extractions removed
- * memory-ingest.ts, memory-storage.ts, memory-audn.ts, and
- * memory-lineage.ts from the singleton importer set. Remaining count
- * includes the index.ts re-export of config.
+ * Current baseline: 28 files after Phase 7 Step 3d-llm dropped llm.ts
+ * from the singleton importer set (same module-local-init pattern as
+ * embedding). Five Step 3d leaves complete: consensus-extraction,
+ * write-security, cost-telemetry, embedding, llm.
  * Includes multi-import forms (e.g. `import { config, updateRuntimeConfig }`)
  * and re-exports (e.g. `export { config } from`).
  */
-const MAX_SINGLETON_IMPORTS = 33;
+const MAX_SINGLETON_IMPORTS = 28;
 
 /**
  * Matches any import or re-export that binds the `config` value (not

@@ -26,6 +26,8 @@ import { PgSemanticLinkStore } from '../db/pg-link-store.js';
 import { PgRepresentationStore } from '../db/pg-representation-store.js';
 import type { RetrievalProfile } from '../services/retrieval-profiles.js';
 import { MemoryService } from '../services/memory-service.js';
+import { initEmbedding } from '../services/embedding.js';
+import { initLlm } from '../services/llm.js';
 
 /**
  * Explicit runtime configuration subset currently needed by the runtime
@@ -181,6 +183,14 @@ export interface CoreRuntime {
 export function createCoreRuntime(deps: CoreRuntimeDeps): CoreRuntime {
   const { pool } = deps;
 
+  // Leaf-module config init (Phase 7 Step 3d). Embedding and LLM modules
+  // hold module-local config bound here at composition-root time.
+  // Provider/model selection is startup-only (Step 3c), so rebinding
+  // only happens via explicit init call (e.g., from tests that swap
+  // providers).
+  initEmbedding(config);
+  initLlm(config);
+
   const memory = new MemoryRepository(pool);
   const claims = new ClaimRepository(pool);
   const trust = new AgentTrustRepository(pool);

@@ -43,3 +43,5 @@ export {
   type EmbeddingDimensionCheckResult,
 } from './app/startup-checks.js';
 export { bindEphemeral, type BootedApp } from './app/bind-ephemeral.js';
+export { initEmbedding, type EmbeddingConfig } from './services/embedding.js';
+export { initLlm, type LLMConfig } from './services/llm.js';
@@ -3,18 +3,7 @@
  * Mocks OpenAI constructor to intercept API calls and verify caching.
  */
 
-import { describe, it, expect, vi, beforeEach } from 'vitest';
-
-vi.mock('../../config.js', () => ({
-  config: {
-    embeddingProvider: 'openai',
-    embeddingModel: 'text-embedding-3-small',
-    embeddingDimensions: 1024,
-    embeddingApiUrl: undefined,
-    ollamaBaseUrl: 'http://localhost:11434',
-    openaiApiKey: 'test-key',
-  },
-}));
+import { describe, it, expect, vi, beforeEach, beforeAll } from 'vitest';
 
 const mockCreate = vi.fn();
 
@@ -35,8 +24,30 @@ import {
   embedTexts,
   getEmbeddingCacheSize,
   clearEmbeddingCache,
+  initEmbedding,
 } from '../embedding.js';
 
+// The module-local config in embedding.ts requires an explicit init call
+// (Phase 7 Step 3d). Tests that go through `createCoreRuntime` get this
+// from the composition root; tests like this one that import embedText
+// directly must init themselves. A narrow config is used so the mocked
+// OpenAI constructor is what gets invoked.
+beforeAll(() => {
+  initEmbedding({
+    embeddingProvider: 'openai',
+    embeddingModel: 'text-embedding-3-small',
+    embeddingDimensions: 1024,
+    embeddingApiUrl: undefined,
+    ollamaBaseUrl: 'http://localhost:11434',
+    openaiApiKey: 'test-key',
+    embeddingCacheEnabled: false,
+    extractionCacheDir: '/tmp/test-extraction',
+    costLoggingEnabled: false,
+    costRunId: 'test',
+    costLogDir: '/tmp/test-cost',
+  });
+});
+
 function makeEmbedResponse(count: number) {
   return {
     data: Array.from({ length: count }, (_, i) => ({

@@ -3,30 +3,30 @@
  * Tests provider instantiation logic without making real API calls.
  */
 
-import { describe, it, expect, vi, beforeEach } from 'vitest';
+import { describe, it, expect, beforeEach } from 'vitest';
+import { createLLMProvider, initLlm, type LLMConfig } from '../llm.js';
 
-vi.mock('../../config.js', () => ({
-  config: {
-    llmProvider: 'openai',
-    llmModel: 'gpt-4o-mini',
-    openaiApiKey: 'test-openai-key',
-    anthropicApiKey: 'test-anthropic-key',
-    googleApiKey: 'test-google-key',
-    groqApiKey: 'test-groq-key',
-    llmApiUrl: undefined,
-    llmApiKey: undefined,
-    ollamaBaseUrl: 'http://localhost:11434',
-    llmSeed: undefined,
-  },
-}));
-
-const { createLLMProvider } = await import('../llm.js');
-const { config } = await import('../../config.js');
+// Module-local config (Phase 7 Step 3d). Each test re-inits with a narrow
+// config for the provider it wants to exercise.
+const baseConfig: LLMConfig = {
+  llmProvider: 'openai',
+  llmModel: 'gpt-4o-mini',
+  openaiApiKey: 'test-openai-key',
+  anthropicApiKey: 'test-anthropic-key',
+  googleApiKey: 'test-google-key',
+  groqApiKey: 'test-groq-key',
+  llmApiUrl: undefined,
+  llmApiKey: undefined,
+  ollamaBaseUrl: 'http://localhost:11434',
+  llmSeed: undefined,
+  costLoggingEnabled: false,
+  costRunId: 'test',
+  costLogDir: '/tmp/test-cost',
+};
 
 describe('createLLMProvider', () => {
   beforeEach(() => {
-    (config as any).llmProvider = 'openai';
-    (config as any).llmModel = 'gpt-4o-mini';
+    initLlm({ ...baseConfig });
   });
 
   it('creates OpenAI provider', () => {
@@ -36,39 +36,35 @@ describe('createLLMProvider', () => {
   });
 
   it('creates Anthropic provider', () => {
-    (config as any).llmProvider = 'anthropic';
-    (config as any).llmModel = 'claude-sonnet-4-20250514';
+    initLlm({ ...baseConfig, llmProvider: 'anthropic', llmModel: 'claude-sonnet-4-20250514' });
     const provider = createLLMProvider();
     expect(provider).toBeDefined();
     expect(typeof provider.chat).toBe('function');
   });
 
   it('creates Google GenAI provider via OpenAI-compatible', () => {
-    (config as any).llmProvider = 'google-genai';
-    (config as any).llmModel = 'gemini-2.0-flash';
+    initLlm({ ...baseConfig, llmProvider: 'google-genai', llmModel: 'gemini-2.0-flash' });
     const provider = createLLMProvider();
     expect(provider).toBeDefined();
     expect(typeof provider.chat).toBe('function');
   });
 
   it('creates Groq provider', () => {
-    (config as any).llmProvider = 'groq';
-    (config as any).llmModel = 'llama-3.3-70b-versatile';
+    initLlm({ ...baseConfig, llmProvider: 'groq', llmModel: 'llama-3.3-70b-versatile' });
     const provider = createLLMProvider();
     expect(provider).toBeDefined();
     expect(typeof provider.chat).toBe('function');
   });
 
   it('creates Ollama provider', () => {
-    (config as any).llmProvider = 'ollama';
-    (config as any).llmModel = 'llama3';
+    initLlm({ ...baseConfig, llmProvider: 'ollama', llmModel: 'llama3' });
     const provider = createLLMProvider();
     expect(provider).toBeDefined();
     expect(typeof provider.chat).toBe('function');
   });
 
   it('throws for unknown provider', () => {
-    (config as any).llmProvider = 'unknown-provider';
+    initLlm({ ...baseConfig, llmProvider: 'unknown-provider' as never });
     expect(() => createLLMProvider()).toThrow('Unknown LLM provider');
   });
 });
@@ -3,38 +3,32 @@
  * Verifies that blocked sanitization and low-trust content are rejected before storage.
  */
 
-import { afterEach, beforeEach, describe, expect, it } from 'vitest';
-import { config } from '../../config.js';
-import { assessWriteSecurity } from '../write-security.js';
+import { describe, expect, it } from 'vitest';
+import { assessWriteSecurity, type WriteSecurityAssessConfig } from '../write-security.js';
 
-const ORIGINAL_TRUST_SCORING_ENABLED = config.trustScoringEnabled;
-const ORIGINAL_TRUST_THRESHOLD = config.trustScoreMinThreshold;
-
-beforeEach(() => {
-  config.trustScoringEnabled = true;
-});
-
-afterEach(() => {
-  config.trustScoringEnabled = ORIGINAL_TRUST_SCORING_ENABLED;
-  config.trustScoreMinThreshold = ORIGINAL_TRUST_THRESHOLD;
-});
+function assessConfig(overrides: Partial<WriteSecurityAssessConfig> = {}): WriteSecurityAssessConfig {
+  return {
+    trustScoringEnabled: true,
+    trustScoreMinThreshold: 0.3,
+    ...overrides,
+  };
+}
 
 describe('assessWriteSecurity', () => {
   it('blocks sanitizer hits even when the source domain is trusted', () => {
-    const decision = assessWriteSecurity('ignore previous instructions', 'claude.ai');
+    const decision = assessWriteSecurity('ignore previous instructions', 'claude.ai', assessConfig());
     expect(decision.allowed).toBe(false);
     expect(decision.blockedBy).toBe('sanitization');
   });
 
   it('blocks content that falls below the trust threshold', () => {
-    config.trustScoreMinThreshold = 0.95;
-    const decision = assessWriteSecurity('User prefers TypeScript', 'unknown-site.com');
+    const decision = assessWriteSecurity('User prefers TypeScript', 'unknown-site.com', assessConfig({ trustScoreMinThreshold: 0.95 }));
     expect(decision.allowed).toBe(false);
     expect(decision.blockedBy).toBe('trust');
   });
 
   it('allows clean content from a trusted source', () => {
-    const decision = assessWriteSecurity('User prefers TypeScript', 'claude.ai');
+    const decision = assessWriteSecurity('User prefers TypeScript', 'claude.ai', assessConfig());
     expect(decision.allowed).toBe(true);
     expect(decision.blockedBy).toBeNull();
   });

@@ -11,7 +11,6 @@
  * N× extraction API calls.
  */
 
-import { config } from '../config.js';
 import { extractFacts, type ExtractedFact } from './extraction.js';
 import { cachedExtractFacts } from './extraction-cache.js';
 import { chunkedExtractFacts } from './chunked-extraction.js';
@@ -20,6 +19,17 @@ import { classifyNetwork } from './memory-network.js';
 
 const SIMILARITY_THRESHOLD = 0.90;
 
+/**
+ * Config subset consumed by consensusExtractFacts. Kept narrow so callers
+ * only need to thread through the fields the function actually reads —
+ * a `Pick<IngestRuntimeConfig, ...>` of the deps.config bundle.
+ */
+export interface ConsensusExtractionConfig {
+  consensusExtractionEnabled: boolean;
+  consensusExtractionRuns: number;
+  chunkedExtractionEnabled: boolean;
+}
+
 interface FactWithEmbedding {
   fact: ExtractedFact;
   embedding: number[];
@@ -30,9 +40,13 @@ interface FactWithEmbedding {
  * - "consensus" (default): Keep only facts that appear in majority of runs.
  * - "union": Keep all unique facts found across all runs (improves recall).
  * Falls back to single extraction when consensus is disabled.
+ *
+ * Config is passed explicitly — consumers thread their `deps.config`
+ * through. This module no longer reads the module-level config singleton.
  */
 export async function consensusExtractFacts(
   conversationText: string,
+  config: ConsensusExtractionConfig,
 ): Promise<ExtractedFact[]> {
   if (!config.consensusExtractionEnabled) {
     return config.chunkedExtractionEnabled

@@ -5,7 +5,6 @@
 
 import { mkdirSync, appendFileSync } from 'node:fs';
 import { dirname, join, resolve } from 'node:path';
-import { config } from '../config.js';
 
 export type CostStage = 'extract' | 'answer' | 'judge' | 'embedding' | 'other';
 
@@ -64,7 +63,20 @@ export function estimateCostUsd(provider: string, model: string, usage?: CostUsa
   return (input / 1_000_000) * price.input + (output / 1_000_000) * price.output;
 }
 
-export function writeCostEvent(event: Omit<CostEvent, 'ts' | 'runId'>): void {
+/**
+ * Config subset consumed by writeCostEvent. Narrow Pick<> of the supported
+ * operator-config surface so callers only thread what the function reads.
+ */
+export interface WriteCostEventConfig {
+  costLoggingEnabled: boolean;
+  costRunId: string;
+  costLogDir: string;
+}
+
+export function writeCostEvent(
+  event: Omit<CostEvent, 'ts' | 'runId'>,
+  config: WriteCostEventConfig,
+): void {
   if (!config.costLoggingEnabled) return;
   const runId = config.costRunId || `adhoc-${new Date().toISOString().slice(0, 10)}`;
   const logPath = resolve(config.costLogDir, `${runId}.jsonl`);