Skip to content

Commit ee8f463

Browse files
committed
feat(memory/typed-network): Phase 2 LLM observer (6-step extraction prompt + zod schema)
- TYPED_EXTRACTION_SYSTEM_PROMPT + buildExtractionUserPrompt for the Hindsight 6-step pipeline (coreference, temporal normalization, participant attribution, reasoning preservation, fact-type classification, entity extraction). - TypedExtractionSchema (zod) enforces the structured-output contract; TypedNetworkObserver wraps the LLM call + parsing + ID assignment. - ITypedExtractionLLM interface lets tests inject mocks. - 11 unit tests covering happy path, malformed JSON, schema violations, markdown code-fence stripping.
1 parent ebdf565 commit ee8f463

4 files changed

Lines changed: 373 additions & 0 deletions

File tree

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
/**
2+
* @file TypedNetworkObserver.ts
3+
* @description LLM-driven extractor that turns a conversation block
4+
* into 0+ {@link TypedFact}s. Wraps the 6-step extraction prompt and
5+
* the zod-validated parsing of the LLM's structured-output response.
6+
*
7+
* Production wiring: a typical caller constructs the observer once per
8+
* pipeline (re-using the same `gpt-5-mini` adapter), then invokes
9+
* {@link TypedNetworkObserver.extract} per session. The returned facts
10+
* are then upserted into a {@link TypedNetworkStore} and embedded by
11+
* the host's {@link IEmbeddingManager}.
12+
*
13+
* @module @framers/agentos/memory/retrieval/typed-network/TypedNetworkObserver
14+
*/
15+
16+
import { TypedExtractionSchema } from './prompts/extraction-schema.js';
17+
import {
18+
TYPED_EXTRACTION_SYSTEM_PROMPT,
19+
buildExtractionUserPrompt,
20+
} from './prompts/extraction-prompt.js';
21+
import type { TypedFact } from './types.js';
22+
23+
/**
 * Provider-agnostic LLM interface for the extractor. Matches the
 * shape used elsewhere in agentos for classifier / observer LLM
 * adapters: a single `invoke(args)` async method returning the raw
 * text response. Implementations wrap OpenAI, Anthropic, local
 * models, or test mocks.
 */
export interface ITypedExtractionLLM {
  /**
   * Run one completion and return the raw response text.
   *
   * @param args.system - System prompt (the 6-step extraction instructions).
   * @param args.user - User prompt containing the delimited conversation.
   * @param args.maxTokens - Output token cap forwarded to the provider.
   * @param args.temperature - Sampling temperature forwarded to the provider.
   * @returns Raw model output; expected to be JSON, possibly wrapped in a
   *   markdown code fence (the observer strips fences before parsing).
   */
  invoke(args: {
    system: string;
    user: string;
    maxTokens: number;
    temperature: number;
  }): Promise<string>;
}
38+
39+
/**
 * Construction options for the observer.
 */
export interface TypedNetworkObserverOptions {
  /** LLM adapter implementing the 6-step extraction call. */
  llm: ITypedExtractionLLM;
  /**
   * Max output tokens. Default 4096.
   * NOTE(review): extractions were sized at 50-200 facts × ~30 tokens
   * each; the top of that range (~6000 tokens) exceeds the 4096
   * default — confirm whether large sessions need a higher cap, or
   * whether truncated JSON is acceptable (it will fail parsing).
   */
  maxTokens?: number;
  /** Temperature. Default 0 for deterministic extraction. */
  temperature?: number;
}
50+
51+
/**
52+
* The 6-step extractor. Stateless aside from its constructor options;
53+
* safe to share across concurrent extractions.
54+
*/
55+
export class TypedNetworkObserver {
56+
private readonly llm: ITypedExtractionLLM;
57+
private readonly maxTokens: number;
58+
private readonly temperature: number;
59+
60+
constructor(options: TypedNetworkObserverOptions) {
61+
this.llm = options.llm;
62+
this.maxTokens = options.maxTokens ?? 4096;
63+
this.temperature = options.temperature ?? 0;
64+
}
65+
66+
/**
67+
* Extract typed facts from a conversation block. Uses the 6-step
68+
* prompt + zod-validated parsing. The resulting facts have stable
69+
* IDs of the form `<sessionId>-fact-<index>` so re-extraction
70+
* against the same content reproduces the same IDs.
71+
*
72+
* @param sessionText - Full conversation text. Will be wrapped in
73+
* the user prompt's delimiters automatically.
74+
* @param sessionId - Stable identifier used to namespace the
75+
* resulting fact IDs.
76+
* @returns Array of {@link TypedFact}s, possibly empty.
77+
* @throws ZodError if the LLM output fails schema validation.
78+
* @throws SyntaxError if the LLM output is not valid JSON.
79+
*/
80+
async extract(sessionText: string, sessionId: string): Promise<TypedFact[]> {
81+
const raw = await this.llm.invoke({
82+
system: TYPED_EXTRACTION_SYSTEM_PROMPT,
83+
user: buildExtractionUserPrompt(sessionText),
84+
maxTokens: this.maxTokens,
85+
temperature: this.temperature,
86+
});
87+
// Strip markdown code fences if the LLM wraps the JSON in them
88+
// (some models do this even with explicit "no commentary" prompts).
89+
const stripped = stripCodeFence(raw);
90+
const json = JSON.parse(stripped);
91+
const parsed = TypedExtractionSchema.parse(json);
92+
return parsed.facts.map((f, idx) => ({
93+
id: `${sessionId}-fact-${idx}`,
94+
bank: f.bank,
95+
text: f.text,
96+
embedding: [],
97+
temporal: f.temporal,
98+
participants: f.participants,
99+
reasoningMarkers: f.reasoning_markers,
100+
entities: f.entities,
101+
confidence: f.confidence,
102+
}));
103+
}
104+
}
105+
106+
/**
107+
* Strip leading/trailing markdown code fences. Tolerates both
108+
* triple-backtick-with-language and bare triple-backtick wrappers.
109+
*/
110+
function stripCodeFence(s: string): string {
111+
const trimmed = s.trim();
112+
if (!trimmed.startsWith('```')) return trimmed;
113+
// Drop the opening ``` (with or without language tag) and any trailing ```
114+
const withoutOpen = trimmed.replace(/^```(?:json|JSON)?\s*\n?/, '');
115+
return withoutOpen.replace(/\n?```\s*$/, '');
116+
}
Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
/**
2+
* @file TypedNetworkObserver.test.ts
3+
* @description Contract tests for the 6-step LLM extractor. Uses a
4+
* mocked LLM to assert: structured-output parsing, ID generation
5+
* format, all-bank routing through the observer, validation rejection
6+
* of malformed output, and code-fence tolerance.
7+
*/
8+
9+
import { describe, it, expect } from 'vitest';
10+
import { TypedNetworkObserver, type ITypedExtractionLLM } from '../TypedNetworkObserver.js';
11+
12+
function mockLLM(response: string): ITypedExtractionLLM {
13+
return { invoke: async () => response };
14+
}
15+
16+
describe('TypedNetworkObserver', () => {
17+
it('parses valid LLM output into TypedFact[]', async () => {
18+
const llm = mockLLM(JSON.stringify({
19+
facts: [{
20+
text: 'Berlin is in Germany',
21+
bank: 'WORLD',
22+
temporal: { mention: '2026-04-26T10:00:00Z' },
23+
participants: [],
24+
reasoning_markers: [],
25+
entities: ['Berlin', 'Germany'],
26+
confidence: 1.0,
27+
}],
28+
}));
29+
const obs = new TypedNetworkObserver({ llm });
30+
const facts = await obs.extract('User: Where is Berlin? Assistant: In Germany.', 'session-1');
31+
expect(facts).toHaveLength(1);
32+
expect(facts[0].bank).toBe('WORLD');
33+
expect(facts[0].entities).toContain('Berlin');
34+
expect(facts[0].id).toBe('session-1-fact-0');
35+
});
36+
37+
it('generates sequential IDs for multiple facts', async () => {
38+
const llm = mockLLM(JSON.stringify({
39+
facts: [
40+
{ text: 'A', bank: 'WORLD', temporal: { mention: '2026-04-26T10:00:00Z' }, participants: [], reasoning_markers: [], entities: [], confidence: 1.0 },
41+
{ text: 'B', bank: 'EXPERIENCE', temporal: { mention: '2026-04-26T10:01:00Z' }, participants: [], reasoning_markers: [], entities: [], confidence: 1.0 },
42+
{ text: 'C', bank: 'OPINION', temporal: { mention: '2026-04-26T10:02:00Z' }, participants: [], reasoning_markers: [], entities: [], confidence: 0.7 },
43+
],
44+
}));
45+
const obs = new TypedNetworkObserver({ llm });
46+
const facts = await obs.extract('text', 'sx');
47+
expect(facts.map((f) => f.id)).toEqual(['sx-fact-0', 'sx-fact-1', 'sx-fact-2']);
48+
});
49+
50+
it('routes facts into all four banks', async () => {
51+
const llm = mockLLM(JSON.stringify({
52+
facts: [
53+
{ text: 'World', bank: 'WORLD', temporal: { mention: 'now' }, participants: [], reasoning_markers: [], entities: [], confidence: 1.0 },
54+
{ text: 'Exp', bank: 'EXPERIENCE', temporal: { mention: 'now' }, participants: [], reasoning_markers: [], entities: [], confidence: 1.0 },
55+
{ text: 'Op', bank: 'OPINION', temporal: { mention: 'now' }, participants: [], reasoning_markers: [], entities: [], confidence: 0.5 },
56+
{ text: 'Obs', bank: 'OBSERVATION', temporal: { mention: 'now' }, participants: [], reasoning_markers: [], entities: [], confidence: 1.0 },
57+
],
58+
}));
59+
const obs = new TypedNetworkObserver({ llm });
60+
const facts = await obs.extract('text', 's1');
61+
const banks = facts.map((f) => f.bank);
62+
expect(banks).toEqual(['WORLD', 'EXPERIENCE', 'OPINION', 'OBSERVATION']);
63+
});
64+
65+
it('snake_case → camelCase translation for reasoning_markers', async () => {
66+
const llm = mockLLM(JSON.stringify({
67+
facts: [{
68+
text: 'Because the user prefers TypeScript, we use Bun',
69+
bank: 'EXPERIENCE',
70+
temporal: { mention: '2026-04-26T10:00:00Z' },
71+
participants: [],
72+
reasoning_markers: ['Because', 'we use'],
73+
entities: ['TypeScript', 'Bun'],
74+
confidence: 1.0,
75+
}],
76+
}));
77+
const obs = new TypedNetworkObserver({ llm });
78+
const facts = await obs.extract('text', 's1');
79+
expect(facts[0].reasoningMarkers).toEqual(['Because', 'we use']);
80+
});
81+
82+
it('throws on missing required field (zod validation)', async () => {
83+
const llm = mockLLM('{"facts": [{"text": ""}]}');
84+
const obs = new TypedNetworkObserver({ llm });
85+
await expect(obs.extract('blah', 'session-2')).rejects.toThrow();
86+
});
87+
88+
it('throws on unknown bank label', async () => {
89+
const llm = mockLLM(JSON.stringify({
90+
facts: [{
91+
text: 'foo',
92+
bank: 'FOO',
93+
temporal: { mention: 'now' },
94+
participants: [],
95+
reasoning_markers: [],
96+
entities: [],
97+
confidence: 1.0,
98+
}],
99+
}));
100+
const obs = new TypedNetworkObserver({ llm });
101+
await expect(obs.extract('text', 's1')).rejects.toThrow();
102+
});
103+
104+
it('throws on confidence outside [0, 1]', async () => {
105+
const llm = mockLLM(JSON.stringify({
106+
facts: [{
107+
text: 'foo',
108+
bank: 'OPINION',
109+
temporal: { mention: 'now' },
110+
participants: [],
111+
reasoning_markers: [],
112+
entities: [],
113+
confidence: 1.5,
114+
}],
115+
}));
116+
const obs = new TypedNetworkObserver({ llm });
117+
await expect(obs.extract('text', 's1')).rejects.toThrow();
118+
});
119+
120+
it('tolerates triple-backtick code fence around JSON', async () => {
121+
const llm = mockLLM('```json\n{"facts": []}\n```');
122+
const obs = new TypedNetworkObserver({ llm });
123+
const facts = await obs.extract('text', 's1');
124+
expect(facts).toEqual([]);
125+
});
126+
127+
it('tolerates bare backticks without language tag', async () => {
128+
const llm = mockLLM('```\n{"facts": []}\n```');
129+
const obs = new TypedNetworkObserver({ llm });
130+
const facts = await obs.extract('text', 's1');
131+
expect(facts).toEqual([]);
132+
});
133+
134+
it('passes maxTokens and temperature to the LLM', async () => {
135+
let capturedArgs: { maxTokens: number; temperature: number } | undefined;
136+
const llm: ITypedExtractionLLM = {
137+
invoke: async (args) => {
138+
capturedArgs = { maxTokens: args.maxTokens, temperature: args.temperature };
139+
return JSON.stringify({ facts: [] });
140+
},
141+
};
142+
const obs = new TypedNetworkObserver({ llm, maxTokens: 8192, temperature: 0.2 });
143+
await obs.extract('text', 's1');
144+
expect(capturedArgs?.maxTokens).toBe(8192);
145+
expect(capturedArgs?.temperature).toBe(0.2);
146+
});
147+
148+
it('default maxTokens=4096, temperature=0', async () => {
149+
let capturedArgs: { maxTokens: number; temperature: number } | undefined;
150+
const llm: ITypedExtractionLLM = {
151+
invoke: async (args) => {
152+
capturedArgs = { maxTokens: args.maxTokens, temperature: args.temperature };
153+
return JSON.stringify({ facts: [] });
154+
},
155+
};
156+
const obs = new TypedNetworkObserver({ llm });
157+
await obs.extract('text', 's1');
158+
expect(capturedArgs?.maxTokens).toBe(4096);
159+
expect(capturedArgs?.temperature).toBe(0);
160+
});
161+
});
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
/**
2+
* @file extraction-prompt.ts
3+
* @description The 6-step extraction prompt for the Hindsight 4-network
4+
* typed observer. The system prompt defines the six decomposition
5+
* steps verbatim from Hindsight §2.3 (coreference resolution, temporal
6+
* normalization, participant attribution, reasoning preservation, fact
7+
* type classification, entity extraction). The user prompt frames the
8+
* conversation as a single block and asks the model to emit structured
9+
* JSON conforming to {@link TypedExtractionSchema}.
10+
*
11+
* @module @framers/agentos/memory/retrieval/typed-network/prompts/extraction-prompt
12+
*/
13+
14+
/**
 * System prompt for the 6-step extraction. Verbatim from Hindsight
 * §2.3 with one addition: the spec doesn't include the "do not
 * commentate" line, but the LLM tends to drift into prose without it,
 * which breaks JSON parsing. Included here.
 */
export const TYPED_EXTRACTION_SYSTEM_PROMPT = `You are an information extractor for a typed memory network. Process the conversation below into structured facts.

For each fact, perform these six steps:

1. COREFERENCE: resolve "he/she/they/it/this/that" to the actual referent.
2. TEMPORAL: normalize times to ISO 8601. Extract ranges as (start, end) when applicable.
3. PARTICIPANTS: list every named participant and their role.
4. REASONING: preserve any explicit reasoning marker (because, since, therefore, etc.) verbatim.
5. FACT TYPE: classify into ONE of:
- WORLD: objective facts about the external world
- EXPERIENCE: biographical / first-person events
- OPINION: claims with confidence < 1.0
- OBSERVATION: preference-neutral summaries of entities
6. ENTITIES: list every named entity (proper nouns, organizations, places, products).

Output JSON matching the schema strictly. Do not add commentary.`;
36+
37+
/**
38+
* Build the user prompt for a single conversation block. Wraps the
39+
* source text in delimiters that resist accidental inline-injection
40+
* if the conversation contains JSON-looking content.
41+
*
42+
* @param sessionText - The conversation text to extract from. Whole
43+
* session passed as one block; the model decomposes per turn
44+
* internally.
45+
*/
46+
export function buildExtractionUserPrompt(sessionText: string): string {
47+
return `CONVERSATION:\n<<<\n${sessionText}\n>>>`;
48+
}
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
/**
2+
* @file extraction-schema.ts
3+
* @description Zod schema for parsing the LLM's structured-output
4+
* response in the typed-network extraction pipeline. Mirrors
5+
* {@link TypedFact} fields but uses snake_case for the LLM API
6+
* boundary (LLMs tend to emit snake_case more reliably than
7+
* camelCase). The {@link TypedNetworkObserver} translates from this
8+
* schema's snake_case shape to the camelCase TypedFact at construction
9+
* time.
10+
*
11+
* @module @framers/agentos/memory/retrieval/typed-network/prompts/extraction-schema
12+
*/
13+
14+
import { z } from 'zod';
15+
16+
/**
17+
* Schema for one extracted fact, matching the LLM's expected output.
18+
* `confidence` defaults to 1.0 when missing — the schema permits
19+
* omission for non-Opinion facts where the value is structurally 1.0.
20+
*/
21+
export const TypedExtractionFactSchema = z.object({
22+
text: z.string().min(1),
23+
bank: z.enum(['WORLD', 'EXPERIENCE', 'OPINION', 'OBSERVATION']),
24+
temporal: z.object({
25+
start: z.string().optional(),
26+
end: z.string().optional(),
27+
mention: z.string(),
28+
}),
29+
participants: z.array(
30+
z.object({ name: z.string(), role: z.string() }),
31+
),
32+
reasoning_markers: z.array(z.string()),
33+
entities: z.array(z.string()),
34+
confidence: z.number().min(0).max(1).default(1.0),
35+
});
36+
37+
/**
 * Top-level schema. Wraps the fact array under a `facts` key so the
 * LLM has a stable structural anchor to emit against. An empty
 * `facts` array is valid — a session may yield no extractable facts.
 */
export const TypedExtractionSchema = z.object({
  facts: z.array(TypedExtractionFactSchema),
});

/** TypeScript type inferred from {@link TypedExtractionSchema}. */
export type TypedExtractionOutput = z.infer<typeof TypedExtractionSchema>;
/** Per-fact type inferred from {@link TypedExtractionFactSchema}. */
export type TypedExtractionFact = z.infer<typeof TypedExtractionFactSchema>;

0 commit comments

Comments
 (0)