Skip to content

Commit ee8f463

Browse files
committed
feat(memory/typed-network): Phase 2 LLM observer (6-step extraction prompt + zod schema)
- TYPED_EXTRACTION_SYSTEM_PROMPT + buildExtractionUserPrompt for the Hindsight 6-step pipeline (coreference, temporal normalization, participant attribution, reasoning preservation, fact-type classification, entity extraction). - TypedExtractionSchema (zod) enforces the structured-output contract; TypedNetworkObserver wraps the LLM call + parsing + ID assignment. - ITypedExtractionLLM interface lets tests inject mocks. - 11 unit tests covering happy path, malformed JSON, schema violations, markdown code-fence stripping.
1 parent ebdf565 commit ee8f463

4 files changed

Lines changed: 373 additions & 0 deletions

File tree

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
/**
2+
* @file TypedNetworkObserver.ts
3+
* @description LLM-driven extractor that turns a conversation block
4+
* into 0+ {@link TypedFact}s. Wraps the 6-step extraction prompt and
5+
* the zod-validated parsing of the LLM's structured-output response.
6+
*
7+
* Production wiring: a typical caller constructs the observer once per
8+
* pipeline (re-using the same `gpt-5-mini` adapter), then invokes
9+
* {@link TypedNetworkObserver.extract} per session. The returned facts
10+
* are then upserted into a {@link TypedNetworkStore} and embedded by
11+
* the host's {@link IEmbeddingManager}.
12+
*
13+
* @module @framers/agentos/memory/retrieval/typed-network/TypedNetworkObserver
14+
*/
15+
16+
import { TypedExtractionSchema } from './prompts/extraction-schema.js';
17+
import {
18+
TYPED_EXTRACTION_SYSTEM_PROMPT,
19+
buildExtractionUserPrompt,
20+
} from './prompts/extraction-prompt.js';
21+
import type { TypedFact } from './types.js';
22+
23+
/**
 * Provider-agnostic LLM interface for the extractor. Matches the
 * shape used elsewhere in agentos for classifier / observer LLM
 * adapters: a single `invoke(args)` async method returning the raw
 * text response. Implementations wrap OpenAI, Anthropic, local
 * models, or test mocks.
 */
export interface ITypedExtractionLLM {
  /**
   * Run one completion and return the raw response text.
   *
   * @param args.system - System prompt (the 6-step extraction instructions).
   * @param args.user - User prompt containing the delimited conversation.
   * @param args.maxTokens - Output token cap forwarded to the provider.
   * @param args.temperature - Sampling temperature forwarded to the provider.
   * @returns Raw model output; expected to be JSON, possibly wrapped in a
   *   markdown code fence (the observer strips fences before parsing).
   */
  invoke(args: {
    system: string;
    user: string;
    maxTokens: number;
    temperature: number;
  }): Promise<string>;
}
38+
39+
/**
 * Construction options for the observer.
 */
export interface TypedNetworkObserverOptions {
  /** LLM adapter implementing the 6-step extraction call. */
  llm: ITypedExtractionLLM;
  /**
   * Max output tokens. Default 4096.
   * NOTE(review): extractions were sized at 50-200 facts × ~30 tokens
   * each; the top of that range (~6000 tokens) exceeds the 4096
   * default — confirm whether large sessions need a higher cap, or
   * whether truncated JSON is acceptable (it will fail parsing).
   */
  maxTokens?: number;
  /** Temperature. Default 0 for deterministic extraction. */
  temperature?: number;
}
50+
51+
/**
52+
* The 6-step extractor. Stateless aside from its constructor options;
53+
* safe to share across concurrent extractions.
54+
*/
55+
export class TypedNetworkObserver {
56+
private readonly llm: ITypedExtractionLLM;
57+
private readonly maxTokens: number;
58+
private readonly temperature: number;
59+
60+
constructor(options: TypedNetworkObserverOptions) {
61+
this.llm = options.llm;
62+
this.maxTokens = options.maxTokens ?? 4096;
63+
this.temperature = options.temperature ?? 0;
64+
}
65+
66+
/**
67+
* Extract typed facts from a conversation block. Uses the 6-step
68+
* prompt + zod-validated parsing. The resulting facts have stable
69+
* IDs of the form `<sessionId>-fact-<index>` so re-extraction
70+
* against the same content reproduces the same IDs.
71+
*
72+
* @param sessionText - Full conversation text. Will be wrapped in
73+
* the user prompt's delimiters automatically.
74+
* @param sessionId - Stable identifier used to namespace the
75+
* resulting fact IDs.
76+
* @returns Array of {@link TypedFact}s, possibly empty.
77+
* @throws ZodError if the LLM output fails schema validation.
78+
* @throws SyntaxError if the LLM output is not valid JSON.
79+
*/
80+
async extract(sessionText: string, sessionId: string): Promise<TypedFact[]> {
81+
const raw = await this.llm.invoke({
82+
system: TYPED_EXTRACTION_SYSTEM_PROMPT,
83+
user: buildExtractionUserPrompt(sessionText),
84+
maxTokens: this.maxTokens,
85+
temperature: this.temperature,
86+
});
87+
// Strip markdown code fences if the LLM wraps the JSON in them
88+
// (some models do this even with explicit "no commentary" prompts).
89+
const stripped = stripCodeFence(raw);
90+
const json = JSON.parse(stripped);
91+
const parsed = TypedExtractionSchema.parse(json);
92+
return parsed.facts.map((f, idx) => ({
93+
id: `${sessionId}-fact-${idx}`,
94+
bank: f.bank,
95+
text: f.text,
96+
embedding: [],
97+
temporal: f.temporal,
98+
participants: f.participants,
99+
reasoningMarkers: f.reasoning_markers,
100+
entities: f.entities,
101+
confidence: f.confidence,
102+
}));
103+
}
104+
}
105+
106+
/**
107+
* Strip leading/trailing markdown code fences. Tolerates both
108+
* triple-backtick-with-language and bare triple-backtick wrappers.
109+
*/
110+
function stripCodeFence(s: string): string {
111+
const trimmed = s.trim();
112+
if (!trimmed.startsWith('```')) return trimmed;
113+
// Drop the opening ``` (with or without language tag) and any trailing ```
114+
const withoutOpen = trimmed.replace(/^```(?:json|JSON)?\s*\n?/, '');
115+
return withoutOpen.replace(/\n?```\s*$/, '');
116+
}
Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
/**
2+
* @file TypedNetworkObserver.test.ts
3+
* @description Contract tests for the 6-step LLM extractor. Uses a
4+
* mocked LLM to assert: structured-output parsing, ID generation
5+
* format, all-bank routing through the observer, validation rejection
6+
* of malformed output, and code-fence tolerance.
7+
*/
8+
9+
import { describe, it, expect } from 'vitest';
10+
import { TypedNetworkObserver, type ITypedExtractionLLM } from '../TypedNetworkObserver.js';
11+
12+
function mockLLM(response: string): ITypedExtractionLLM {
13+
return { invoke: async () => response };
14+
}
15+
16+
describe('TypedNetworkObserver', () => {
17+
it('parses valid LLM output into TypedFact[]', async () => {
18+
const llm = mockLLM(JSON.stringify({
19+
facts: [{
20+
text: 'Berlin is in Germany',
21+
bank: 'WORLD',
22+
temporal: { mention: '2026-04-26T10:00:00Z' },
23+
participants: [],
24+
reasoning_markers: [],
25+
entities: ['Berlin', 'Germany'],
26+
confidence: 1.0,
27+
}],
28+
}));
29+
const obs = new TypedNetworkObserver({ llm });
30+
const facts = await obs.extract('User: Where is Berlin? Assistant: In Germany.', 'session-1');
31+
expect(facts).toHaveLength(1);
32+
expect(facts[0].bank).toBe('WORLD');
33+
expect(facts[0].entities).toContain('Berlin');
34+
expect(facts[0].id).toBe('session-1-fact-0');
35+
});
36+
37+
it('generates sequential IDs for multiple facts', async () => {
38+
const llm = mockLLM(JSON.stringify({
39+
facts: [
40+
{ text: 'A', bank: 'WORLD', temporal: { mention: '2026-04-26T10:00:00Z' }, participants: [], reasoning_markers: [], entities: [], confidence: 1.0 },
41+
{ text: 'B', bank: 'EXPERIENCE', temporal: { mention: '2026-04-26T10:01:00Z' }, participants: [], reasoning_markers: [], entities: [], confidence: 1.0 },
42+
{ text: 'C', bank: 'OPINION', temporal: { mention: '2026-04-26T10:02:00Z' }, participants: [], reasoning_markers: [], entities: [], confidence: 0.7 },
43+
],
44+
}));
45+
const obs = new TypedNetworkObserver({ llm });
46+
const facts = await obs.extract('text', 'sx');
47+
expect(facts.map((f) => f.id)).toEqual(['sx-fact-0', 'sx-fact-1', 'sx-fact-2']);
48+
});
49+
50+
it('routes facts into all four banks', async () => {
51+
const llm = mockLLM(JSON.stringify({
52+
facts: [
53+
{ text: 'World', bank: 'WORLD', temporal: { mention: 'now' }, participants: [], reasoning_markers: [], entities: [], confidence: 1.0 },
54+
{ text: 'Exp', bank: 'EXPERIENCE', temporal: { mention: 'now' }, participants: [], reasoning_markers: [], entities: [], confidence: 1.0 },
55+
{ text: 'Op', bank: 'OPINION', temporal: { mention: 'now' }, participants: [], reasoning_markers: [], entities: [], confidence: 0.5 },
56+
{ text: 'Obs', bank: 'OBSERVATION', temporal: { mention: 'now' }, participants: [], reasoning_markers: [], entities: [], confidence: 1.0 },
57+
],
58+
}));
59+
const obs = new TypedNetworkObserver({ llm });
60+
const facts = await obs.extract('text', 's1');
61+
const banks = facts.map((f) => f.bank);
62+
expect(banks).toEqual(['WORLD', 'EXPERIENCE', 'OPINION', 'OBSERVATION']);
63+
});
64+
65+
it('snake_case → camelCase translation for reasoning_markers', async () => {
66+
const llm = mockLLM(JSON.stringify({
67+
facts: [{
68+
text: 'Because the user prefers TypeScript, we use Bun',
69+
bank: 'EXPERIENCE',
70+
temporal: { mention: '2026-04-26T10:00:00Z' },
71+
participants: [],
72+
reasoning_markers: ['Because', 'we use'],
73+
entities: ['TypeScript', 'Bun'],
74+
confidence: 1.0,
75+
}],
76+
}));
77+
const obs = new TypedNetworkObserver({ llm });
78+
const facts = await obs.extract('text', 's1');
79+
expect(facts[0].reasoningMarkers).toEqual(['Because', 'we use']);
80+
});
81+
82+
it('throws on missing required field (zod validation)', async () => {
83+
const llm = mockLLM('{"facts": [{"text": ""}]}');
84+
const obs = new TypedNetworkObserver({ llm });
85+
await expect(obs.extract('blah', 'session-2')).rejects.toThrow();
86+
});
87+
88+
it('throws on unknown bank label', async () => {
89+
const llm = mockLLM(JSON.stringify({
90+
facts: [{
91+
text: 'foo',
92+
bank: 'FOO',
93+
temporal: { mention: 'now' },
94+
participants: [],
95+
reasoning_markers: [],
96+
entities: [],
97+
confidence: 1.0,
98+
}],
99+
}));
100+
const obs = new TypedNetworkObserver({ llm });
101+
await expect(obs.extract('text', 's1')).rejects.toThrow();
102+
});
103+
104+
it('throws on confidence outside [0, 1]', async () => {
105+
const llm = mockLLM(JSON.stringify({
106+
facts: [{
107+
text: 'foo',
108+
bank: 'OPINION',
109+
temporal: { mention: 'now' },
110+
participants: [],
111+
reasoning_markers: [],
112+
entities: [],
113+
confidence: 1.5,
114+
}],
115+
}));
116+
const obs = new TypedNetworkObserver({ llm });
117+
await expect(obs.extract('text', 's1')).rejects.toThrow();
118+
});
119+
120+
it('tolerates triple-backtick code fence around JSON', async () => {
121+
const llm = mockLLM('```json\n{"facts": []}\n```');
122+
const obs = new TypedNetworkObserver({ llm });
123+
const facts = await obs.extract('text', 's1');
124+
expect(facts).toEqual([]);
125+
});
126+
127+
it('tolerates bare backticks without language tag', async () => {
128+
const llm = mockLLM('```\n{"facts": []}\n```');
129+
const obs = new TypedNetworkObserver({ llm });
130+
const facts = await obs.extract('text', 's1');
131+
expect(facts).toEqual([]);
132+
});
133+
134+
it('passes maxTokens and temperature to the LLM', async () => {
135+
let capturedArgs: { maxTokens: number; temperature: number } | undefined;
136+
const llm: ITypedExtractionLLM = {
137+
invoke: async (args) => {
138+
capturedArgs = { maxTokens: args.maxTokens, temperature: args.temperature };
139+
return JSON.stringify({ facts: [] });
140+
},
141+
};
142+
const obs = new TypedNetworkObserver({ llm, maxTokens: 8192, temperature: 0.2 });
143+
await obs.extract('text', 's1');
144+
expect(capturedArgs?.maxTokens).toBe(8192);
145+
expect(capturedArgs?.temperature).toBe(0.2);
146+
});
147+
148+
it('default maxTokens=4096, temperature=0', async () => {
149+
let capturedArgs: { maxTokens: number; temperature: number } | undefined;
150+
const llm: ITypedExtractionLLM = {
151+
invoke: async (args) => {
152+
capturedArgs = { maxTokens: args.maxTokens, temperature: args.temperature };
153+
return JSON.stringify({ facts: [] });
154+
},
155+
};
156+
const obs = new TypedNetworkObserver({ llm });
157+
await obs.extract('text', 's1');
158+
expect(capturedArgs?.maxTokens).toBe(4096);
159+
expect(capturedArgs?.temperature).toBe(0);
160+
});
161+
});
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
/**
2+
* @file extraction-prompt.ts
3+
* @description The 6-step extraction prompt for the Hindsight 4-network
4+
* typed observer. The system prompt defines the six decomposition
5+
* steps verbatim from Hindsight §2.3 (coreference resolution, temporal
6+
* normalization, participant attribution, reasoning preservation, fact
7+
* type classification, entity extraction). The user prompt frames the
8+
* conversation as a single block and asks the model to emit structured
9+
* JSON conforming to {@link TypedExtractionSchema}.
10+
*
11+
* @module @framers/agentos/memory/retrieval/typed-network/prompts/extraction-prompt
12+
*/
13+
14+
/**
 * System prompt for the 6-step extraction. Verbatim from Hindsight
 * §2.3 with one addition: the spec doesn't include the "do not
 * commentate" line, but the LLM tends to drift into prose without it,
 * which breaks JSON parsing. Included here.
 */
export const TYPED_EXTRACTION_SYSTEM_PROMPT = `You are an information extractor for a typed memory network. Process the conversation below into structured facts.

For each fact, perform these six steps:

1. COREFERENCE: resolve "he/she/they/it/this/that" to the actual referent.
2. TEMPORAL: normalize times to ISO 8601. Extract ranges as (start, end) when applicable.
3. PARTICIPANTS: list every named participant and their role.
4. REASONING: preserve any explicit reasoning marker (because, since, therefore, etc.) verbatim.
5. FACT TYPE: classify into ONE of:
- WORLD: objective facts about the external world
- EXPERIENCE: biographical / first-person events
- OPINION: claims with confidence < 1.0
- OBSERVATION: preference-neutral summaries of entities
6. ENTITIES: list every named entity (proper nouns, organizations, places, products).

Output JSON matching the schema strictly. Do not add commentary.`;
36+
37+
/**
38+
* Build the user prompt for a single conversation block. Wraps the
39+
* source text in delimiters that resist accidental inline-injection
40+
* if the conversation contains JSON-looking content.
41+
*
42+
* @param sessionText - The conversation text to extract from. Whole
43+
* session passed as one block; the model decomposes per turn
44+
* internally.
45+
*/
46+
export function buildExtractionUserPrompt(sessionText: string): string {
47+
return `CONVERSATION:\n<<<\n${sessionText}\n>>>`;
48+
}
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
/**
2+
* @file extraction-schema.ts
3+
* @description Zod schema for parsing the LLM's structured-output
4+
* response in the typed-network extraction pipeline. Mirrors
5+
* {@link TypedFact} fields but uses snake_case for the LLM API
6+
* boundary (LLMs tend to emit snake_case more reliably than
7+
* camelCase). The {@link TypedNetworkObserver} translates from this
8+
* schema's snake_case shape to the camelCase TypedFact at construction
9+
* time.
10+
*
11+
* @module @framers/agentos/memory/retrieval/typed-network/prompts/extraction-schema
12+
*/
13+
14+
import { z } from 'zod';
15+
16+
/**
17+
* Schema for one extracted fact, matching the LLM's expected output.
18+
* `confidence` defaults to 1.0 when missing — the schema permits
19+
* omission for non-Opinion facts where the value is structurally 1.0.
20+
*/
21+
export const TypedExtractionFactSchema = z.object({
22+
text: z.string().min(1),
23+
bank: z.enum(['WORLD', 'EXPERIENCE', 'OPINION', 'OBSERVATION']),
24+
temporal: z.object({
25+
start: z.string().optional(),
26+
end: z.string().optional(),
27+
mention: z.string(),
28+
}),
29+
participants: z.array(
30+
z.object({ name: z.string(), role: z.string() }),
31+
),
32+
reasoning_markers: z.array(z.string()),
33+
entities: z.array(z.string()),
34+
confidence: z.number().min(0).max(1).default(1.0),
35+
});
36+
37+
/**
 * Top-level schema. Wraps the fact array under a `facts` key so the
 * LLM has a stable structural anchor to emit against. An empty
 * `facts` array is valid — a session may yield no extractable facts.
 */
export const TypedExtractionSchema = z.object({
  facts: z.array(TypedExtractionFactSchema),
});

/** TypeScript type inferred from {@link TypedExtractionSchema}. */
export type TypedExtractionOutput = z.infer<typeof TypedExtractionSchema>;
/** Per-fact type inferred from {@link TypedExtractionFactSchema}. */
export type TypedExtractionFact = z.infer<typeof TypedExtractionFactSchema>;

0 commit comments

Comments
 (0)