Skip to content

Commit be9585e

Browse files
committed
memory: FactSupersession post-retrieval LLM filter (drops superseded traces)
1 parent b70b527 commit be9585e

5 files changed

Lines changed: 322 additions & 0 deletions

File tree

src/memory/core/types.ts

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,17 @@ export interface CognitiveRetrievalResult {
263263
* benefited from expansion.
264264
*/
265265
hyde?: { hypothesis: string };
266+
/**
267+
* Step-5: post-retrieve FactSupersession pass diagnostics.
268+
* Populated only when a bench adapter or downstream consumer
269+
* ran `FactSupersession.resolve()` over the retrieved traces.
270+
*/
271+
factSupersession?: {
272+
droppedIds: string[];
273+
parseOk: boolean;
274+
llmLatencyMs: number;
275+
notes?: string[];
276+
};
266277
};
267278
}
268279

src/memory/index.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -387,3 +387,11 @@ export type {
387387
RelationshipDriftInput,
388388
HEXACOTrait,
389389
} from './mechanisms/PersonaDriftMechanism.js';
390+
391+
// Step-5: FactSupersession post-retrieval filter.
392+
export { FactSupersession } from './retrieval/fact-supersession/index.js';
393+
export type {
394+
FactSupersessionOptions,
395+
FactSupersessionInput,
396+
FactSupersessionResult,
397+
} from './retrieval/fact-supersession/index.js';
Lines changed: 208 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,208 @@
1+
/**
2+
* @file FactSupersession.ts
3+
* @description Post-retrieval filter that uses an LLM to identify and
4+
* drop memory traces whose factual claims have been superseded by
5+
* later traces about the same subject.
6+
*
7+
* ## What this does
8+
*
9+
* Given a query and a list of retrieved `ScoredMemoryTrace`s, fires
10+
* one LLM call with a strict JSON output contract. The LLM returns
11+
* `{ dropIds: string[] }` — trace IDs to remove. The class filters
12+
* the input list and returns the survivors in original order.
13+
*
14+
* ## Failure modes (never throws)
15+
*
16+
* - Parse error → return original traces + `parse-failed` diagnostic.
17+
* - Schema mismatch → return original + `schema-mismatch` diagnostic.
18+
* - Timeout → return original + `timeout` diagnostic.
19+
* - LLM throws → return original + `llm-error` diagnostic.
20+
* - All IDs dropped (adversarial output) → safety clamp, return
21+
* original + `drop-all-rejected` diagnostic.
22+
*
23+
* ## Why this exists
24+
*
25+
* The baseline + Hybrid retrieval surfaces BOTH statements when a
26+
* user has updated a fact ("I live in NYC" + "I moved to Berlin").
27+
* The reader sometimes picks the older or hedges. A supersession
28+
* pass gives the reader only the canonical current state.
29+
*
30+
* @module agentos/memory/retrieval/fact-supersession/FactSupersession
31+
*/
32+
33+
import type { ScoredMemoryTrace } from '../../core/types.js';
34+
35+
/**
 * Signature of the LLM call injected into {@link FactSupersession}:
 * takes a system prompt and a user prompt and resolves with the model's
 * raw text reply (which the class then parses as JSON).
 */
export type LlmInvoker = (systemPrompt: string, userPrompt: string) => Promise<string>;
36+
37+
/**
 * Options for constructing a {@link FactSupersession}.
 *
 * Only `llmInvoker` is required; the numeric limits fall back to the
 * defaults noted on each field.
 */
38+
export interface FactSupersessionOptions {
39+
/** LLM invoker used for the supersession pass (one call per `resolve()`). */
40+
llmInvoker: LlmInvoker;
41+
/**
 * Max traces to send to the LLM. Only the first `maxTraces` entries of
 * the input list are included in the prompt.
 * @default 10
 */
42+
maxTraces?: number;
43+
/**
 * Max wall-clock ms to wait for the LLM before falling back to the
 * unfiltered trace list with a `fact-supersession:timeout` note.
 * @default 8000
 */
44+
timeoutMs?: number;
45+
}
46+
47+
/** Per-call input to {@link FactSupersession.resolve}. */
48+
export interface FactSupersessionInput {
49+
/** Retrieved traces to filter; survivors are returned in this same order. */
traces: ScoredMemoryTrace[];
50+
/** The user question; it is embedded verbatim in the LLM user prompt. */
query: string;
51+
}
52+
53+
/**
 * Per-call output from {@link FactSupersession.resolve}.
 *
 * On every fallback path `traces` is the untouched input list and
 * `droppedIds` is empty; `diagnostics.notes` then carries the reason.
 */
54+
export interface FactSupersessionResult {
55+
/** Traces surviving the filter, in original order. */
56+
traces: ScoredMemoryTrace[];
57+
/** IDs dropped by the LLM (subset of input trace IDs). */
58+
droppedIds: string[];
59+
diagnostics: {
60+
/** Wall-clock ms spent in `resolve()`; 0 when the input list was empty. */
llmLatencyMs: number;
61+
/** False when the LLM call failed/timed out or its reply could not be parsed or validated. */
parseOk: boolean;
62+
/** Machine-readable `fact-supersession:*` reason codes, when any were recorded. */
notes?: string[];
63+
};
64+
}
65+
66+
/**
 * Canonical supersession system prompt. Strict rules: supersession
 * requires contradiction between two claims about the same (subject,
 * predicate); complementary facts never supersede.
 */
const SUPERSESSION_SYSTEM_PROMPT = `You are a fact-supersession analyzer for memory retrieval. Given a user question and N retrieved memory traces, identify traces containing FACTS that have been SUPERSEDED by later traces about the same subject.

Rules:
1. Supersession requires contradiction — two traces making DIFFERENT claims about the same (subject, predicate).
2. Use the timestamp field to order claims chronologically. The LATER trace wins.
3. Traces about DIFFERENT subjects are never mutually superseding.
4. Complementary facts (different predicates about the same subject) never supersede each other.
5. Return a JSON object: {"dropIds": ["id1", "id2"]}. Drop ONLY the outdated ones. Return {"dropIds": []} if no supersession detected.

Do not drop traces that are not clearly superseded.`;

/**
 * Post-retrieval fact supersession filter.
 *
 * Sends the first `maxTraces` retrieved traces to an LLM, asks which of
 * them are superseded, and removes those from the list. `resolve()`
 * never throws: every failure path returns the original traces plus a
 * `fact-supersession:*` note in `diagnostics.notes`.
 *
 * @example
 * ```ts
 * const fs = new FactSupersession({
 *   llmInvoker: async (system, user) => (await reader.invoke({ system, user, maxTokens: 200, temperature: 0 })).text,
 * });
 * const result = await fs.resolve({ traces: retrieval.retrieved, query: caseQuery });
 * // Feed `result.traces` to the reader instead of `retrieval.retrieved`.
 * ```
 */
export class FactSupersession {
  private static readonly DEFAULT_MAX_TRACES = 10;
  private static readonly DEFAULT_TIMEOUT_MS = 8000;
  /** Trace content longer than this is truncated (with `...`) in the prompt. */
  private static readonly MAX_CONTENT_CHARS = 500;

  private readonly llmInvoker: LlmInvoker;
  private readonly maxTraces: number;
  private readonly timeoutMs: number;

  /**
   * @param opts Invoker plus optional limits. A `maxTraces` that is not a
   *   number >= 1 (0, negative, NaN) falls back to the default so the
   *   prompt window can never be empty for a non-empty input.
   */
  constructor(opts: FactSupersessionOptions) {
    this.llmInvoker = opts.llmInvoker;
    const requested = opts.maxTraces;
    this.maxTraces =
      typeof requested === 'number' && requested >= 1
        ? Math.floor(requested)
        : FactSupersession.DEFAULT_MAX_TRACES;
    this.timeoutMs = opts.timeoutMs ?? FactSupersession.DEFAULT_TIMEOUT_MS;
  }

  /**
   * Runs one supersession pass over `input.traces`.
   *
   * @param input Retrieved traces (in rank order) and the user query.
   * @returns Surviving traces in original order, the IDs that were
   *   dropped, and diagnostics. Never rejects.
   */
  async resolve(input: FactSupersessionInput): Promise<FactSupersessionResult> {
    const start = Date.now();
    const notes: string[] = [];

    if (input.traces.length === 0) {
      return {
        traces: [],
        droppedIds: [],
        diagnostics: { llmLatencyMs: 0, parseOk: true },
      };
    }

    // Shared shape for every "give the caller back what they passed in" path.
    const passthrough = (parseOk: boolean): FactSupersessionResult => ({
      traces: input.traces,
      droppedIds: [],
      diagnostics: { llmLatencyMs: Date.now() - start, parseOk, notes },
    });

    const window = input.traces.slice(0, this.maxTraces);

    let llmText: unknown;
    try {
      // Prompt construction sits inside the try so that a malformed trace
      // can never make resolve() throw.
      const userPrompt = this.buildUserPrompt(input.query, window);
      llmText = await this.invokeWithTimeout(userPrompt);
    } catch (err: unknown) {
      notes.push(
        FactSupersession.isTimeout(err)
          ? 'fact-supersession:timeout'
          : 'fact-supersession:llm-error',
      );
      return passthrough(false);
    }

    const parsed = this.parseDropIds(llmText);
    if ('reason' in parsed) {
      notes.push(parsed.reason);
      return passthrough(false);
    }

    // Only IDs the LLM was actually shown are eligible. Duplicates collapse
    // in the Set; hallucinated or out-of-window IDs are ignored and flagged.
    const visibleIds = new Set(window.map((t) => t.id));
    const dropSet = new Set(parsed.dropIds.filter((id) => visibleIds.has(id)));
    if (parsed.dropIds.some((id) => !visibleIds.has(id))) {
      notes.push('fact-supersession:unknown-ids-ignored');
    }

    // Safety clamp: supersession always leaves a winner, so a reply that
    // removes every trace the LLM saw is treated as adversarial/garbage.
    if (dropSet.size >= visibleIds.size) {
      notes.push('fact-supersession:drop-all-rejected');
      return passthrough(true);
    }

    // Traces beyond the window were never judged and always survive.
    const survivors = input.traces.filter(
      (t, index) => !(index < window.length && dropSet.has(t.id)),
    );
    const droppedIds = window.filter((t) => dropSet.has(t.id)).map((t) => t.id);

    return {
      traces: survivors,
      droppedIds,
      diagnostics: {
        llmLatencyMs: Date.now() - start,
        parseOk: true,
        notes: notes.length > 0 ? notes : undefined,
      },
    };
  }

  /** Renders the query and one `[id | ts | "content"]` line per trace. */
  private buildUserPrompt(query: string, traces: ScoredMemoryTrace[]): string {
    const limit = FactSupersession.MAX_CONTENT_CHARS;
    const lines = traces.map((t) => {
      const ts = FactSupersession.formatTimestamp(t.createdAt);
      const raw = typeof t.content === 'string' ? t.content : String(t.content ?? '');
      const content = raw.length > limit ? `${raw.slice(0, limit)}...` : raw;
      return `[id=${t.id} | ts=${ts} | "${content}"]`;
    });
    return `Question: ${query}\n\nTraces (id | timestamp | content):\n${lines.join('\n')}\n\nReturn JSON only.`;
  }

  /**
   * Calls the invoker, rejecting with a `fact-supersession timeout` error
   * if it has not settled within `timeoutMs`. The timer is always cleared,
   * including when the invoker throws synchronously.
   */
  private async invokeWithTimeout(userPrompt: string): Promise<string> {
    let timer: ReturnType<typeof setTimeout> | undefined;
    const deadline = new Promise<never>((_resolve, reject) => {
      timer = setTimeout(() => reject(new Error('fact-supersession timeout')), this.timeoutMs);
    });
    // The async wrapper converts a synchronous throw into a rejection.
    const call = (async () => this.llmInvoker(SUPERSESSION_SYSTEM_PROMPT, userPrompt))();
    try {
      return await Promise.race([call, deadline]);
    } finally {
      clearTimeout(timer);
    }
  }

  /**
   * Extracts `dropIds` from the LLM reply. Accepts bare JSON, JSON inside
   * a ``` fence, or a JSON object embedded in surrounding prose.
   *
   * @returns `{ ok: true, dropIds }` on success, otherwise
   *   `{ ok: false, reason }` with a `parse-failed` or `schema-mismatch` code.
   */
  private parseDropIds(text: unknown):
    | { ok: true; dropIds: string[] }
    | { ok: false; reason: string } {
    const parseFailed = { ok: false as const, reason: 'fact-supersession:parse-failed' };
    const schemaMismatch = { ok: false as const, reason: 'fact-supersession:schema-mismatch' };

    if (typeof text !== 'string') {
      return parseFailed;
    }
    const cleaned = text.trim().replace(/^```(?:json)?/i, '').replace(/```$/i, '').trim();

    let obj: unknown;
    try {
      obj = JSON.parse(cleaned);
    } catch {
      // Second chance: models often wrap the object in a sentence or two.
      const first = cleaned.indexOf('{');
      const last = cleaned.lastIndexOf('}');
      if (first === -1 || last <= first) {
        return parseFailed;
      }
      try {
        obj = JSON.parse(cleaned.slice(first, last + 1));
      } catch {
        return parseFailed;
      }
    }

    if (typeof obj !== 'object' || obj === null) {
      return schemaMismatch;
    }
    const dropIds = (obj as { dropIds?: unknown }).dropIds;
    if (!Array.isArray(dropIds) || !dropIds.every((v): v is string => typeof v === 'string')) {
      return schemaMismatch;
    }
    return { ok: true, dropIds };
  }

  /** ISO-8601 rendering of a timestamp, or `unknown` when it is not a valid date. */
  private static formatTimestamp(value: unknown): string {
    const date = new Date(value as number);
    return Number.isNaN(date.getTime()) ? 'unknown' : date.toISOString();
  }

  /** True when the thrown value carries a string message mentioning `timeout`. */
  private static isTimeout(err: unknown): boolean {
    const message = (err as { message?: unknown } | null | undefined)?.message;
    return typeof message === 'string' && message.includes('timeout');
  }
}
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
import { describe, it, expect } from 'vitest';
2+
import { FactSupersession } from '../FactSupersession.js';
3+
import type { ScoredMemoryTrace } from '../../../core/types.js';
4+
5+
/**
 * Test fixture: builds a fully-populated `ScoredMemoryTrace` from the
 * three values the tests vary (`id`, `content`, `createdAt`). Every other
 * field is a fixed placeholder.
 */
function mkTrace(id: string, content: string, createdAt: number): ScoredMemoryTrace {
6+
return {
7+
id,
8+
type: 'episodic',
9+
scope: 'user',
10+
scopeId: 'u1',
11+
content,
12+
entities: [],
13+
tags: [],
14+
// The source timestamp mirrors createdAt.
provenance: { sourceType: 'user_statement', sourceTimestamp: createdAt, confidence: 1, verificationCount: 0 },
15+
emotionalContext: { valence: 0, arousal: 0, dominance: 0, intensity: 0, gmiMood: '' },
16+
encodingStrength: 0.5, stability: 0.5, retrievalCount: 0, lastAccessedAt: 0,
17+
accessCount: 0, reinforcementInterval: 0, associatedTraceIds: [],
18+
createdAt, updatedAt: createdAt, isActive: true,
19+
// Fixed score; the "preserves order and scores" test asserts it survives filtering unchanged.
retrievalScore: 0.8,
20+
scoreBreakdown: {
21+
strengthScore: 0, similarityScore: 0.8, recencyScore: 0,
22+
emotionalCongruenceScore: 0, graphActivationScore: 0, importanceScore: 0,
23+
},
24+
};
25+
}
26+
27+
// Unit tests for FactSupersession.resolve(). Each case injects a stubbed
// llmInvoker that returns a canned reply, so no real LLM is called.
describe('FactSupersession', () => {
28+
// Happy path: the stub names t1 as superseded, so only t2 survives.
it('drops superseded trace when LLM returns its id', async () => {
29+
const traces = [
30+
mkTrace('t1', 'I live in NYC', 1_000_000),
31+
mkTrace('t2', 'I moved to Berlin', 2_000_000),
32+
];
33+
const fs = new FactSupersession({
34+
llmInvoker: async () => JSON.stringify({ dropIds: ['t1'] }),
35+
});
36+
const result = await fs.resolve({ traces, query: 'Where do I live?' });
37+
expect(result.traces.map((t) => t.id)).toEqual(['t2']);
38+
expect(result.droppedIds).toEqual(['t1']);
39+
expect(result.diagnostics.parseOk).toBe(true);
40+
});
41+
42+
// Empty dropIds: the list comes back in the same order with scores intact.
it('preserves order and scores when LLM returns empty dropIds', async () => {
43+
const traces = [
44+
mkTrace('t1', 'I like cats', 1_000_000),
45+
mkTrace('t2', 'I also like dogs', 2_000_000),
46+
];
47+
const fs = new FactSupersession({
48+
llmInvoker: async () => JSON.stringify({ dropIds: [] }),
49+
});
50+
const result = await fs.resolve({ traces, query: 'What pets do I like?' });
51+
expect(result.traces.map((t) => t.id)).toEqual(['t1', 't2']);
52+
expect(result.traces[0].retrievalScore).toBe(0.8);
53+
expect(result.droppedIds).toEqual([]);
54+
expect(result.diagnostics.parseOk).toBe(true);
55+
});
56+
57+
// Unparseable reply: original traces are returned with a parse-failed note.
it('falls back to original on parse failure', async () => {
58+
const traces = [mkTrace('t1', 'hello', 1_000_000)];
59+
const fs = new FactSupersession({
60+
llmInvoker: async () => 'not valid json {{{',
61+
});
62+
const result = await fs.resolve({ traces, query: 'q' });
63+
expect(result.traces.map((t) => t.id)).toEqual(['t1']);
64+
expect(result.droppedIds).toEqual([]);
65+
expect(result.diagnostics.parseOk).toBe(false);
66+
expect(result.diagnostics.notes).toContain('fact-supersession:parse-failed');
67+
});
68+
69+
// A reply that would remove every trace is rejected and nothing is dropped.
it('safety clamp rejects drop-all output', async () => {
70+
const traces = [
71+
mkTrace('t1', 'a', 1_000_000),
72+
mkTrace('t2', 'b', 2_000_000),
73+
];
74+
const fs = new FactSupersession({
75+
llmInvoker: async () => JSON.stringify({ dropIds: ['t1', 't2'] }),
76+
});
77+
const result = await fs.resolve({ traces, query: 'q' });
78+
expect(result.traces.map((t) => t.id)).toEqual(['t1', 't2']);
79+
expect(result.droppedIds).toEqual([]);
80+
expect(result.diagnostics.notes).toContain('fact-supersession:drop-all-rejected');
81+
});
82+
});
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
/**
2+
* @module agentos/memory/retrieval/fact-supersession
3+
* @description Post-retrieval LLM-based supersession filter — drops
4+
* memory traces whose factual claims have been superseded by later
5+
* traces about the same subject. Used to push knowledge-update
6+
* accuracy past the ceiling hit by pure retrieval + rerank.
7+
*/
8+
export { FactSupersession } from './FactSupersession.js';
9+
export type {
10+
FactSupersessionOptions,
11+
FactSupersessionInput,
12+
FactSupersessionResult,
13+
} from './FactSupersession.js';

0 commit comments

Comments
 (0)