Skip to content

Commit 162d7ab

Browse files
committed
feat(memory-router): productionize ReaderRouter primitive
Adds the per-query reader-tier dispatch primitive to agentos core, parallel to the existing MemoryRouter (backend dispatch) and RetrievalConfigRouter (retrieval-config dispatch). Completes the orchestration triplet where all three consume the same gpt-5-mini classifier output to minimize $/correct on conversational-memory benchmarks. Two presets ship: - MIN_COST_BEST_CAT_2026_04_28_TABLE — calibrated from LongMemEval-S Phase B N=500 per-category accuracy split between gpt-4o and gpt-5-mini at the canonical-hybrid retrieval stack. Oracle aggregate 87.0%; realized 85.6% via the bench's standalone-classifier path (Phase B headline 2026-04-28). - MIN_COST_BEST_CAT_GPT5_TR_2026_04_29_TABLE — replaces the gpt-4o picks for temporal-reasoning (TR) and single-session-user (SSU) with gpt-5. Phase A small-sample signal of +4 pp on TR plus cheaper input pricing. Phase B at N=500 is the validation gate (currently in flight). The primitive is intentionally identical in shape to the bench's local readerRouter.ts (which was the v1 calibration source); the bench will swap its local copy to import from @framers/agentos/memory-router in a follow-up commit. 27 contract tests pin the type union, every per-category dispatch decision, completeness invariants, frozen invariants, and the 'every gpt-4o pick replaced by gpt-5' invariant relating the two presets.
1 parent becde78 commit 162d7ab

3 files changed

Lines changed: 433 additions & 0 deletions

File tree

Lines changed: 197 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,197 @@
1+
/**
2+
* @file reader-router.test.ts
3+
* @description Contract tests for the ReaderRouter primitive — pinned
4+
* type contract, calibration data shape, selector behavior, and
5+
* registry invariants.
6+
*
7+
* Source data: 2026-04-28 LongMemEval-S Phase B N=500 per-category
8+
* accuracy split between gpt-4o and gpt-5-mini at the canonical-hybrid
9+
* retrieval stack (see the file header in `reader-router.ts` for the table).
10+
*/
11+
12+
import { describe, expect, it } from 'vitest';
13+
import {
14+
MIN_COST_BEST_CAT_2026_04_28_TABLE,
15+
MIN_COST_BEST_CAT_GPT5_TR_2026_04_29_TABLE,
16+
READER_ROUTER_PRESET_TABLES,
17+
ReaderRouterUnknownCategoryError,
18+
ReaderRouterUnknownPresetError,
19+
selectReader,
20+
type ReaderRouterPreset,
21+
type ReaderTier,
22+
} from '../reader-router.js';
23+
import {
24+
MEMORY_QUERY_CATEGORIES,
25+
type MemoryQueryCategory,
26+
} from '../routing-tables.js';
27+
28+
// Reader tiers accepted by the registry-wide "valid reader tiers" test in
// the READER_ROUTER_PRESET_TABLES suite of this file.
const VALID_TIERS: readonly ReaderTier[] = ['gpt-4o', 'gpt-5', 'gpt-5-mini'] as const;
29+
30+
// Pins the reader tier that selectReader returns for each of the six query
// categories under the 'min-cost-best-cat-2026-04-28' preset. The pp figures
// in the test titles only restate the calibration rationale; each test
// asserts nothing beyond the returned tier.
describe('selectReader: min-cost-best-cat-2026-04-28 calibration', () => {
31+
const PRESET: ReaderRouterPreset = 'min-cost-best-cat-2026-04-28';
32+
33+
it('routes temporal-reasoning to gpt-4o (clear +11.8 pp accuracy lift)', () => {
34+
expect(selectReader('temporal-reasoning', PRESET)).toBe('gpt-4o');
35+
});
36+
37+
it('routes single-session-user to gpt-4o (+4.3 pp accuracy lift)', () => {
38+
expect(selectReader('single-session-user', PRESET)).toBe('gpt-4o');
39+
});
40+
41+
it('routes single-session-preference to gpt-5-mini (+23.4 pp lift, biggest single-category swing)', () => {
42+
expect(selectReader('single-session-preference', PRESET)).toBe('gpt-5-mini');
43+
});
44+
45+
it('routes single-session-assistant to gpt-5-mini (tied accuracy, cheaper reader)', () => {
46+
expect(selectReader('single-session-assistant', PRESET)).toBe('gpt-5-mini');
47+
});
48+
49+
it('routes knowledge-update to gpt-5-mini (tied accuracy, cheaper reader)', () => {
50+
expect(selectReader('knowledge-update', PRESET)).toBe('gpt-5-mini');
51+
});
52+
53+
it('routes multi-session to gpt-5-mini (+3.5 pp accuracy lift, also cheaper)', () => {
54+
expect(selectReader('multi-session', PRESET)).toBe('gpt-5-mini');
55+
});
56+
});
57+
58+
// Pins the reader tier that selectReader returns for each of the six query
// categories under the 'min-cost-best-cat-gpt5-tr-2026-04-29' preset:
// temporal-reasoning and single-session-user are expected on gpt-5, the
// remaining four categories on gpt-5-mini. Rationale in the test titles is
// descriptive only; the assertions check the returned tier and nothing else.
describe('selectReader: min-cost-best-cat-gpt5-tr-2026-04-29 calibration', () => {
59+
const PRESET: ReaderRouterPreset = 'min-cost-best-cat-gpt5-tr-2026-04-29';
60+
61+
it('routes temporal-reasoning to gpt-5 (Phase A small-sample +4.2 pp PE vs gpt-4o)', () => {
62+
expect(selectReader('temporal-reasoning', PRESET)).toBe('gpt-5');
63+
});
64+
65+
it('routes single-session-user to gpt-5 (Phase A small-sample lift over gpt-4o)', () => {
66+
expect(selectReader('single-session-user', PRESET)).toBe('gpt-5');
67+
});
68+
69+
it('keeps single-session-preference on gpt-5-mini (gpt-5-mini was the +23.4 pp winner)', () => {
70+
expect(selectReader('single-session-preference', PRESET)).toBe('gpt-5-mini');
71+
});
72+
73+
it('keeps single-session-assistant on gpt-5-mini (gpt-5-mini ties; cheaper)', () => {
74+
expect(selectReader('single-session-assistant', PRESET)).toBe('gpt-5-mini');
75+
});
76+
77+
it('keeps knowledge-update on gpt-5-mini (gpt-5-mini ties; cheaper)', () => {
78+
expect(selectReader('knowledge-update', PRESET)).toBe('gpt-5-mini');
79+
});
80+
81+
it('keeps multi-session on gpt-5-mini (gpt-5-mini was the +3.5 pp winner)', () => {
82+
expect(selectReader('multi-session', PRESET)).toBe('gpt-5-mini');
83+
});
84+
});
85+
86+
// Verifies that selectReader throws its dedicated error classes for inputs
// that are not part of the typed contract. Both cases use a type assertion
// (`as`) to get an invalid string literal past the compiler.
describe('selectReader: error paths', () => {
87+
it('throws ReaderRouterUnknownPresetError on unknown preset', () => {
88+
expect(() =>
89+
selectReader('multi-session', 'not-a-preset' as ReaderRouterPreset),
90+
).toThrow(ReaderRouterUnknownPresetError);
91+
});
92+
93+
it('throws ReaderRouterUnknownCategoryError when a category is missing from the table', () => {
94+
// A bogus category cannot reach selectReader through the type system,
95+
// but the runtime guard MUST still fire if a future table addition
96+
// forgets a category. Simulate the lookup miss with a type assertion.
97+
expect(() =>
98+
selectReader(
99+
'not-a-category' as MemoryQueryCategory,
100+
'min-cost-best-cat-2026-04-28',
101+
),
102+
).toThrow(ReaderRouterUnknownCategoryError);
103+
});
104+
});
105+
106+
// Structural invariants of MIN_COST_BEST_CAT_2026_04_28_TABLE: every entry
// of MEMORY_QUERY_CATEGORIES has a defined mapping, each mapped reader is
// either 'gpt-4o' or 'gpt-5-mini', the `preset` field carries the expected
// id, and both the table object and its `mapping` object are frozen
// (checked separately, since Object.isFrozen only inspects the object it
// is given).
describe('MIN_COST_BEST_CAT_2026_04_28_TABLE: completeness invariant', () => {
107+
it('covers all six MemoryQueryCategory values', () => {
108+
for (const cat of MEMORY_QUERY_CATEGORIES) {
109+
expect(MIN_COST_BEST_CAT_2026_04_28_TABLE.mapping[cat]).toBeDefined();
110+
}
111+
});
112+
113+
it('only references valid OpenAI reader tiers (gpt-4o or gpt-5-mini)', () => {
114+
for (const cat of MEMORY_QUERY_CATEGORIES) {
115+
const reader = MIN_COST_BEST_CAT_2026_04_28_TABLE.mapping[cat];
116+
expect(['gpt-4o', 'gpt-5-mini']).toContain(reader);
117+
}
118+
});
119+
120+
it('has the expected preset id', () => {
121+
expect(MIN_COST_BEST_CAT_2026_04_28_TABLE.preset).toBe(
122+
'min-cost-best-cat-2026-04-28',
123+
);
124+
});
125+
126+
it('frozen at module load (table + mapping)', () => {
127+
expect(Object.isFrozen(MIN_COST_BEST_CAT_2026_04_28_TABLE)).toBe(true);
128+
expect(Object.isFrozen(MIN_COST_BEST_CAT_2026_04_28_TABLE.mapping)).toBe(true);
129+
});
130+
});
131+
132+
// Structural invariants of MIN_COST_BEST_CAT_GPT5_TR_2026_04_29_TABLE:
// every entry of MEMORY_QUERY_CATEGORIES has a defined mapping, each mapped
// reader is either 'gpt-5' or 'gpt-5-mini', the `preset` field carries the
// expected id, and the table and its `mapping` are both frozen.
//
// The cross-preset test additionally ties this table to
// MIN_COST_BEST_CAT_2026_04_28_TABLE: wherever the 2026-04-28 table picks
// 'gpt-4o' this table must pick 'gpt-5', and every other category must
// match the 2026-04-28 pick exactly.
describe('MIN_COST_BEST_CAT_GPT5_TR_2026_04_29_TABLE: completeness invariant', () => {
133+
it('covers all six MemoryQueryCategory values', () => {
134+
for (const cat of MEMORY_QUERY_CATEGORIES) {
135+
expect(MIN_COST_BEST_CAT_GPT5_TR_2026_04_29_TABLE.mapping[cat]).toBeDefined();
136+
}
137+
});
138+
139+
it('only references gpt-5 and gpt-5-mini reader tiers', () => {
140+
for (const cat of MEMORY_QUERY_CATEGORIES) {
141+
const reader = MIN_COST_BEST_CAT_GPT5_TR_2026_04_29_TABLE.mapping[cat];
142+
expect(['gpt-5', 'gpt-5-mini']).toContain(reader);
143+
}
144+
});
145+
146+
it('has the expected preset id', () => {
147+
expect(MIN_COST_BEST_CAT_GPT5_TR_2026_04_29_TABLE.preset).toBe(
148+
'min-cost-best-cat-gpt5-tr-2026-04-29',
149+
);
150+
});
151+
152+
it('replaces every gpt-4o pick from MIN_COST_BEST_CAT_2026_04_28 with gpt-5', () => {
153+
for (const cat of MEMORY_QUERY_CATEGORIES) {
154+
if (MIN_COST_BEST_CAT_2026_04_28_TABLE.mapping[cat] === 'gpt-4o') {
155+
expect(MIN_COST_BEST_CAT_GPT5_TR_2026_04_29_TABLE.mapping[cat]).toBe('gpt-5');
156+
} else {
157+
expect(MIN_COST_BEST_CAT_GPT5_TR_2026_04_29_TABLE.mapping[cat]).toBe(
158+
MIN_COST_BEST_CAT_2026_04_28_TABLE.mapping[cat],
159+
);
160+
}
161+
}
162+
});
163+
164+
it('frozen at module load (table + mapping)', () => {
165+
expect(Object.isFrozen(MIN_COST_BEST_CAT_GPT5_TR_2026_04_29_TABLE)).toBe(true);
166+
expect(Object.isFrozen(MIN_COST_BEST_CAT_GPT5_TR_2026_04_29_TABLE.mapping)).toBe(true);
167+
});
168+
});
169+
170+
// Invariants of the READER_ROUTER_PRESET_TABLES registry: each preset name
// maps to the corresponding exported table constant (compared with `toBe`,
// i.e. the same object, not a structural copy), the registry object itself
// is frozen, and every table reachable through the registry maps each
// category in MEMORY_QUERY_CATEGORIES to a tier listed in VALID_TIERS.
describe('READER_ROUTER_PRESET_TABLES: registry', () => {
171+
it('exposes min-cost-best-cat-2026-04-28 keyed by preset name', () => {
172+
expect(READER_ROUTER_PRESET_TABLES['min-cost-best-cat-2026-04-28']).toBe(
173+
MIN_COST_BEST_CAT_2026_04_28_TABLE,
174+
);
175+
});
176+
177+
it('exposes min-cost-best-cat-gpt5-tr-2026-04-29 keyed by preset name', () => {
178+
expect(READER_ROUTER_PRESET_TABLES['min-cost-best-cat-gpt5-tr-2026-04-29']).toBe(
179+
MIN_COST_BEST_CAT_GPT5_TR_2026_04_29_TABLE,
180+
);
181+
});
182+
183+
it('frozen at module load', () => {
184+
expect(Object.isFrozen(READER_ROUTER_PRESET_TABLES)).toBe(true);
185+
});
186+
187+
it('every registered table only references valid reader tiers', () => {
188+
for (const preset of Object.keys(
189+
READER_ROUTER_PRESET_TABLES,
190+
) as ReaderRouterPreset[]) {
191+
const table = READER_ROUTER_PRESET_TABLES[preset];
192+
for (const cat of MEMORY_QUERY_CATEGORIES) {
193+
expect(VALID_TIERS).toContain<ReaderTier>(table.mapping[cat]);
194+
}
195+
}
196+
});
197+
});

src/memory-router/index.ts

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -267,3 +267,24 @@ export type {
267267
RetrievalConfigId,
268268
RetrievalConfigSpec,
269269
} from './retrieval-config.js';
270+
271+
// ============================================================================
272+
// ReaderRouter (per-query reader-tier dispatch, calibrated from
273+
// 2026-04-28 LongMemEval-S Phase B N=500 per-category gpt-4o vs
274+
// gpt-5-mini split)
275+
// ============================================================================
276+
277+
// Runtime re-exports from ./reader-router.js. The two preset tables are
// re-exported under a READER_ prefix; the registry, the two error classes
// and selectReader keep their original names.
// NOTE(review): the READER_ prefix presumably avoids clashing with
// similarly named tables exported elsewhere in this barrel — confirm.
export {
278+
MIN_COST_BEST_CAT_2026_04_28_TABLE as READER_MIN_COST_BEST_CAT_2026_04_28_TABLE,
279+
MIN_COST_BEST_CAT_GPT5_TR_2026_04_29_TABLE as READER_MIN_COST_BEST_CAT_GPT5_TR_2026_04_29_TABLE,
280+
READER_ROUTER_PRESET_TABLES,
281+
ReaderRouterUnknownCategoryError,
282+
ReaderRouterUnknownPresetError,
283+
selectReader,
284+
} from './reader-router.js';
285+
286+
// Type-only re-exports from ./reader-router.js (no runtime emit).
export type {
287+
ReaderTier,
288+
ReaderRouterPreset,
289+
ReaderRouterTable,
290+
} from './reader-router.js';

0 commit comments

Comments
 (0)