
Commit cb2282e

feat(ingest-router): implement session summarizer with verbatim Anthropic prompt
Adds the Stage L per-session summarizer that powers Anthropic Contextual Retrieval. ANTHROPIC_CONTEXTUAL_PROMPT is the verbatim prompt from platform.claude.com/cookbook/capabilities-contextual-embeddings-guide; two phrases are load-bearing and asserted in tests to prevent silent prompt drift. Provider-agnostic SummarizerLLM adapter keeps the executor reusable across OpenAI, Anthropic, local models, or mocks. 4/4 tests pass.
1 parent 1c41478 commit cb2282e

2 files changed

Lines changed: 131 additions & 0 deletions
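The commit message describes SummarizerLLM as a provider-agnostic adapter, but no concrete provider wiring is included in this commit. Purely as an illustration, a minimal sketch of an OpenAI-backed adapter might look like the following; the SummarizerLLM shape is inferred from the diff below, and the factory name, client wiring, and default model string are assumptions, not part of this change.

// Hypothetical adapter sketch (not part of this commit): wires the
// provider-agnostic SummarizerLLM shape inferred from the diff below
// to the OpenAI Node SDK chat.completions API.
import OpenAI from 'openai';

// Shape inferred from sessionSummarizer.ts and its tests; the real
// definition lives in ./types.js, which is not in this diff.
interface SummarizerLLM {
  invoke(args: {
    system: string;
    user: string;
    maxTokens: number;
    temperature: number;
  }): Promise<{ text: string; tokensIn: number; tokensOut: number; model: string }>;
}

export function makeOpenAISummarizerLLM(model = 'gpt-5-mini'): SummarizerLLM {
  const client = new OpenAI(); // reads OPENAI_API_KEY from the environment
  return {
    async invoke({ system, user, maxTokens, temperature }) {
      const res = await client.chat.completions.create({
        model,
        messages: [
          { role: 'system', content: system },
          { role: 'user', content: user },
        ],
        max_tokens: maxTokens,
        temperature,
      });
      return {
        text: res.choices[0]?.message?.content ?? '',
        tokensIn: res.usage?.prompt_tokens ?? 0,
        tokensOut: res.usage?.completion_tokens ?? 0,
        model,
      };
    },
  };
}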


sessionSummarizer.test.ts: 76 additions & 0 deletions
@@ -0,0 +1,76 @@
/**
 * @file sessionSummarizer.test.ts
 * @description Tests for the per-session summarizer used by the
 * Anthropic Contextual Retrieval ingest pipeline (Stage L).
 */

import { describe, it, expect, vi } from 'vitest';
import { summarizeSession, ANTHROPIC_CONTEXTUAL_PROMPT } from '../sessionSummarizer.js';
import type { SummarizerLLM } from '../types.js';

describe('summarizeSession', () => {
  it('uses the verbatim Anthropic Contextual Retrieval prompt format', async () => {
    const invoke = vi.fn(async () => ({
      text: 'Discussion about Q3 deployment strategy',
      tokensIn: 1500,
      tokensOut: 8,
      model: 'gpt-5-mini',
    }));
    const llm: SummarizerLLM = { invoke };

    await summarizeSession(
      { sessionId: 'sess-1', text: 'user: when do we deploy?\nassistant: Q3' },
      { llm },
    );

    expect(invoke).toHaveBeenCalledTimes(1);
    const arg = invoke.mock.calls[0][0];
    expect(arg.system).toContain('situate this');
    expect(arg.user).toContain('user: when do we deploy?');
    expect(arg.temperature).toBe(0);
    expect(arg.maxTokens).toBeLessThanOrEqual(120);
  });

  it('returns 50 to 100 token summaries by default', async () => {
    const llm: SummarizerLLM = {
      invoke: async () => ({
        text: 'Short summary',
        tokensIn: 100,
        tokensOut: 5,
        model: 'gpt-5-mini',
      }),
    };

    const result = await summarizeSession(
      { sessionId: 's1', text: 'hello' },
      { llm },
    );

    expect(result.summary).toBe('Short summary');
    expect(result.tokensOut).toBe(5);
    expect(result.tokensOut).toBeLessThanOrEqual(100);
  });

  it('exposes the verbatim Anthropic prompt as a constant', () => {
    expect(ANTHROPIC_CONTEXTUAL_PROMPT).toContain(
      'Please give a short succinct context to situate this chunk',
    );
    expect(ANTHROPIC_CONTEXTUAL_PROMPT).toContain('Answer only with the succinct context');
  });

  it('returns the sessionId on the result for caching', async () => {
    const llm: SummarizerLLM = {
      invoke: async () => ({
        text: 'Cached summary text',
        tokensIn: 50,
        tokensOut: 3,
        model: 'gpt-5-mini',
      }),
    };
    const result = await summarizeSession(
      { sessionId: 'sess-XYZ', text: 'foo' },
      { llm },
    );
    expect(result.sessionId).toBe('sess-XYZ');
  });
});
sessionSummarizer.ts: 55 additions & 0 deletions
@@ -0,0 +1,55 @@
/**
 * @file sessionSummarizer.ts
 * @description One-shot session summarizer using the verbatim Anthropic
 * Contextual Retrieval prompt. Source:
 * platform.claude.com/cookbook/capabilities-contextual-embeddings-guide.
 *
 * Cost model (per Anthropic): ~$0.003 per session at gpt-5-mini, fully
 * cached after first run via {@link SummarizedIngestExecutor}'s
 * per-sessionId cache.
 *
 * @module @framers/agentos/ingest-router/executors/sessionSummarizer
 */

import type { SessionContent, SummarizerLLM } from './types.js';

/**
 * Verbatim Anthropic Contextual Retrieval prompt. Two phrases are
 * load-bearing: "situate this" identifies the recipe lineage, and
 * "Answer only with the succinct context" prevents the model from
 * adding preambles that pollute the embedding text. Asserted in
 * sessionSummarizer.test.ts to prevent silent prompt drift.
 */
export const ANTHROPIC_CONTEXTUAL_PROMPT = `You are summarizing a conversation session for retrieval. Please give a short succinct context to situate this chunk within the overall document for the purposes of improving search retrieval of the chunk.
Answer only with the succinct context and nothing else.`;

/**
 * Run one summarize call against a session. Returns a structured result
 * with the trimmed summary plus token usage for cost accounting.
 *
 * Caller-supplied {@link SummarizerLLM} is provider-agnostic; the
 * shipping config wires gpt-5-mini for single-OpenAI-key reproducibility.
 */
export async function summarizeSession(
  session: SessionContent,
  opts: { llm: SummarizerLLM; maxSummaryTokens?: number },
): Promise<{
  sessionId: string;
  summary: string;
  tokensIn: number;
  tokensOut: number;
}> {
  const maxTokens = opts.maxSummaryTokens ?? 100;
  const result = await opts.llm.invoke({
    system: ANTHROPIC_CONTEXTUAL_PROMPT,
    user: session.text,
    maxTokens,
    temperature: 0,
  });
  return {
    sessionId: session.sessionId,
    summary: result.text.trim(),
    tokensIn: result.tokensIn,
    tokensOut: result.tokensOut,
  };
}

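The file header notes that summaries are fully cached after the first run via SummarizedIngestExecutor's per-sessionId cache, but that executor is not part of this commit. Purely to illustrate the caching idea, a minimal in-memory per-sessionId wrapper around summarizeSession could look like this; the wrapper name and Map-based store are assumptions, not the actual executor.

// Illustrative only: an in-memory per-sessionId cache around
// summarizeSession, sketching the caching behavior the file header
// attributes to SummarizedIngestExecutor (not included in this commit).
import { summarizeSession } from './sessionSummarizer.js';
import type { SessionContent, SummarizerLLM } from './types.js';

type SessionSummary = Awaited<ReturnType<typeof summarizeSession>>;

export function makeCachedSummarizer(llm: SummarizerLLM) {
  const cache = new Map<string, SessionSummary>();

  return async function summarizeOnce(session: SessionContent): Promise<SessionSummary> {
    const hit = cache.get(session.sessionId);
    if (hit) return hit; // repeat runs for the same sessionId skip the LLM call

    const result = await summarizeSession(session, { llm });
    cache.set(session.sessionId, result);
    return result;
  };
}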