
Commit cb2282e

feat(ingest-router): implement session summarizer with verbatim Anthropic prompt
Adds the Stage L per-session summarizer that powers Anthropic Contextual Retrieval. ANTHROPIC_CONTEXTUAL_PROMPT is the verbatim prompt from platform.claude.com/cookbook/capabilities-contextual-embeddings-guide; two phrases are load-bearing and asserted in tests to prevent silent prompt drift. Provider-agnostic SummarizerLLM adapter keeps the executor reusable across OpenAI, Anthropic, local models, or mocks. 4/4 tests pass.
1 parent 1c41478 commit cb2282e

2 files changed

Lines changed: 131 additions & 0 deletions
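The commit message describes SummarizerLLM as a provider-agnostic adapter, but no concrete provider wiring is included in this commit. Purely as an illustration, a minimal sketch of an OpenAI-backed adapter might look like the following; the SummarizerLLM shape is inferred from the diff below, and the factory name, client wiring, and default model string are assumptions, not part of this change.

// Hypothetical adapter sketch (not part of this commit): wires the
// provider-agnostic SummarizerLLM shape inferred from the diff below
// to the OpenAI Node SDK chat.completions API.
import OpenAI from 'openai';

// Shape inferred from sessionSummarizer.ts and its tests; the real
// definition lives in ./types.js, which is not in this diff.
interface SummarizerLLM {
  invoke(args: {
    system: string;
    user: string;
    maxTokens: number;
    temperature: number;
  }): Promise<{ text: string; tokensIn: number; tokensOut: number; model: string }>;
}

export function makeOpenAISummarizerLLM(model = 'gpt-5-mini'): SummarizerLLM {
  const client = new OpenAI(); // reads OPENAI_API_KEY from the environment
  return {
    async invoke({ system, user, maxTokens, temperature }) {
      const res = await client.chat.completions.create({
        model,
        messages: [
          { role: 'system', content: system },
          { role: 'user', content: user },
        ],
        max_tokens: maxTokens,
        temperature,
      });
      return {
        text: res.choices[0]?.message?.content ?? '',
        tokensIn: res.usage?.prompt_tokens ?? 0,
        tokensOut: res.usage?.completion_tokens ?? 0,
        model,
      };
    },
  };
}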


sessionSummarizer.test.ts: 76 additions & 0 deletions
@@ -0,0 +1,76 @@
/**
 * @file sessionSummarizer.test.ts
 * @description Tests for the per-session summarizer used by the
 * Anthropic Contextual Retrieval ingest pipeline (Stage L).
 */

import { describe, it, expect, vi } from 'vitest';
import { summarizeSession, ANTHROPIC_CONTEXTUAL_PROMPT } from '../sessionSummarizer.js';
import type { SummarizerLLM } from '../types.js';

describe('summarizeSession', () => {
  it('uses the verbatim Anthropic Contextual Retrieval prompt format', async () => {
    const invoke = vi.fn(async () => ({
      text: 'Discussion about Q3 deployment strategy',
      tokensIn: 1500,
      tokensOut: 8,
      model: 'gpt-5-mini',
    }));
    const llm: SummarizerLLM = { invoke };

    await summarizeSession(
      { sessionId: 'sess-1', text: 'user: when do we deploy?\nassistant: Q3' },
      { llm },
    );

    expect(invoke).toHaveBeenCalledTimes(1);
    const arg = invoke.mock.calls[0][0];
    expect(arg.system).toContain('situate this');
    expect(arg.user).toContain('user: when do we deploy?');
    expect(arg.temperature).toBe(0);
    expect(arg.maxTokens).toBeLessThanOrEqual(120);
  });

  it('returns 50 to 100 token summaries by default', async () => {
    const llm: SummarizerLLM = {
      invoke: async () => ({
        text: 'Short summary',
        tokensIn: 100,
        tokensOut: 5,
        model: 'gpt-5-mini',
      }),
    };

    const result = await summarizeSession(
      { sessionId: 's1', text: 'hello' },
      { llm },
    );

    expect(result.summary).toBe('Short summary');
    expect(result.tokensOut).toBe(5);
    expect(result.tokensOut).toBeLessThanOrEqual(100);
  });

  it('exposes the verbatim Anthropic prompt as a constant', () => {
    expect(ANTHROPIC_CONTEXTUAL_PROMPT).toContain(
      'Please give a short succinct context to situate this chunk',
    );
    expect(ANTHROPIC_CONTEXTUAL_PROMPT).toContain('Answer only with the succinct context');
  });

  it('returns the sessionId on the result for caching', async () => {
    const llm: SummarizerLLM = {
      invoke: async () => ({
        text: 'Cached summary text',
        tokensIn: 50,
        tokensOut: 3,
        model: 'gpt-5-mini',
      }),
    };
    const result = await summarizeSession(
      { sessionId: 'sess-XYZ', text: 'foo' },
      { llm },
    );
    expect(result.sessionId).toBe('sess-XYZ');
  });
});
sessionSummarizer.ts: 55 additions & 0 deletions
@@ -0,0 +1,55 @@
/**
 * @file sessionSummarizer.ts
 * @description One-shot session summarizer using the verbatim Anthropic
 * Contextual Retrieval prompt. Source:
 * platform.claude.com/cookbook/capabilities-contextual-embeddings-guide.
 *
 * Cost model (per Anthropic): ~$0.003 per session at gpt-5-mini, fully
 * cached after first run via {@link SummarizedIngestExecutor}'s
 * per-sessionId cache.
 *
 * @module @framers/agentos/ingest-router/executors/sessionSummarizer
 */

import type { SessionContent, SummarizerLLM } from './types.js';

/**
 * Verbatim Anthropic Contextual Retrieval prompt. Two phrases are
 * load-bearing: "situate this" identifies the recipe lineage, and
 * "Answer only with the succinct context" prevents the model from
 * adding preambles that pollute the embedding text. Asserted in
 * sessionSummarizer.test.ts to prevent silent prompt drift.
 */
export const ANTHROPIC_CONTEXTUAL_PROMPT = `You are summarizing a conversation session for retrieval. Please give a short succinct context to situate this chunk within the overall document for the purposes of improving search retrieval of the chunk.
Answer only with the succinct context and nothing else.`;

/**
 * Run one summarize call against a session. Returns a structured result
 * with the trimmed summary plus token usage for cost accounting.
 *
 * Caller-supplied {@link SummarizerLLM} is provider-agnostic; the
 * shipping config wires gpt-5-mini for single-OpenAI-key reproducibility.
 */
export async function summarizeSession(
  session: SessionContent,
  opts: { llm: SummarizerLLM; maxSummaryTokens?: number },
): Promise<{
  sessionId: string;
  summary: string;
  tokensIn: number;
  tokensOut: number;
}> {
  const maxTokens = opts.maxSummaryTokens ?? 100;
  const result = await opts.llm.invoke({
    system: ANTHROPIC_CONTEXTUAL_PROMPT,
    user: session.text,
    maxTokens,
    temperature: 0,
  });
  return {
    sessionId: session.sessionId,
    summary: result.text.trim(),
    tokensIn: result.tokensIn,
    tokensOut: result.tokensOut,
  };
}

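The file header notes that summaries are fully cached after the first run via SummarizedIngestExecutor's per-sessionId cache, but that executor is not part of this commit. Purely to illustrate the caching idea, a minimal in-memory per-sessionId wrapper around summarizeSession could look like this; the wrapper name and Map-based store are assumptions, not the actual executor.

// Illustrative only: an in-memory per-sessionId cache around
// summarizeSession, sketching the caching behavior the file header
// attributes to SummarizedIngestExecutor (not included in this commit).
import { summarizeSession } from './sessionSummarizer.js';
import type { SessionContent, SummarizerLLM } from './types.js';

type SessionSummary = Awaited<ReturnType<typeof summarizeSession>>;

export function makeCachedSummarizer(llm: SummarizerLLM) {
  const cache = new Map<string, SessionSummary>();

  return async function summarizeOnce(session: SessionContent): Promise<SessionSummary> {
    const hit = cache.get(session.sessionId);
    if (hit) return hit; // repeat runs for the same sessionId skip the LLM call

    const result = await summarizeSession(session, { llm });
    cache.set(session.sessionId, result);
    return result;
  };
}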