/**
 * @file types.ts
 * @description Public types for the IngestRouter reference executors.
 *
 * Stage L (Anthropic Contextual Retrieval): one LLM summarize call per
 * session, summary prepended to every chunk in that session before
 * embedding. See the verbatim recipe at:
 * platform.claude.com/cookbook/capabilities-contextual-embeddings-guide.
 *
 * Stage I (Mem0 v3 entity-linking) types are added in a separate task
 * but live in this same file so consumers of `@framers/agentos/ingest-router`
 * find every executor type in one place.
 *
 * @module @framers/agentos/ingest-router/executors/types
 */
| 16 | + |
| 17 | +/** |
| 18 | + * Single conversational session passed to the summarizer. |
| 19 | + */ |
| 20 | +export interface SessionContent { |
| 21 | + /** Stable session identifier; used as the cache key. */ |
| 22 | + sessionId: string; |
| 23 | + /** Full session text (turn list, document body, etc.). */ |
| 24 | + text: string; |
| 25 | +} |
| 26 | + |
| 27 | +/** |
| 28 | + * Provider-agnostic LLM adapter for the summarize call. Implementations |
| 29 | + * wrap an OpenAI / Anthropic / local-model client. Single-OpenAI-key |
| 30 | + * reproducibility means the shipping configuration uses gpt-5-mini. |
| 31 | + */ |
| 32 | +export interface SummarizerLLM { |
| 33 | + invoke(req: { |
| 34 | + system: string; |
| 35 | + user: string; |
| 36 | + maxTokens: number; |
| 37 | + temperature: number; |
| 38 | + }): Promise<{ |
| 39 | + text: string; |
| 40 | + tokensIn: number; |
| 41 | + tokensOut: number; |
| 42 | + model: string; |
| 43 | + }>; |
| 44 | +} |
| 45 | + |
| 46 | +/** |
| 47 | + * Constructor options for {@link SummarizedIngestExecutor}. |
| 48 | + */ |
| 49 | +export interface SummarizedIngestOptions { |
| 50 | + /** LLM adapter used for the per-session summarize call. */ |
| 51 | + llm: SummarizerLLM; |
| 52 | + /** |
| 53 | + * Override the default 100-token summary cap. Anthropic's recipe |
| 54 | + * targets 50-100 tokens of context per chunk; override to tune the |
| 55 | + * cost / context-density trade-off. |
| 56 | + */ |
| 57 | + maxSummaryTokens?: number; |
| 58 | + /** |
| 59 | + * Override the default cache key (sessionId). Useful when sessions |
| 60 | + * share semantic identity across rebrandings (e.g., user renames). |
| 61 | + */ |
| 62 | + cacheKey?: (session: SessionContent) => string; |
| 63 | +} |
| 64 | + |
| 65 | +/** |
| 66 | + * One emitted trace from the summarized executor. Each chunk in a |
| 67 | + * session becomes one trace; all traces in the session share the same |
| 68 | + * summary prefix. |
| 69 | + */ |
| 70 | +export interface SummarizedTrace { |
| 71 | + sessionId: string; |
| 72 | + chunkIndex: number; |
| 73 | + /** Text passed to the embedder: `${summary}\n\n${rawText}`. */ |
| 74 | + embedText: string; |
| 75 | + /** Original chunk content, before summary prepend. */ |
| 76 | + rawText: string; |
| 77 | + /** Per-session summary, shared across every chunk in the session. */ |
| 78 | + summary: string; |
| 79 | +} |