diff --git a/packages/junior/src/chat/logging.ts b/packages/junior/src/chat/logging.ts index 7a81fd17..c896e1c2 100644 --- a/packages/junior/src/chat/logging.ts +++ b/packages/junior/src/chat/logging.ts @@ -1780,110 +1780,54 @@ function toFiniteTokenCount(value: unknown): number | undefined { return rounded >= 0 ? rounded : undefined; } -function readTokenCount( - root: Record<string, unknown>, - keys: string[], -): number | undefined { - for (const key of keys) { - const value = toFiniteTokenCount(root[key]); - if (value !== undefined) { - return value; - } - } - return undefined; -} - -function collectUsageRoots(source: unknown): Record<string, unknown>[] { - const sourceRecord = asRecord(source); - if (!sourceRecord) { - return []; - } - - const roots: Record<string, unknown>[] = [sourceRecord]; - const usage = asRecord(sourceRecord.usage); - if (usage) { - roots.push(usage); - } - - const tokenUsage = asRecord(sourceRecord.tokenUsage); - if (tokenUsage) { - roots.push(tokenUsage); - } +// pi-ai `Usage` field name -> our camelCase equivalent. This is the only shape +// that reaches the extractor today; pi-ai normalizes every provider response +// into this canonical set before we ever see it. 
+const PI_USAGE_FIELDS: ReadonlyArray<[string, keyof AgentTurnUsage]> = [ + ["input", "inputTokens"], + ["output", "outputTokens"], + ["cacheRead", "cachedInputTokens"], + ["cacheWrite", "cacheCreationTokens"], + ["totalTokens", "totalTokens"], +]; - const providerMetadata = asRecord(sourceRecord.providerMetadata); - if (providerMetadata) { - roots.push(providerMetadata); - const providerUsage = asRecord(providerMetadata.usage); - if (providerUsage) { - roots.push(providerUsage); - } +function readPiUsage(source: unknown): AgentTurnUsage { + const record = asRecord(source); + if (!record) { + return {}; } - - const response = asRecord(sourceRecord.response); - if (response) { - roots.push(response); - const responseUsage = asRecord(response.usage); - if (responseUsage) { - roots.push(responseUsage); + // Accept either a pi-ai AssistantMessage (has `.usage`) or a bare Usage record. + const usage = asRecord(record.usage) ?? record; + const summary: AgentTurnUsage = {}; + for (const [piKey, ourKey] of PI_USAGE_FIELDS) { + const value = toFiniteTokenCount(usage[piKey]); + if (value !== undefined) { + summary[ourKey] = value; } } - - return roots; + return summary; } -/** Extract a structured token-usage summary from provider metadata roots. */ +/** + * Sum pi-ai `Usage` counters across every source into an `AgentTurnUsage`. + * + * Callers pass every assistant message produced during a turn so the result + * reflects the aggregate usage for the entire turn rather than a single model + * call. Sources without a recognized usage record contribute nothing. 
+ */ export function extractGenAiUsageSummary( ...sources: unknown[] ): AgentTurnUsage { - const roots = sources.flatMap((source) => collectUsageRoots(source)); - if (roots.length === 0) { - return {}; + const summary: AgentTurnUsage = {}; + for (const source of sources) { + const single = readPiUsage(source); + for (const field of Object.keys(single) as (keyof AgentTurnUsage)[]) { + const value = single[field]; + if (value === undefined) continue; + summary[field] = (summary[field] ?? 0) + value; + } } - - const inputTokens = - roots - .map((root) => - readTokenCount(root, [ - "input_tokens", - "inputTokens", - "prompt_tokens", - "promptTokens", - "inputTokenCount", - "promptTokenCount", - ]), - ) - .find((value) => value !== undefined) ?? undefined; - - const outputTokens = - roots - .map((root) => - readTokenCount(root, [ - "output_tokens", - "outputTokens", - "completion_tokens", - "completionTokens", - "outputTokenCount", - "completionTokenCount", - ]), - ) - .find((value) => value !== undefined) ?? undefined; - - const totalTokens = - roots - .map((root) => - readTokenCount(root, [ - "total_tokens", - "totalTokens", - "totalTokenCount", - ]), - ) - .find((value) => value !== undefined) ?? undefined; - - return { - ...(inputTokens !== undefined ? { inputTokens } : {}), - ...(outputTokens !== undefined ? { outputTokens } : {}), - ...(totalTokens !== undefined ? { totalTokens } : {}), - }; + return summary; } /** Extract input/output token counts from AI provider usage metadata for tracing. */ diff --git a/packages/junior/src/chat/respond.ts b/packages/junior/src/chat/respond.ts index e20a6c44..bc598f13 100644 --- a/packages/junior/src/chat/respond.ts +++ b/packages/junior/src/chat/respond.ts @@ -864,12 +864,11 @@ export async function generateAssistantReply( agent.state, ...outputMessages, ); - turnUsage = - usageSummary.inputTokens !== undefined || - usageSummary.outputTokens !== undefined || - usageSummary.totalTokens !== undefined - ? 
usageSummary - : undefined; + turnUsage = Object.values(usageSummary).some( + (value) => value !== undefined, + ) + ? usageSummary + : undefined; setSpanAttributes({ ...(outputMessagesAttribute ? { "gen_ai.output.messages": outputMessagesAttribute } diff --git a/packages/junior/src/chat/slack/footer.ts b/packages/junior/src/chat/slack/footer.ts index 9fcbcbee..54242ead 100644 --- a/packages/junior/src/chat/slack/footer.ts +++ b/packages/junior/src/chat/slack/footer.ts @@ -53,15 +53,26 @@ function formatSlackDuration(durationMs: number): string { function resolveTotalTokens( usage: AgentTurnUsage | undefined, ): number | undefined { - if (usage?.totalTokens !== undefined) { - return usage.totalTokens; + if (!usage) { + return undefined; } - if (usage?.inputTokens !== undefined && usage.outputTokens !== undefined) { - return usage.inputTokens + usage.outputTokens; + // Sum every individual counter the provider reported so cached + cache + // creation tokens are included in the displayed total. Provider `totalTokens` + // fields are inconsistent across vendors (some exclude cached tokens, some + // include them), so prefer the sum when component counts exist. + const components = [ + usage.inputTokens, + usage.outputTokens, + usage.cachedInputTokens, + usage.cacheCreationTokens, + ].filter((value): value is number => value !== undefined); + + if (components.length > 0) { + return components.reduce((sum, value) => sum + value, 0); } - return undefined; + return usage.totalTokens; } /** Build a compact Slack reply footer so operators can correlate visible replies with backend state. */ diff --git a/packages/junior/src/chat/usage.ts b/packages/junior/src/chat/usage.ts index a09f1563..a364bbd6 100644 --- a/packages/junior/src/chat/usage.ts +++ b/packages/junior/src/chat/usage.ts @@ -1,5 +1,20 @@ +/** + * Structured token usage captured for a single agent turn. 
+ * + * Mirrors the fields pi-ai emits on `AssistantMessage.usage` (see + * `@mariozechner/pi-ai` `Usage`) so diagnostics carry every counter the + * provider normalizes into the pi-ai shape as its own item. Renderers decide + * whether to display a breakdown or a single aggregate. + */ export interface AgentTurnUsage { + /** Non-cached input tokens (pi-ai subtracts cached tokens from this). */ inputTokens?: number; + /** Output tokens; pi-ai folds reasoning tokens into this for providers that report them. */ outputTokens?: number; + /** Cached input tokens read from the provider's prompt cache. */ + cachedInputTokens?: number; + /** Input tokens written into the provider's prompt cache. */ + cacheCreationTokens?: number; + /** Provider-reported total. May not equal the sum of individual counters across providers. */ totalTokens?: number; } diff --git a/packages/junior/tests/unit/logging/extract-gen-ai-usage-summary.test.ts b/packages/junior/tests/unit/logging/extract-gen-ai-usage-summary.test.ts new file mode 100644 index 00000000..a36a7f49 --- /dev/null +++ b/packages/junior/tests/unit/logging/extract-gen-ai-usage-summary.test.ts @@ -0,0 +1,99 @@ +import { describe, expect, it } from "vitest"; +import { extractGenAiUsageSummary } from "@/chat/logging"; + +describe("extractGenAiUsageSummary", () => { + it("returns empty object for sources with no usage metadata", () => { + expect(extractGenAiUsageSummary({}, undefined, null)).toEqual({}); + }); + + it("captures the pi-ai AssistantMessage.usage shape", () => { + const assistantMessage = { + role: "assistant", + usage: { + input: 120, + output: 45, + cacheRead: 900, + cacheWrite: 60, + totalTokens: 1125, + }, + }; + + expect(extractGenAiUsageSummary(assistantMessage)).toEqual({ + inputTokens: 120, + outputTokens: 45, + cachedInputTokens: 900, + cacheCreationTokens: 60, + totalTokens: 1125, + }); + }); + + it("accepts a bare pi-ai Usage record as a source", () => { + expect( + extractGenAiUsageSummary({ + input: 10, 
+ output: 5, + cacheRead: 0, + cacheWrite: 0, + totalTokens: 15, + }), + ).toEqual({ + inputTokens: 10, + outputTokens: 5, + cachedInputTokens: 0, + cacheCreationTokens: 0, + totalTokens: 15, + }); + }); + + it("sums usage across multiple sources (multi-message turn)", () => { + const firstCall = { + usage: { + input: 100, + output: 50, + cacheRead: 10, + cacheWrite: 0, + totalTokens: 160, + }, + }; + const secondCall = { + usage: { + input: 200, + output: 30, + cacheRead: 5, + cacheWrite: 0, + totalTokens: 235, + }, + }; + + expect(extractGenAiUsageSummary(firstCall, secondCall)).toEqual({ + inputTokens: 300, + outputTokens: 80, + cachedInputTokens: 15, + cacheCreationTokens: 0, + totalTokens: 395, + }); + }); + + it("ignores sources without a usage record while summing the rest", () => { + const emptyAgentState = { messages: [] }; + const assistantMessage = { + usage: { + input: 10, + output: 2, + cacheRead: 0, + cacheWrite: 0, + totalTokens: 12, + }, + }; + + expect( + extractGenAiUsageSummary(undefined, emptyAgentState, assistantMessage), + ).toEqual({ + inputTokens: 10, + outputTokens: 2, + cachedInputTokens: 0, + cacheCreationTokens: 0, + totalTokens: 12, + }); + }); +}); diff --git a/packages/junior/tests/unit/slack/footer.test.ts b/packages/junior/tests/unit/slack/footer.test.ts index f274fc72..dc494178 100644 --- a/packages/junior/tests/unit/slack/footer.test.ts +++ b/packages/junior/tests/unit/slack/footer.test.ts @@ -40,6 +40,42 @@ describe("buildSlackReplyFooter", () => { it("omits the footer when no items are available", () => { expect(buildSlackReplyFooter({})).toBeUndefined(); }); + + it("sums individual token counters when rendering the Tokens item", () => { + expect( + buildSlackReplyFooter({ + usage: { + inputTokens: 100, + outputTokens: 50, + cachedInputTokens: 200, + cacheCreationTokens: 10, + totalTokens: 9999, + }, + }), + ).toEqual({ + items: [ + { + label: "Tokens", + value: "360", + }, + ], + }); + }); + + it("falls back to totalTokens when 
no component counters are reported", () => { + expect( + buildSlackReplyFooter({ + usage: { totalTokens: 1234 }, + }), + ).toEqual({ + items: [ + { + label: "Tokens", + value: "1,234", + }, + ], + }); + }); }); describe("buildSlackReplyBlocks", () => {