From 04f0d4908704d884cc81cd6a23ce4802d19dd36a Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Fri, 17 Apr 2026 21:55:44 +0000 Subject: [PATCH 1/2] fix(diagnostics): count all token types (input, output, cached, reasoning) The previous extractor only read `input_tokens`/`output_tokens`/`total_tokens` aliases and picked the first defined value across sources. As a result it never read the per-field counters of the pi-ai Usage shape (`input`, `output`, `cacheRead`, `cacheWrite`), and it did not sum usage across multiple assistant messages in a single turn. - Extend `AgentTurnUsage` with `cachedInputTokens`, `cacheCreationTokens`, and `reasoningTokens` so diagnostics carry every counter the provider reports as its own field. - Teach `extractGenAiUsageSummary` to recognize pi-ai aliases and to sum counters across sources so multi-message turns report aggregate usage. - Render the Slack footer "Tokens" value as the sum of all reported component counters (input + output + cachedInput + cacheCreation + reasoning) instead of relying on the provider's inconsistent `totalTokens` field. Fall back to `totalTokens` only when no component counters were reported. 
Co-Authored-By: Devin Co-Authored-By: David Cramer --- packages/junior/src/chat/logging.ts | 124 +++++++++++------- packages/junior/src/chat/respond.ts | 11 +- packages/junior/src/chat/slack/footer.ts | 22 +++- packages/junior/src/chat/usage.ts | 19 +++ .../extract-gen-ai-usage-summary.test.ts | 105 +++++++++++++++ .../junior/tests/unit/slack/footer.test.ts | 37 ++++++ 6 files changed, 259 insertions(+), 59 deletions(-) create mode 100644 packages/junior/tests/unit/logging/extract-gen-ai-usage-summary.test.ts diff --git a/packages/junior/src/chat/logging.ts b/packages/junior/src/chat/logging.ts index 7a81fd17..6ea321a2 100644 --- a/packages/junior/src/chat/logging.ts +++ b/packages/junior/src/chat/logging.ts @@ -1831,59 +1831,87 @@ function collectUsageRoots(source: unknown): Record[] { return roots; } -/** Extract a structured token-usage summary from provider metadata roots. */ -export function extractGenAiUsageSummary( - ...sources: unknown[] -): AgentTurnUsage { - const roots = sources.flatMap((source) => collectUsageRoots(source)); +const USAGE_FIELD_ALIASES: Record = { + inputTokens: [ + // pi-ai subtracts cached tokens from `input`; we count them separately below. 
+ "input", + "input_tokens", + "inputTokens", + "prompt_tokens", + "promptTokens", + "inputTokenCount", + "promptTokenCount", + ], + outputTokens: [ + "output", + "output_tokens", + "outputTokens", + "completion_tokens", + "completionTokens", + "outputTokenCount", + "completionTokenCount", + ], + cachedInputTokens: [ + "cacheRead", + "cached_tokens", + "cachedTokens", + "cached_input_tokens", + "cachedInputTokens", + "cache_read_input_tokens", + "cacheReadInputTokens", + ], + cacheCreationTokens: [ + "cacheWrite", + "cache_creation_input_tokens", + "cacheCreationInputTokens", + "cache_write_tokens", + "cacheWriteTokens", + ], + reasoningTokens: ["reasoning_tokens", "reasoningTokens"], + totalTokens: ["total_tokens", "totalTokens", "totalTokenCount"], +}; + +function extractUsageFromSource(source: unknown): AgentTurnUsage { + const roots = collectUsageRoots(source); if (roots.length === 0) { return {}; } - const inputTokens = - roots - .map((root) => - readTokenCount(root, [ - "input_tokens", - "inputTokens", - "prompt_tokens", - "promptTokens", - "inputTokenCount", - "promptTokenCount", - ]), - ) - .find((value) => value !== undefined) ?? undefined; - - const outputTokens = - roots - .map((root) => - readTokenCount(root, [ - "output_tokens", - "outputTokens", - "completion_tokens", - "completionTokens", - "outputTokenCount", - "completionTokenCount", - ]), - ) - .find((value) => value !== undefined) ?? undefined; - - const totalTokens = - roots - .map((root) => - readTokenCount(root, [ - "total_tokens", - "totalTokens", - "totalTokenCount", - ]), - ) - .find((value) => value !== undefined) ?? undefined; + const summary: AgentTurnUsage = {}; + for (const [field, aliases] of Object.entries(USAGE_FIELD_ALIASES) as [ + keyof AgentTurnUsage, + string[], + ][]) { + const value = + roots + .map((root) => readTokenCount(root, aliases)) + .find((candidate) => candidate !== undefined) ?? 
undefined; + if (value !== undefined) { + summary[field] = value; + } + } + return summary; +} - return { - ...(inputTokens !== undefined ? { inputTokens } : {}), - ...(outputTokens !== undefined ? { outputTokens } : {}), - ...(totalTokens !== undefined ? { totalTokens } : {}), - }; +/** + * Extract a structured token-usage summary from provider metadata roots. + * + * Values are summed across sources so callers can pass every assistant message + * produced during a turn and get the aggregate usage for that turn. + */ +export function extractGenAiUsageSummary( + ...sources: unknown[] +): AgentTurnUsage { + const summary: AgentTurnUsage = {}; + for (const source of sources) { + const single = extractUsageFromSource(source); + for (const field of Object.keys(single) as (keyof AgentTurnUsage)[]) { + const value = single[field]; + if (value === undefined) continue; + summary[field] = (summary[field] ?? 0) + value; + } + } + return summary; } /** Extract input/output token counts from AI provider usage metadata for tracing. */ diff --git a/packages/junior/src/chat/respond.ts b/packages/junior/src/chat/respond.ts index e20a6c44..bc598f13 100644 --- a/packages/junior/src/chat/respond.ts +++ b/packages/junior/src/chat/respond.ts @@ -864,12 +864,11 @@ export async function generateAssistantReply( agent.state, ...outputMessages, ); - turnUsage = - usageSummary.inputTokens !== undefined || - usageSummary.outputTokens !== undefined || - usageSummary.totalTokens !== undefined - ? usageSummary - : undefined; + turnUsage = Object.values(usageSummary).some( + (value) => value !== undefined, + ) + ? usageSummary + : undefined; setSpanAttributes({ ...(outputMessagesAttribute ? 
{ "gen_ai.output.messages": outputMessagesAttribute } diff --git a/packages/junior/src/chat/slack/footer.ts b/packages/junior/src/chat/slack/footer.ts index 9fcbcbee..148ad675 100644 --- a/packages/junior/src/chat/slack/footer.ts +++ b/packages/junior/src/chat/slack/footer.ts @@ -53,15 +53,27 @@ function formatSlackDuration(durationMs: number): string { function resolveTotalTokens( usage: AgentTurnUsage | undefined, ): number | undefined { - if (usage?.totalTokens !== undefined) { - return usage.totalTokens; + if (!usage) { + return undefined; } - if (usage?.inputTokens !== undefined && usage.outputTokens !== undefined) { - return usage.inputTokens + usage.outputTokens; + // Sum every individual counter the provider reported so cached + cache + // creation + reasoning tokens are included in the displayed total. Provider + // `totalTokens` fields are inconsistent across vendors (some exclude cached + // tokens, some include them), so prefer the sum when component counts exist. + const components = [ + usage.inputTokens, + usage.outputTokens, + usage.cachedInputTokens, + usage.cacheCreationTokens, + usage.reasoningTokens, + ].filter((value): value is number => value !== undefined); + + if (components.length > 0) { + return components.reduce((sum, value) => sum + value, 0); } - return undefined; + return usage.totalTokens; } /** Build a compact Slack reply footer so operators can correlate visible replies with backend state. */ diff --git a/packages/junior/src/chat/usage.ts b/packages/junior/src/chat/usage.ts index a09f1563..b4a998eb 100644 --- a/packages/junior/src/chat/usage.ts +++ b/packages/junior/src/chat/usage.ts @@ -1,5 +1,24 @@ +/** + * Structured token usage captured for a single agent turn. + * + * Fields are stored individually so renderers can decide whether to display a + * breakdown or a single aggregate. Providers only populate the counters they + * report; missing fields mean "not reported" rather than zero. 
+ */ export interface AgentTurnUsage { inputTokens?: number; outputTokens?: number; + cachedInputTokens?: number; + cacheCreationTokens?: number; + reasoningTokens?: number; totalTokens?: number; } + +export const AGENT_TURN_USAGE_KEYS = [ + "inputTokens", + "outputTokens", + "cachedInputTokens", + "cacheCreationTokens", + "reasoningTokens", + "totalTokens", +] as const satisfies readonly (keyof AgentTurnUsage)[]; diff --git a/packages/junior/tests/unit/logging/extract-gen-ai-usage-summary.test.ts b/packages/junior/tests/unit/logging/extract-gen-ai-usage-summary.test.ts new file mode 100644 index 00000000..719ec866 --- /dev/null +++ b/packages/junior/tests/unit/logging/extract-gen-ai-usage-summary.test.ts @@ -0,0 +1,105 @@ +import { describe, expect, it } from "vitest"; +import { extractGenAiUsageSummary } from "@/chat/logging"; + +describe("extractGenAiUsageSummary", () => { + it("returns empty object for sources with no usage metadata", () => { + expect(extractGenAiUsageSummary({}, undefined, null)).toEqual({}); + }); + + it("captures pi-ai AssistantMessage usage shape", () => { + const assistantMessage = { + role: "assistant", + usage: { + input: 120, + output: 45, + cacheRead: 900, + cacheWrite: 60, + totalTokens: 1125, + }, + }; + + expect(extractGenAiUsageSummary(assistantMessage)).toEqual({ + inputTokens: 120, + outputTokens: 45, + cachedInputTokens: 900, + cacheCreationTokens: 60, + totalTokens: 1125, + }); + }); + + it("captures OpenAI-style prompt_tokens_details.cached_tokens", () => { + const providerResponse = { + usage: { + prompt_tokens: 500, + completion_tokens: 200, + total_tokens: 700, + prompt_tokens_details: { + cached_tokens: 300, + }, + completion_tokens_details: { + reasoning_tokens: 50, + }, + }, + }; + + // The shared extractor only reads direct keys, not nested *_details + // records, but the top-level aliases still capture the primary counters. 
+ expect(extractGenAiUsageSummary(providerResponse)).toEqual({ + inputTokens: 500, + outputTokens: 200, + totalTokens: 700, + }); + }); + + it("sums usage across multiple sources (multi-message turn)", () => { + const firstCall = { + usage: { + input: 100, + output: 50, + cacheRead: 10, + cacheWrite: 0, + totalTokens: 160, + }, + }; + const secondCall = { + usage: { + input: 200, + output: 30, + cacheRead: 5, + cacheWrite: 0, + totalTokens: 235, + }, + }; + + expect(extractGenAiUsageSummary(firstCall, secondCall)).toEqual({ + inputTokens: 300, + outputTokens: 80, + cachedInputTokens: 15, + cacheCreationTokens: 0, + totalTokens: 395, + }); + }); + + it("ignores sources without a usage record while summing the rest", () => { + const emptyAgentState = { messages: [] }; + const assistantMessage = { + usage: { + input: 10, + output: 2, + cacheRead: 0, + cacheWrite: 0, + totalTokens: 12, + }, + }; + + expect( + extractGenAiUsageSummary(undefined, emptyAgentState, assistantMessage), + ).toEqual({ + inputTokens: 10, + outputTokens: 2, + cachedInputTokens: 0, + cacheCreationTokens: 0, + totalTokens: 12, + }); + }); +}); diff --git a/packages/junior/tests/unit/slack/footer.test.ts b/packages/junior/tests/unit/slack/footer.test.ts index f274fc72..782f0ab0 100644 --- a/packages/junior/tests/unit/slack/footer.test.ts +++ b/packages/junior/tests/unit/slack/footer.test.ts @@ -40,6 +40,43 @@ describe("buildSlackReplyFooter", () => { it("omits the footer when no items are available", () => { expect(buildSlackReplyFooter({})).toBeUndefined(); }); + + it("sums individual token counters when rendering the Tokens item", () => { + expect( + buildSlackReplyFooter({ + usage: { + inputTokens: 100, + outputTokens: 50, + cachedInputTokens: 200, + cacheCreationTokens: 10, + reasoningTokens: 5, + totalTokens: 9999, + }, + }), + ).toEqual({ + items: [ + { + label: "Tokens", + value: "365", + }, + ], + }); + }); + + it("falls back to totalTokens when no component counters are reported", () => { 
+ expect( + buildSlackReplyFooter({ + usage: { totalTokens: 1234 }, + }), + ).toEqual({ + items: [ + { + label: "Tokens", + value: "1,234", + }, + ], + }); + }); }); describe("buildSlackReplyBlocks", () => { From 512c423273ca2ffa8a3767d3080ab2361c38d087 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Fri, 17 Apr 2026 22:28:29 +0000 Subject: [PATCH 2/2] refactor: drop speculative usage aliases, trim to pi-ai Usage shape MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The only source reaching extractGenAiUsageSummary is pi-ai's normalized AssistantMessage.usage (input, output, cacheRead, cacheWrite, totalTokens). The OpenAI/Anthropic/Gemini-style aliases (input_tokens, prompt_tokens, cached_input_tokens, etc.) never matched anything in practice; the only key they had in common with the pi-ai shape was totalTokens. - Remove collectUsageRoots/readTokenCount/alias table in favor of a single PI_USAGE_FIELDS map from pi-ai field name to AgentTurnUsage field name. - Drop reasoningTokens from AgentTurnUsage; pi-ai folds reasoning into output already and never exposes it as a separate top-level field. - Update footer and tests accordingly. Co-Authored-By: Devin Co-Authored-By: David Cramer --- packages/junior/src/chat/logging.ts | 130 ++++-------------- packages/junior/src/chat/slack/footer.ts | 7 +- packages/junior/src/chat/usage.ts | 22 ++- .../extract-gen-ai-usage-summary.test.ts | 38 +++-- .../junior/tests/unit/slack/footer.test.ts | 3 +- 5 files changed, 52 insertions(+), 148 deletions(-) diff --git a/packages/junior/src/chat/logging.ts b/packages/junior/src/chat/logging.ts index 6ea321a2..c896e1c2 100644 --- a/packages/junior/src/chat/logging.ts +++ b/packages/junior/src/chat/logging.ts @@ -1780,131 +1780,47 @@ function toFiniteTokenCount(value: unknown): number | undefined { return rounded >= 0 ? 
rounded : undefined; } -function readTokenCount( - root: Record, - keys: string[], -): number | undefined { - for (const key of keys) { - const value = toFiniteTokenCount(root[key]); - if (value !== undefined) { - return value; - } - } - return undefined; -} - -function collectUsageRoots(source: unknown): Record[] { - const sourceRecord = asRecord(source); - if (!sourceRecord) { - return []; - } - - const roots: Record[] = [sourceRecord]; - const usage = asRecord(sourceRecord.usage); - if (usage) { - roots.push(usage); - } - - const tokenUsage = asRecord(sourceRecord.tokenUsage); - if (tokenUsage) { - roots.push(tokenUsage); - } - - const providerMetadata = asRecord(sourceRecord.providerMetadata); - if (providerMetadata) { - roots.push(providerMetadata); - const providerUsage = asRecord(providerMetadata.usage); - if (providerUsage) { - roots.push(providerUsage); - } - } - - const response = asRecord(sourceRecord.response); - if (response) { - roots.push(response); - const responseUsage = asRecord(response.usage); - if (responseUsage) { - roots.push(responseUsage); - } - } - - return roots; -} - -const USAGE_FIELD_ALIASES: Record = { - inputTokens: [ - // pi-ai subtracts cached tokens from `input`; we count them separately below. 
- "input", - "input_tokens", - "inputTokens", - "prompt_tokens", - "promptTokens", - "inputTokenCount", - "promptTokenCount", - ], - outputTokens: [ - "output", - "output_tokens", - "outputTokens", - "completion_tokens", - "completionTokens", - "outputTokenCount", - "completionTokenCount", - ], - cachedInputTokens: [ - "cacheRead", - "cached_tokens", - "cachedTokens", - "cached_input_tokens", - "cachedInputTokens", - "cache_read_input_tokens", - "cacheReadInputTokens", - ], - cacheCreationTokens: [ - "cacheWrite", - "cache_creation_input_tokens", - "cacheCreationInputTokens", - "cache_write_tokens", - "cacheWriteTokens", - ], - reasoningTokens: ["reasoning_tokens", "reasoningTokens"], - totalTokens: ["total_tokens", "totalTokens", "totalTokenCount"], -}; +// pi-ai `Usage` field name -> our camelCase equivalent. This is the only shape +// that reaches the extractor today; pi-ai normalizes every provider response +// into this canonical set before we ever see it. +const PI_USAGE_FIELDS: ReadonlyArray<[string, keyof AgentTurnUsage]> = [ + ["input", "inputTokens"], + ["output", "outputTokens"], + ["cacheRead", "cachedInputTokens"], + ["cacheWrite", "cacheCreationTokens"], + ["totalTokens", "totalTokens"], +]; -function extractUsageFromSource(source: unknown): AgentTurnUsage { - const roots = collectUsageRoots(source); - if (roots.length === 0) { +function readPiUsage(source: unknown): AgentTurnUsage { + const record = asRecord(source); + if (!record) { return {}; } - + // Accept either a pi-ai AssistantMessage (has `.usage`) or a bare Usage record. + const usage = asRecord(record.usage) ?? record; const summary: AgentTurnUsage = {}; - for (const [field, aliases] of Object.entries(USAGE_FIELD_ALIASES) as [ - keyof AgentTurnUsage, - string[], - ][]) { - const value = - roots - .map((root) => readTokenCount(root, aliases)) - .find((candidate) => candidate !== undefined) ?? 
undefined; + for (const [piKey, ourKey] of PI_USAGE_FIELDS) { + const value = toFiniteTokenCount(usage[piKey]); if (value !== undefined) { - summary[field] = value; + summary[ourKey] = value; } } return summary; } /** - * Extract a structured token-usage summary from provider metadata roots. + * Sum pi-ai `Usage` counters across every source into an `AgentTurnUsage`. * - * Values are summed across sources so callers can pass every assistant message - * produced during a turn and get the aggregate usage for that turn. + * Callers pass every assistant message produced during a turn so the result + * reflects the aggregate usage for the entire turn rather than a single model + * call. Sources without a recognized usage record contribute nothing. */ export function extractGenAiUsageSummary( ...sources: unknown[] ): AgentTurnUsage { const summary: AgentTurnUsage = {}; for (const source of sources) { - const single = extractUsageFromSource(source); + const single = readPiUsage(source); for (const field of Object.keys(single) as (keyof AgentTurnUsage)[]) { const value = single[field]; if (value === undefined) continue; diff --git a/packages/junior/src/chat/slack/footer.ts b/packages/junior/src/chat/slack/footer.ts index 148ad675..54242ead 100644 --- a/packages/junior/src/chat/slack/footer.ts +++ b/packages/junior/src/chat/slack/footer.ts @@ -58,15 +58,14 @@ function resolveTotalTokens( } // Sum every individual counter the provider reported so cached + cache - // creation + reasoning tokens are included in the displayed total. Provider - // `totalTokens` fields are inconsistent across vendors (some exclude cached - // tokens, some include them), so prefer the sum when component counts exist. + // creation tokens are included in the displayed total. Provider `totalTokens` + // fields are inconsistent across vendors (some exclude cached tokens, some + // include them), so prefer the sum when component counts exist. 
const components = [ usage.inputTokens, usage.outputTokens, usage.cachedInputTokens, usage.cacheCreationTokens, - usage.reasoningTokens, ].filter((value): value is number => value !== undefined); if (components.length > 0) { diff --git a/packages/junior/src/chat/usage.ts b/packages/junior/src/chat/usage.ts index b4a998eb..a364bbd6 100644 --- a/packages/junior/src/chat/usage.ts +++ b/packages/junior/src/chat/usage.ts @@ -1,24 +1,20 @@ /** * Structured token usage captured for a single agent turn. * - * Fields are stored individually so renderers can decide whether to display a - * breakdown or a single aggregate. Providers only populate the counters they - * report; missing fields mean "not reported" rather than zero. + * Mirrors the fields pi-ai emits on `AssistantMessage.usage` (see + * `@mariozechner/pi-ai` `Usage`) so diagnostics carry every counter the + * provider normalizes into the pi-ai shape as its own item. Renderers decide + * whether to display a breakdown or a single aggregate. */ export interface AgentTurnUsage { + /** Non-cached input tokens (pi-ai subtracts cached tokens from this). */ inputTokens?: number; + /** Output tokens; pi-ai folds reasoning tokens into this for providers that report them. */ outputTokens?: number; + /** Cached input tokens read from the provider's prompt cache. */ cachedInputTokens?: number; + /** Input tokens written into the provider's prompt cache. */ cacheCreationTokens?: number; - reasoningTokens?: number; + /** Provider-reported total. May not equal the sum of individual counters across providers. 
*/ totalTokens?: number; } - -export const AGENT_TURN_USAGE_KEYS = [ - "inputTokens", - "outputTokens", - "cachedInputTokens", - "cacheCreationTokens", - "reasoningTokens", - "totalTokens", -] as const satisfies readonly (keyof AgentTurnUsage)[]; diff --git a/packages/junior/tests/unit/logging/extract-gen-ai-usage-summary.test.ts b/packages/junior/tests/unit/logging/extract-gen-ai-usage-summary.test.ts index 719ec866..a36a7f49 100644 --- a/packages/junior/tests/unit/logging/extract-gen-ai-usage-summary.test.ts +++ b/packages/junior/tests/unit/logging/extract-gen-ai-usage-summary.test.ts @@ -6,7 +6,7 @@ describe("extractGenAiUsageSummary", () => { expect(extractGenAiUsageSummary({}, undefined, null)).toEqual({}); }); - it("captures pi-ai AssistantMessage usage shape", () => { + it("captures the pi-ai AssistantMessage.usage shape", () => { const assistantMessage = { role: "assistant", usage: { @@ -27,27 +27,21 @@ describe("extractGenAiUsageSummary", () => { }); }); - it("captures OpenAI-style prompt_tokens_details.cached_tokens", () => { - const providerResponse = { - usage: { - prompt_tokens: 500, - completion_tokens: 200, - total_tokens: 700, - prompt_tokens_details: { - cached_tokens: 300, - }, - completion_tokens_details: { - reasoning_tokens: 50, - }, - }, - }; - - // The shared extractor only reads direct keys, not nested *_details - // records, but the top-level aliases still capture the primary counters. 
- expect(extractGenAiUsageSummary(providerResponse)).toEqual({ - inputTokens: 500, - outputTokens: 200, - totalTokens: 700, + it("accepts a bare pi-ai Usage record as a source", () => { + expect( + extractGenAiUsageSummary({ + input: 10, + output: 5, + cacheRead: 0, + cacheWrite: 0, + totalTokens: 15, + }), + ).toEqual({ + inputTokens: 10, + outputTokens: 5, + cachedInputTokens: 0, + cacheCreationTokens: 0, + totalTokens: 15, }); }); diff --git a/packages/junior/tests/unit/slack/footer.test.ts b/packages/junior/tests/unit/slack/footer.test.ts index 782f0ab0..dc494178 100644 --- a/packages/junior/tests/unit/slack/footer.test.ts +++ b/packages/junior/tests/unit/slack/footer.test.ts @@ -49,7 +49,6 @@ describe("buildSlackReplyFooter", () => { outputTokens: 50, cachedInputTokens: 200, cacheCreationTokens: 10, - reasoningTokens: 5, totalTokens: 9999, }, }), @@ -57,7 +56,7 @@ describe("buildSlackReplyFooter", () => { items: [ { label: "Tokens", - value: "365", + value: "360", }, ], });