Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
132 changes: 38 additions & 94 deletions packages/junior/src/chat/logging.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1780,110 +1780,54 @@ function toFiniteTokenCount(value: unknown): number | undefined {
return rounded >= 0 ? rounded : undefined;
}

function readTokenCount(
  root: Record<string, unknown>,
  keys: string[],
): number | undefined {
  // Probe each candidate key in order; the first one that parses to a
  // finite token count wins. `find` skips the `undefined` parses, and
  // returns `undefined` itself when no key yields a usable count.
  return keys
    .map((key) => toFiniteTokenCount(root[key]))
    .find((count) => count !== undefined);
}

function collectUsageRoots(source: unknown): Record<string, unknown>[] {
const sourceRecord = asRecord(source);
if (!sourceRecord) {
return [];
}

const roots: Record<string, unknown>[] = [sourceRecord];
const usage = asRecord(sourceRecord.usage);
if (usage) {
roots.push(usage);
}

const tokenUsage = asRecord(sourceRecord.tokenUsage);
if (tokenUsage) {
roots.push(tokenUsage);
}
// pi-ai `Usage` field name -> our camelCase equivalent. This is the only shape
// that reaches the extractor today; pi-ai normalizes every provider response
// into this canonical set before we ever see it.
const PI_USAGE_FIELDS: ReadonlyArray<[string, keyof AgentTurnUsage]> = [
["input", "inputTokens"],
["output", "outputTokens"],
["cacheRead", "cachedInputTokens"],
["cacheWrite", "cacheCreationTokens"],
["totalTokens", "totalTokens"],
];

const providerMetadata = asRecord(sourceRecord.providerMetadata);
if (providerMetadata) {
roots.push(providerMetadata);
const providerUsage = asRecord(providerMetadata.usage);
if (providerUsage) {
roots.push(providerUsage);
}
function readPiUsage(source: unknown): AgentTurnUsage {
const record = asRecord(source);
if (!record) {
return {};
}

const response = asRecord(sourceRecord.response);
if (response) {
roots.push(response);
const responseUsage = asRecord(response.usage);
if (responseUsage) {
roots.push(responseUsage);
// Accept either a pi-ai AssistantMessage (has `.usage`) or a bare Usage record.
const usage = asRecord(record.usage) ?? record;
const summary: AgentTurnUsage = {};
for (const [piKey, ourKey] of PI_USAGE_FIELDS) {
const value = toFiniteTokenCount(usage[piKey]);
if (value !== undefined) {
summary[ourKey] = value;
}
}

return roots;
return summary;
}

/** Extract a structured token-usage summary from provider metadata roots. */
/**
* Sum pi-ai `Usage` counters across every source into an `AgentTurnUsage`.
*
* Callers pass every assistant message produced during a turn so the result
* reflects the aggregate usage for the entire turn rather than a single model
* call. Sources without a recognized usage record contribute nothing.
*/
export function extractGenAiUsageSummary(
...sources: unknown[]
): AgentTurnUsage {
const roots = sources.flatMap((source) => collectUsageRoots(source));
if (roots.length === 0) {
return {};
const summary: AgentTurnUsage = {};
for (const source of sources) {
const single = readPiUsage(source);
for (const field of Object.keys(single) as (keyof AgentTurnUsage)[]) {
const value = single[field];
if (value === undefined) continue;
summary[field] = (summary[field] ?? 0) + value;
}
}

const inputTokens =
roots
.map((root) =>
readTokenCount(root, [
"input_tokens",
"inputTokens",
"prompt_tokens",
"promptTokens",
"inputTokenCount",
"promptTokenCount",
]),
)
.find((value) => value !== undefined) ?? undefined;

const outputTokens =
roots
.map((root) =>
readTokenCount(root, [
"output_tokens",
"outputTokens",
"completion_tokens",
"completionTokens",
"outputTokenCount",
"completionTokenCount",
]),
)
.find((value) => value !== undefined) ?? undefined;

const totalTokens =
roots
.map((root) =>
readTokenCount(root, [
"total_tokens",
"totalTokens",
"totalTokenCount",
]),
)
.find((value) => value !== undefined) ?? undefined;

return {
...(inputTokens !== undefined ? { inputTokens } : {}),
...(outputTokens !== undefined ? { outputTokens } : {}),
...(totalTokens !== undefined ? { totalTokens } : {}),
};
return summary;
}

/** Extract input/output token counts from AI provider usage metadata for tracing. */
Expand Down
11 changes: 5 additions & 6 deletions packages/junior/src/chat/respond.ts
Original file line number Diff line number Diff line change
Expand Up @@ -864,12 +864,11 @@ export async function generateAssistantReply(
agent.state,
...outputMessages,
);
turnUsage =
usageSummary.inputTokens !== undefined ||
usageSummary.outputTokens !== undefined ||
usageSummary.totalTokens !== undefined
? usageSummary
: undefined;
turnUsage = Object.values(usageSummary).some(
(value) => value !== undefined,
)
? usageSummary
: undefined;
setSpanAttributes({
...(outputMessagesAttribute
? { "gen_ai.output.messages": outputMessagesAttribute }
Expand Down
21 changes: 16 additions & 5 deletions packages/junior/src/chat/slack/footer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -53,15 +53,26 @@ function formatSlackDuration(durationMs: number): string {
function resolveTotalTokens(
usage: AgentTurnUsage | undefined,
): number | undefined {
if (usage?.totalTokens !== undefined) {
return usage.totalTokens;
if (!usage) {
return undefined;
}

if (usage?.inputTokens !== undefined && usage.outputTokens !== undefined) {
return usage.inputTokens + usage.outputTokens;
// Sum every individual counter the provider reported so cached + cache
// creation tokens are included in the displayed total. Provider `totalTokens`
// fields are inconsistent across vendors (some exclude cached tokens, some
// include them), so prefer the sum when component counts exist.
const components = [
usage.inputTokens,
usage.outputTokens,
usage.cachedInputTokens,
usage.cacheCreationTokens,
].filter((value): value is number => value !== undefined);

if (components.length > 0) {
return components.reduce((sum, value) => sum + value, 0);
}

return undefined;
return usage.totalTokens;
}

/** Build a compact Slack reply footer so operators can correlate visible replies with backend state. */
Expand Down
15 changes: 15 additions & 0 deletions packages/junior/src/chat/usage.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,20 @@
/**
* Structured token usage captured for a single agent turn.
*
* Mirrors the fields pi-ai emits on `AssistantMessage.usage` (see
* `@mariozechner/pi-ai` `Usage`) so diagnostics carry every counter the
* provider normalizes into the pi-ai shape as its own item. Renderers decide
* whether to display a breakdown or a single aggregate.
*/
export interface AgentTurnUsage {
/** Non-cached input tokens (pi-ai subtracts cached tokens from this). */
inputTokens?: number;
/** Output tokens; pi-ai folds reasoning tokens into this for providers that report them. */
outputTokens?: number;
/** Cached input tokens read from the provider's prompt cache. */
cachedInputTokens?: number;
/** Input tokens written into the provider's prompt cache. */
cacheCreationTokens?: number;
/** Provider-reported total. May not equal the sum of individual counters across providers. */
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing reasoningTokens field promised by PR

Medium Severity

The PR title says "count all token types (input, output, cached, reasoning)" and the summary explicitly states AgentTurnUsage is extended with reasoningTokens. The footer description says the total is input + output + cachedInput + cacheCreation + reasoning. However, reasoningTokens is entirely missing — it's not in the AgentTurnUsage interface, not in PI_USAGE_FIELDS, and not in resolveTotalTokens's components array. Grep confirms zero matches across the package. When a provider surfaces reasoning tokens as a distinct field (not folded into output), they'll be silently dropped and the Slack footer total will under-count.

Additional Locations (2)
Fix in Cursor Fix in Web

Reviewed by Cursor Bugbot for commit 512c423. Configure here.

totalTokens?: number;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
import { describe, expect, it } from "vitest";
import { extractGenAiUsageSummary } from "@/chat/logging";

describe("extractGenAiUsageSummary", () => {
  it("returns empty object for sources with no usage metadata", () => {
    const summary = extractGenAiUsageSummary({}, undefined, null);
    expect(summary).toEqual({});
  });

  it("captures the pi-ai AssistantMessage.usage shape", () => {
    // A message-shaped source: usage lives under `.usage`.
    const message = {
      role: "assistant",
      usage: {
        input: 120,
        output: 45,
        cacheRead: 900,
        cacheWrite: 60,
        totalTokens: 1125,
      },
    };

    const summary = extractGenAiUsageSummary(message);

    expect(summary).toEqual({
      inputTokens: 120,
      outputTokens: 45,
      cachedInputTokens: 900,
      cacheCreationTokens: 60,
      totalTokens: 1125,
    });
  });

  it("accepts a bare pi-ai Usage record as a source", () => {
    // No wrapping message: the record itself is the usage payload.
    const bareUsage = {
      input: 10,
      output: 5,
      cacheRead: 0,
      cacheWrite: 0,
      totalTokens: 15,
    };

    expect(extractGenAiUsageSummary(bareUsage)).toEqual({
      inputTokens: 10,
      outputTokens: 5,
      cachedInputTokens: 0,
      cacheCreationTokens: 0,
      totalTokens: 15,
    });
  });

  it("sums usage across multiple sources (multi-message turn)", () => {
    const callOne = {
      usage: { input: 100, output: 50, cacheRead: 10, cacheWrite: 0, totalTokens: 160 },
    };
    const callTwo = {
      usage: { input: 200, output: 30, cacheRead: 5, cacheWrite: 0, totalTokens: 235 },
    };

    const summary = extractGenAiUsageSummary(callOne, callTwo);

    // Field-wise sums of the two calls.
    expect(summary).toEqual({
      inputTokens: 300,
      outputTokens: 80,
      cachedInputTokens: 15,
      cacheCreationTokens: 0,
      totalTokens: 395,
    });
  });

  it("ignores sources without a usage record while summing the rest", () => {
    const stateWithoutUsage = { messages: [] };
    const message = {
      usage: { input: 10, output: 2, cacheRead: 0, cacheWrite: 0, totalTokens: 12 },
    };

    const summary = extractGenAiUsageSummary(
      undefined,
      stateWithoutUsage,
      message,
    );

    expect(summary).toEqual({
      inputTokens: 10,
      outputTokens: 2,
      cachedInputTokens: 0,
      cacheCreationTokens: 0,
      totalTokens: 12,
    });
  });
});
36 changes: 36 additions & 0 deletions packages/junior/tests/unit/slack/footer.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,42 @@ describe("buildSlackReplyFooter", () => {
it("omits the footer when no items are available", () => {
expect(buildSlackReplyFooter({})).toBeUndefined();
});

it("sums individual token counters when rendering the Tokens item", () => {
  const footer = buildSlackReplyFooter({
    usage: {
      inputTokens: 100,
      outputTokens: 50,
      cachedInputTokens: 200,
      cacheCreationTokens: 10,
      totalTokens: 9999,
    },
  });

  // 100 + 50 + 200 + 10 = 360; the provider-reported total (9999) is ignored
  // when component counters are present.
  expect(footer).toEqual({
    items: [{ label: "Tokens", value: "360" }],
  });
});

it("falls back to totalTokens when no component counters are reported", () => {
  const footer = buildSlackReplyFooter({ usage: { totalTokens: 1234 } });

  // Locale formatting inserts the thousands separator.
  expect(footer).toEqual({
    items: [{ label: "Tokens", value: "1,234" }],
  });
});
});

describe("buildSlackReplyBlocks", () => {
Expand Down
Loading