Skip to content

Commit 23570df

Browse files
committed
🤖 test: add regression tests for step providerMetadata and cache creation
Cover the fix to prevent regression:

- StreamingMessageAggregator: test step providerMetadata storage/retrieval/clear
- displayUsage: test cacheCreationInputTokens extraction from providerMetadata

_Generated with mux_
1 parent fd8a296 commit 23570df

File tree

6 files changed

+140
-49
lines changed

6 files changed

+140
-49
lines changed

‎scripts/bump_tag.sh‎

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ if [[ -z "$CURRENT_VERSION" || "$CURRENT_VERSION" == "null" ]]; then
1818
fi
1919

2020
# Parse semver components
21-
IFS='.' read -r MAJOR MINOR_V PATCH <<< "$CURRENT_VERSION"
21+
IFS='.' read -r MAJOR MINOR_V PATCH <<<"$CURRENT_VERSION"
2222

2323
# Calculate new version
2424
if [[ "$MINOR" == "true" ]]; then
@@ -30,7 +30,7 @@ fi
3030
echo "Bumping version: $CURRENT_VERSION -> $NEW_VERSION"
3131

3232
# Update package.json
33-
jq --arg v "$NEW_VERSION" '.version = $v' package.json > package.json.tmp
33+
jq --arg v "$NEW_VERSION" '.version = $v' package.json >package.json.tmp
3434
mv package.json.tmp package.json
3535

3636
# Commit and tag

‎src/browser/utils/messages/StreamingMessageAggregator.test.ts‎

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -517,6 +517,40 @@ describe("StreamingMessageAggregator", () => {
517517
expect(aggregator.getActiveStreamCumulativeProviderMetadata("msg-1")).toBeUndefined();
518518
});
519519

520+
test("stores and retrieves step providerMetadata for cache creation display", () => {
521+
const aggregator = new StreamingMessageAggregator(TEST_CREATED_AT);
522+
523+
aggregator.handleUsageDelta({
524+
type: "usage-delta",
525+
workspaceId: "ws-1",
526+
messageId: "msg-1",
527+
usage: { inputTokens: 1000, outputTokens: 50, totalTokens: 1050 },
528+
cumulativeUsage: { inputTokens: 1000, outputTokens: 50, totalTokens: 1050 },
529+
providerMetadata: {
530+
anthropic: { cacheCreationInputTokens: 800 },
531+
},
532+
});
533+
534+
expect(aggregator.getActiveStreamStepProviderMetadata("msg-1")).toEqual({
535+
anthropic: { cacheCreationInputTokens: 800 },
536+
});
537+
});
538+
539+
test("step providerMetadata is undefined when not provided", () => {
540+
const aggregator = new StreamingMessageAggregator(TEST_CREATED_AT);
541+
542+
aggregator.handleUsageDelta({
543+
type: "usage-delta",
544+
workspaceId: "ws-1",
545+
messageId: "msg-1",
546+
usage: { inputTokens: 1000, outputTokens: 50, totalTokens: 1050 },
547+
cumulativeUsage: { inputTokens: 1000, outputTokens: 50, totalTokens: 1050 },
548+
// No providerMetadata
549+
});
550+
551+
expect(aggregator.getActiveStreamStepProviderMetadata("msg-1")).toBeUndefined();
552+
});
553+
520554
test("clearTokenState clears all usage tracking (step, cumulative, metadata)", () => {
521555
const aggregator = new StreamingMessageAggregator(TEST_CREATED_AT);
522556

@@ -526,18 +560,21 @@ describe("StreamingMessageAggregator", () => {
526560
messageId: "msg-1",
527561
usage: { inputTokens: 1000, outputTokens: 50, totalTokens: 1050 },
528562
cumulativeUsage: { inputTokens: 1000, outputTokens: 50, totalTokens: 1050 },
563+
providerMetadata: { anthropic: { cacheCreationInputTokens: 300 } },
529564
cumulativeProviderMetadata: { anthropic: { cacheCreationInputTokens: 500 } },
530565
});
531566

532567
// All should be defined
533568
expect(aggregator.getActiveStreamUsage("msg-1")).toBeDefined();
569+
expect(aggregator.getActiveStreamStepProviderMetadata("msg-1")).toBeDefined();
534570
expect(aggregator.getActiveStreamCumulativeUsage("msg-1")).toBeDefined();
535571
expect(aggregator.getActiveStreamCumulativeProviderMetadata("msg-1")).toBeDefined();
536572

537573
aggregator.clearTokenState("msg-1");
538574

539575
// All should be cleared
540576
expect(aggregator.getActiveStreamUsage("msg-1")).toBeUndefined();
577+
expect(aggregator.getActiveStreamStepProviderMetadata("msg-1")).toBeUndefined();
541578
expect(aggregator.getActiveStreamCumulativeUsage("msg-1")).toBeUndefined();
542579
expect(aggregator.getActiveStreamCumulativeProviderMetadata("msg-1")).toBeUndefined();
543580
});

‎src/browser/utils/messages/StreamingMessageAggregator.ts‎

Lines changed: 26 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -75,15 +75,17 @@ export class StreamingMessageAggregator {
7575
// Delta history for token counting and TPS calculation
7676
private deltaHistory = new Map<string, DeltaRecordStorage>();
7777

78-
// Active stream step usage (updated on each stream-step event)
79-
// Tracks last step's usage for context window display
80-
private activeStreamStepUsage = new Map<string, LanguageModelV2Usage>();
81-
// Tracks step provider metadata for context window cache display
82-
private activeStreamStepProviderMetadata = new Map<string, Record<string, unknown>>();
83-
// Tracks cumulative usage across all steps for live cost display
84-
private activeStreamCumulativeUsage = new Map<string, LanguageModelV2Usage>();
85-
// Tracks cumulative provider metadata for live cost display (with cache creation tokens)
86-
private activeStreamCumulativeProviderMetadata = new Map<string, Record<string, unknown>>();
78+
// Active stream usage tracking (updated on each usage-delta event)
79+
// Consolidates step-level (context window) and cumulative (cost) usage by messageId
80+
private activeStreamUsage = new Map<
81+
string,
82+
{
83+
// Step-level: this step only (for context window display)
84+
step: { usage: LanguageModelV2Usage; providerMetadata?: Record<string, unknown> };
85+
// Cumulative: sum across all steps (for live cost display)
86+
cumulative: { usage: LanguageModelV2Usage; providerMetadata?: Record<string, unknown> };
87+
}
88+
>();
8789

8890
// Current TODO list (updated when todo_write succeeds, cleared on stream end)
8991
// Stream-scoped: automatically reset when stream completes
@@ -1062,45 +1064,38 @@ export class StreamingMessageAggregator {
10621064
*/
10631065
clearTokenState(messageId: string): void {
10641066
this.deltaHistory.delete(messageId);
1065-
this.activeStreamStepUsage.delete(messageId);
1066-
this.activeStreamStepProviderMetadata.delete(messageId);
1067-
this.activeStreamCumulativeUsage.delete(messageId);
1068-
this.activeStreamCumulativeProviderMetadata.delete(messageId);
1067+
this.activeStreamUsage.delete(messageId);
10691068
}
10701069

10711070
/**
10721071
* Handle usage-delta event: update usage tracking for active stream
10731072
*/
10741073
handleUsageDelta(data: UsageDeltaEvent): void {
1075-
// Store last step's usage for context window display
1076-
this.activeStreamStepUsage.set(data.messageId, data.usage);
1077-
// Store step provider metadata for context window cache display
1078-
if (data.providerMetadata) {
1079-
this.activeStreamStepProviderMetadata.set(data.messageId, data.providerMetadata);
1080-
}
1081-
// Store cumulative usage for cost display
1082-
this.activeStreamCumulativeUsage.set(data.messageId, data.cumulativeUsage);
1083-
// Store cumulative provider metadata for live cost display (with cache creation tokens)
1084-
if (data.cumulativeProviderMetadata) {
1085-
this.activeStreamCumulativeProviderMetadata.set(
1086-
data.messageId,
1087-
data.cumulativeProviderMetadata
1088-
);
1089-
}
1074+
this.activeStreamUsage.set(data.messageId, {
1075+
step: { usage: data.usage, providerMetadata: data.providerMetadata },
1076+
cumulative: { usage: data.cumulativeUsage, providerMetadata: data.cumulativeProviderMetadata },
1077+
});
10901078
}
10911079

10921080
/**
10931081
* Get active stream usage for context window display (last step's inputTokens = context size)
10941082
*/
10951083
getActiveStreamUsage(messageId: string): LanguageModelV2Usage | undefined {
1096-
return this.activeStreamStepUsage.get(messageId);
1084+
return this.activeStreamUsage.get(messageId)?.step.usage;
1085+
}
1086+
1087+
/**
1088+
* Get step provider metadata for context window cache display
1089+
*/
1090+
getActiveStreamStepProviderMetadata(messageId: string): Record<string, unknown> | undefined {
1091+
return this.activeStreamUsage.get(messageId)?.step.providerMetadata;
10971092
}
10981093

10991094
/**
11001095
* Get active stream cumulative usage for cost display (sum of all steps)
11011096
*/
11021097
getActiveStreamCumulativeUsage(messageId: string): LanguageModelV2Usage | undefined {
1103-
return this.activeStreamCumulativeUsage.get(messageId);
1098+
return this.activeStreamUsage.get(messageId)?.cumulative.usage;
11041099
}
11051100

11061101
/**
@@ -1109,13 +1104,6 @@ export class StreamingMessageAggregator {
11091104
getActiveStreamCumulativeProviderMetadata(
11101105
messageId: string
11111106
): Record<string, unknown> | undefined {
1112-
return this.activeStreamCumulativeProviderMetadata.get(messageId);
1113-
}
1114-
1115-
/**
1116-
* Get step provider metadata for context window cache display
1117-
*/
1118-
getActiveStreamStepProviderMetadata(messageId: string): Record<string, unknown> | undefined {
1119-
return this.activeStreamStepProviderMetadata.get(messageId);
1107+
return this.activeStreamUsage.get(messageId)?.cumulative.providerMetadata;
11201108
}
11211109
}

‎src/common/types/stream.ts‎

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -142,14 +142,14 @@ export interface UsageDeltaEvent {
142142
type: "usage-delta";
143143
workspaceId: string;
144144
messageId: string;
145-
// This step's usage (inputTokens = current context size, for context window display)
145+
146+
// Step-level: this step only (for context window display)
146147
usage: LanguageModelV2Usage;
147-
// Cumulative usage across all steps so far (for live cost display)
148+
providerMetadata?: Record<string, unknown>;
149+
150+
// Cumulative: sum across all steps (for live cost display)
148151
cumulativeUsage: LanguageModelV2Usage;
149-
// Cumulative provider metadata across all steps (for live cost display with cache tokens)
150152
cumulativeProviderMetadata?: Record<string, unknown>;
151-
// This step's provider metadata (for context window cache display)
152-
providerMetadata?: Record<string, unknown>;
153153
}
154154

155155
export type AIServiceEvent =

‎src/common/utils/tokens/displayUsage.test.ts‎

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -276,4 +276,68 @@ describe("createDisplayUsage", () => {
276276
expect(result!.input.tokens).toBe(1000);
277277
expect(result!.cached.tokens).toBe(0);
278278
});
279+
280+
describe("Anthropic cache creation tokens from providerMetadata", () => {
281+
// Cache creation tokens are Anthropic-specific and only available in
282+
// providerMetadata.anthropic.cacheCreationInputTokens, not in LanguageModelV2Usage.
283+
// This is critical for liveUsage display during streaming.
284+
285+
test("extracts cacheCreationInputTokens from providerMetadata", () => {
286+
const usage: LanguageModelV2Usage = {
287+
inputTokens: 1000,
288+
outputTokens: 50,
289+
totalTokens: 1050,
290+
};
291+
292+
const result = createDisplayUsage(usage, "anthropic:claude-sonnet-4-20250514", {
293+
anthropic: { cacheCreationInputTokens: 800 },
294+
});
295+
296+
expect(result).toBeDefined();
297+
expect(result!.cacheCreate.tokens).toBe(800);
298+
});
299+
300+
test("cacheCreate is 0 when providerMetadata is undefined", () => {
301+
const usage: LanguageModelV2Usage = {
302+
inputTokens: 1000,
303+
outputTokens: 50,
304+
totalTokens: 1050,
305+
};
306+
307+
const result = createDisplayUsage(usage, "anthropic:claude-sonnet-4-20250514");
308+
309+
expect(result).toBeDefined();
310+
expect(result!.cacheCreate.tokens).toBe(0);
311+
});
312+
313+
test("cacheCreate is 0 when anthropic metadata lacks cacheCreationInputTokens", () => {
314+
const usage: LanguageModelV2Usage = {
315+
inputTokens: 1000,
316+
outputTokens: 50,
317+
totalTokens: 1050,
318+
};
319+
320+
const result = createDisplayUsage(usage, "anthropic:claude-sonnet-4-20250514", {
321+
anthropic: { someOtherField: 123 },
322+
});
323+
324+
expect(result).toBeDefined();
325+
expect(result!.cacheCreate.tokens).toBe(0);
326+
});
327+
328+
test("handles gateway Anthropic model with cache creation", () => {
329+
const usage: LanguageModelV2Usage = {
330+
inputTokens: 2000,
331+
outputTokens: 100,
332+
totalTokens: 2100,
333+
};
334+
335+
const result = createDisplayUsage(usage, "mux-gateway:anthropic/claude-sonnet-4-5", {
336+
anthropic: { cacheCreationInputTokens: 1500 },
337+
});
338+
339+
expect(result).toBeDefined();
340+
expect(result!.cacheCreate.tokens).toBe(1500);
341+
});
342+
});
279343
});

‎src/node/services/streamManager.ts‎

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -969,10 +969,12 @@ export class StreamManager extends EventEmitter {
969969
type: "usage-delta",
970970
workspaceId: workspaceId as string,
971971
messageId: streamInfo.messageId,
972-
usage: finishStepPart.usage, // For context window display
973-
cumulativeUsage: streamInfo.cumulativeUsage, // For live cost display
974-
cumulativeProviderMetadata: streamInfo.cumulativeProviderMetadata, // For live cache costs
975-
providerMetadata: finishStepPart.providerMetadata, // For context window cache display
972+
// Step-level (for context window display)
973+
usage: finishStepPart.usage,
974+
providerMetadata: finishStepPart.providerMetadata,
975+
// Cumulative (for live cost display)
976+
cumulativeUsage: streamInfo.cumulativeUsage,
977+
cumulativeProviderMetadata: streamInfo.cumulativeProviderMetadata,
976978
};
977979
this.emit("usage-delta", usageEvent);
978980
break;

0 commit comments

Comments
 (0)