Skip to content

Commit ef0c006

Browse files
committed
🤖 Remove models.json from worker bundle (533KB → 3KB)
The approximate calculator was importing getModelStats() which pulled in the 701KB models.json file, making the worker bundle 533KB. This caused the renderer to wait for the large download before showing the window. Cost calculation in the worker is unnecessary - costs are already calculated accurately in the main process and stored in message metadata. The worker only needs to aggregate token counts for the UI display. Changes: - Remove getModelStats import - Set cost fields to undefined in createDisplayUsage() - Fix approximateCountTokensForData() calls (removed invalid 2nd parameter) Impact: - Worker bundle: 533KB → 3KB (99.4% reduction) - Removes 701KB models.json from renderer bundle - Window shows immediately without waiting for worker load
1 parent 4476934 commit ef0c006

File tree

1 file changed

+10
-21
lines changed

1 file changed

+10
-21
lines changed

src/utils/tokens/tokenStatsCalculatorApproximate.ts

Lines changed: 10 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
import type { CmuxMessage } from "@/types/message";
1010
import type { ChatStats, TokenConsumer } from "@/types/chatStats";
1111
import type { LanguageModelV2Usage } from "@ai-sdk/provider";
12-
import { getModelStats } from "./modelStats";
1312
import type { ChatUsageDisplay } from "./usageAggregator";
1413

1514
// Simple approximation tokenizer to avoid loading 8MB ai-tokenizer package
@@ -61,23 +60,13 @@ export function createDisplayUsage(
6160
(usage.outputTokens ?? 0) - (usage.reasoningTokens ?? 0)
6261
);
6362

64-
// Get model stats for cost calculation
65-
const modelStats = getModelStats(model);
66-
67-
// Calculate costs based on model stats (undefined if model unknown)
68-
let inputCost: number | undefined;
69-
let cachedCost: number | undefined;
70-
let cacheCreateCost: number | undefined;
71-
let outputCost: number | undefined;
72-
let reasoningCost: number | undefined;
73-
74-
if (modelStats) {
75-
inputCost = inputTokens * modelStats.input_cost_per_token;
76-
cachedCost = cachedTokens * (modelStats.cache_read_input_token_cost ?? 0);
77-
cacheCreateCost = cacheCreateTokens * (modelStats.cache_creation_input_token_cost ?? 0);
78-
outputCost = outputWithoutReasoning * modelStats.output_cost_per_token;
79-
reasoningCost = (usage.reasoningTokens ?? 0) * modelStats.output_cost_per_token;
80-
}
63+
// In approximation mode, we don't calculate costs to avoid loading 701KB models.json
64+
// Costs are calculated accurately in the main process and stored in message metadata
65+
const inputCost: number | undefined = undefined;
66+
const cachedCost: number | undefined = undefined;
67+
const cacheCreateCost: number | undefined = undefined;
68+
const outputCost: number | undefined = undefined;
69+
const reasoningCost: number | undefined = undefined;
8170

8271
return {
8372
input: {
@@ -190,7 +179,7 @@ export function calculateTokenStatsApproximate(messages: CmuxMessage[], model: s
190179
for (const part of message.parts) {
191180
if (part.type === "dynamic-tool") {
192181
// Count tool arguments
193-
const argsTokens = approximateCountTokensForData(part.input, tokenizer);
182+
const argsTokens = approximateCountTokensForData(part.input);
194183

195184
// Count tool results if available
196185
// Tool results have nested structure: { type: "json", value: {...} }
@@ -235,11 +224,11 @@ export function calculateTokenStatsApproximate(messages: CmuxMessage[], model: s
235224
resultTokens = Math.ceil(encryptedChars * 0.75);
236225
} else {
237226
// Normal web search results without encryption
238-
resultTokens = approximateCountTokensForData(outputData, tokenizer);
227+
resultTokens = approximateCountTokensForData(outputData);
239228
}
240229
} else {
241230
// Normal tool results
242-
resultTokens = approximateCountTokensForData(outputData, tokenizer);
231+
resultTokens = approximateCountTokensForData(outputData);
243232
}
244233
}
245234

0 commit comments

Comments (0)