Skip to content

Commit ef0c006

Browse files
committed
🤖 Remove models.json from worker bundle (533KB → 3KB)
The approximate calculator was importing getModelStats() which pulled in the 701KB models.json file, making the worker bundle 533KB. This caused the renderer to wait for the large download before showing the window. Cost calculation in the worker is unnecessary - costs are already calculated accurately in the main process and stored in message metadata. The worker only needs to aggregate token counts for the UI display. Changes: - Remove getModelStats import - Set cost fields to undefined in createDisplayUsage() - Fix approximateCountTokensForData() calls (removed invalid 2nd parameter) Impact: - Worker bundle: 533KB → 3KB (99.4% reduction) - Removes 701KB models.json from renderer bundle - Window shows immediately without waiting for worker load
1 parent 4476934 commit ef0c006

File tree

1 file changed

+10
-21
lines changed

1 file changed

+10
-21
lines changed

src/utils/tokens/tokenStatsCalculatorApproximate.ts

Lines changed: 10 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
import type { CmuxMessage } from "@/types/message";
1010
import type { ChatStats, TokenConsumer } from "@/types/chatStats";
1111
import type { LanguageModelV2Usage } from "@ai-sdk/provider";
12-
import { getModelStats } from "./modelStats";
1312
import type { ChatUsageDisplay } from "./usageAggregator";
1413

1514
// Simple approximation tokenizer to avoid loading 8MB ai-tokenizer package
@@ -61,23 +60,13 @@ export function createDisplayUsage(
6160
(usage.outputTokens ?? 0) - (usage.reasoningTokens ?? 0)
6261
);
6362

64-
// Get model stats for cost calculation
65-
const modelStats = getModelStats(model);
66-
67-
// Calculate costs based on model stats (undefined if model unknown)
68-
let inputCost: number | undefined;
69-
let cachedCost: number | undefined;
70-
let cacheCreateCost: number | undefined;
71-
let outputCost: number | undefined;
72-
let reasoningCost: number | undefined;
73-
74-
if (modelStats) {
75-
inputCost = inputTokens * modelStats.input_cost_per_token;
76-
cachedCost = cachedTokens * (modelStats.cache_read_input_token_cost ?? 0);
77-
cacheCreateCost = cacheCreateTokens * (modelStats.cache_creation_input_token_cost ?? 0);
78-
outputCost = outputWithoutReasoning * modelStats.output_cost_per_token;
79-
reasoningCost = (usage.reasoningTokens ?? 0) * modelStats.output_cost_per_token;
80-
}
63+
// In approximation mode, we don't calculate costs to avoid loading 701KB models.json
64+
// Costs are calculated accurately in the main process and stored in message metadata
65+
const inputCost: number | undefined = undefined;
66+
const cachedCost: number | undefined = undefined;
67+
const cacheCreateCost: number | undefined = undefined;
68+
const outputCost: number | undefined = undefined;
69+
const reasoningCost: number | undefined = undefined;
8170

8271
return {
8372
input: {
@@ -190,7 +179,7 @@ export function calculateTokenStatsApproximate(messages: CmuxMessage[], model: s
190179
for (const part of message.parts) {
191180
if (part.type === "dynamic-tool") {
192181
// Count tool arguments
193-
const argsTokens = approximateCountTokensForData(part.input, tokenizer);
182+
const argsTokens = approximateCountTokensForData(part.input);
194183

195184
// Count tool results if available
196185
// Tool results have nested structure: { type: "json", value: {...} }
@@ -235,11 +224,11 @@ export function calculateTokenStatsApproximate(messages: CmuxMessage[], model: s
235224
resultTokens = Math.ceil(encryptedChars * 0.75);
236225
} else {
237226
// Normal web search results without encryption
238-
resultTokens = approximateCountTokensForData(outputData, tokenizer);
227+
resultTokens = approximateCountTokensForData(outputData);
239228
}
240229
} else {
241230
// Normal tool results
242-
resultTokens = approximateCountTokensForData(outputData, tokenizer);
231+
resultTokens = approximateCountTokensForData(outputData);
243232
}
244233
}
245234

0 commit comments

Comments (0)