diff --git a/eslint.config.mjs b/eslint.config.mjs
index 65045f056..15ab04f76 100644
--- a/eslint.config.mjs
+++ b/eslint.config.mjs
@@ -317,6 +317,7 @@ export default defineConfig([
       "src/services/aiService.ts",
       "src/utils/tools/tools.ts",
       "src/utils/ai/providerFactory.ts",
+      "src/utils/main/tokenizer.ts",
     ],
     rules: {
       "no-restricted-syntax": "off",
diff --git a/src/components/ChatMetaSidebar.tsx b/src/components/ChatMetaSidebar.tsx
new file mode 100644
index 000000000..f6e1ec648
--- /dev/null
+++ b/src/components/ChatMetaSidebar.tsx
@@ -0,0 +1,121 @@
+import React from "react";
+import styled from "@emotion/styled";
+import { usePersistedState } from "@/hooks/usePersistedState";
+import { useWorkspaceUsage } from "@/stores/WorkspaceStore";
+import { use1MContext } from "@/hooks/use1MContext";
+import { useResizeObserver } from "@/hooks/useResizeObserver";
+import { CostsTab } from "./RightSidebar/CostsTab";
+import { VerticalTokenMeter } from "./RightSidebar/VerticalTokenMeter";
+import { calculateTokenMeterData } from "@/utils/tokens/tokenMeterUtils";
+
+interface SidebarContainerProps {
+  collapsed: boolean;
+}
+
+const SidebarContainer = styled.div<SidebarContainerProps>`
+  width: ${(props) => (props.collapsed ? "20px" : "300px")};
+  background: #252526;
+  border-left: 1px solid #3e3e42;
+  display: flex;
+  flex-direction: column;
+  overflow: hidden;
+  transition: width 0.2s ease;
+  flex-shrink: 0;
+
+  /* Keep vertical bar always visible when collapsed */
+  ${(props) =>
+    props.collapsed &&
+    `
+    position: sticky;
+    right: 0;
+    z-index: 10;
+    box-shadow: -2px 0 4px rgba(0, 0, 0, 0.2);
+  `}
+`;
+
+const FullView = styled.div<{ visible: boolean }>`
+  display: ${(props) => (props.visible ? "flex" : "none")};
+  flex-direction: column;
+  height: 100%;
+`;
+
+const CollapsedView = styled.div<{ visible: boolean }>`
+  display: ${(props) => (props.visible ? "flex" : "none")};
+  height: 100%;
+`;
+
+const ContentScroll = styled.div`
+  flex: 1;
+  overflow-y: auto;
+  padding: 15px;
+`;
+
+interface ChatMetaSidebarProps {
+  workspaceId: string;
+  chatAreaRef: React.RefObject<HTMLDivElement>;
+}
+
+const ChatMetaSidebarComponent: React.FC<ChatMetaSidebarProps> = ({ workspaceId, chatAreaRef }) => {
+  const usage = useWorkspaceUsage(workspaceId);
+  const [use1M] = use1MContext();
+  const chatAreaSize = useResizeObserver(chatAreaRef);
+
+  const lastUsage = usage?.usageHistory[usage.usageHistory.length - 1];
+
+  // Memoize vertical meter data calculation to prevent unnecessary re-renders
+  const verticalMeterData = React.useMemo(() => {
+    // Get model from last usage
+    const model = lastUsage?.model ?? "unknown";
+    return lastUsage
+      ? calculateTokenMeterData(lastUsage, model, use1M, true)
+      : { segments: [], totalTokens: 0, totalPercentage: 0 };
+  }, [lastUsage, use1M]);
+
+  // Calculate if we should show collapsed view with hysteresis
+  // Strategy: Observe ChatArea width directly (independent of sidebar width)
+  // - ChatArea has min-width: 750px and flex: 1
+  // - Use hysteresis to prevent oscillation:
+  //   * Collapse when chatAreaWidth <= 800px (tight space)
+  //   * Expand when chatAreaWidth >= 1100px (lots of space)
+  //   * Between 800-1100: maintain current state (dead zone)
+  const COLLAPSE_THRESHOLD = 800; // Collapse below this
+  const EXPAND_THRESHOLD = 1100; // Expand above this
+  const chatAreaWidth = chatAreaSize?.width ??
1000; // Default to large to avoid flash + + // Persist collapsed state globally (not per-workspace) since chat area width is shared + // This prevents animation flash when switching workspaces - sidebar maintains its state + const [showCollapsed, setShowCollapsed] = usePersistedState( + "chat-meta-sidebar:collapsed", + false + ); + + React.useEffect(() => { + if (chatAreaWidth <= COLLAPSE_THRESHOLD) { + setShowCollapsed(true); + } else if (chatAreaWidth >= EXPAND_THRESHOLD) { + setShowCollapsed(false); + } + // Between thresholds: maintain current state (no change) + }, [chatAreaWidth, setShowCollapsed]); + + return ( + + + + + + + + + + + ); +}; + +// Memoize to prevent re-renders when parent (AIView) re-renders during streaming +// Only re-renders when workspaceId or chatAreaRef changes, or internal state updates +export const ChatMetaSidebar = React.memo(ChatMetaSidebarComponent); diff --git a/src/debug/agentSessionCli.ts b/src/debug/agentSessionCli.ts index e4cc27ce8..ab2ef5f1f 100644 --- a/src/debug/agentSessionCli.ts +++ b/src/debug/agentSessionCli.ts @@ -1,6 +1,6 @@ #!/usr/bin/env bun -import assert from "node:assert/strict"; +import assert from "@/utils/assert"; import * as fs from "fs/promises"; import * as path from "path"; import { parseArgs } from "util"; diff --git a/src/debug/chatExtractors.ts b/src/debug/chatExtractors.ts index 1650cf31d..413daf867 100644 --- a/src/debug/chatExtractors.ts +++ b/src/debug/chatExtractors.ts @@ -1,4 +1,4 @@ -import assert from "node:assert/strict"; +import assert from "@/utils/assert"; import type { CmuxReasoningPart, CmuxTextPart, CmuxToolPart } from "@/types/message"; export function extractAssistantText(parts: unknown): string { diff --git a/src/main-desktop.ts b/src/main-desktop.ts index 58ed794dc..7209ddebb 100644 --- a/src/main-desktop.ts +++ b/src/main-desktop.ts @@ -353,6 +353,9 @@ function createWindow() { const windowWidth = Math.max(1200, Math.floor(screenWidth * 0.8)); const windowHeight = Math.max(800, Math.floor(screenHeight * 0.8)); + console.log(`[${timestamp()}] [window] Creating BrowserWindow...`); + console.time("[window] BrowserWindow creation"); + mainWindow = new BrowserWindow({ width: windowWidth, height: windowHeight, @@ -368,8 +371,13 @@ function createWindow() { show: false, // Don't show until ready-to-show event }); + console.timeEnd("[window] BrowserWindow creation"); + // Register IPC handlers with the main window + console.log(`[${timestamp()}] [window] Registering IPC handlers...`); + console.time("[window] IPC registration"); ipcMain.register(electronIpcMain, mainWindow); + console.timeEnd("[window] IPC registration"); // Register updater IPC handlers (available in both dev and prod) electronIpcMain.handle(IPC_CHANNELS.UPDATE_CHECK, () => { @@ -415,10 +423,12 @@ function createWindow() { } // Show window once it's ready and close splash + console.time("main window startup"); mainWindow.once("ready-to-show", () => { console.log(`[${timestamp()}] Main window ready to show`); mainWindow?.show(); closeSplashScreen(); + console.timeEnd("main window startup"); }); // Open all external links in default browser @@ -439,10 +449,14 @@ function createWindow() { // Load from dev server in development, built files in production // app.isPackaged is true when running from a built .app/.exe, false in development + console.log(`[${timestamp()}] [window] Loading content...`); + console.time("[window] Content load"); if ((isE2ETest && !forceDistLoad) || (!app.isPackaged && !forceDistLoad)) { // Development mode: load from vite 
dev server const devHost = process.env.CMUX_DEVSERVER_HOST ?? "127.0.0.1"; - void mainWindow.loadURL(`http://${devHost}:${devServerPort}`); + const url = `http://${devHost}:${devServerPort}`; + console.log(`[${timestamp()}] [window] Loading from dev server: ${url}`); + void mainWindow.loadURL(url); if (!isE2ETest) { mainWindow.webContents.once("did-finish-load", () => { mainWindow?.webContents.openDevTools(); @@ -450,9 +464,22 @@ function createWindow() { } } else { // Production mode: load built files - void mainWindow.loadFile(path.join(__dirname, "index.html")); + const htmlPath = path.join(__dirname, "index.html"); + console.log(`[${timestamp()}] [window] Loading from file: ${htmlPath}`); + void mainWindow.loadFile(htmlPath); } + // Track when content finishes loading + mainWindow.webContents.once("did-finish-load", () => { + console.timeEnd("[window] Content load"); + console.log(`[${timestamp()}] [window] Content finished loading`); + + // NOTE: Tokenizer modules are NOT loaded at startup anymore! + // The Proxy in tokenizer.ts loads them on-demand when first accessed. + // This reduces startup time from ~8s to <1s. + // First token count will use approximation, accurate count caches in background. + }); + mainWindow.on("closed", () => { mainWindow = null; }); @@ -492,15 +519,7 @@ if (gotTheLock) { createWindow(); // Note: splash closes in ready-to-show event handler - // Start loading tokenizer modules in background after window is created - // This ensures accurate token counts for first API calls (especially in e2e tests) - // Loading happens asynchronously and won't block the UI - if (loadTokenizerModulesFn) { - void loadTokenizerModulesFn().then(() => { - console.log(`[${timestamp()}] Tokenizer modules loaded`); - }); - } - // No need to auto-start workspaces anymore - they start on demand + // Tokenizer modules load in background after did-finish-load event (see createWindow()) } catch (error) { console.error(`[${timestamp()}] Startup failed:`, error); diff --git a/src/services/agentSession.ts b/src/services/agentSession.ts index 23b2c653a..394d631cd 100644 --- a/src/services/agentSession.ts +++ b/src/services/agentSession.ts @@ -1,4 +1,4 @@ -import assert from "node:assert/strict"; +import assert from "@/utils/assert"; import { EventEmitter } from "events"; import * as path from "path"; import { createCmuxMessage } from "@/types/message"; @@ -13,6 +13,7 @@ import { createUnknownSendMessageError } from "@/services/utils/sendMessageError import type { Result } from "@/types/result"; import { Ok, Err } from "@/types/result"; import { enforceThinkingPolicy } from "@/utils/thinking/policy"; +import { loadTokenizerForModel } from "@/utils/main/tokenizer"; interface ImagePart { url: string; @@ -302,6 +303,19 @@ export class AgentSession { modelString: string, options?: SendMessageOptions ): Promise> { + try { + assert( + typeof modelString === "string" && modelString.trim().length > 0, + "modelString must be a non-empty string" + ); + await loadTokenizerForModel(modelString); + } catch (error) { + const reason = error instanceof Error ? 
error.message : String(error); + return Err( + createUnknownSendMessageError(`Failed to preload tokenizer for ${modelString}: ${reason}`) + ); + } + const commitResult = await this.partialService.commitToHistory(this.workspaceId); if (!commitResult.success) { return Err(createUnknownSendMessageError(commitResult.error)); diff --git a/src/services/ipcMain.ts b/src/services/ipcMain.ts index b261e8014..465d9ad21 100644 --- a/src/services/ipcMain.ts +++ b/src/services/ipcMain.ts @@ -1,4 +1,4 @@ -import assert from "node:assert/strict"; +import assert from "@/utils/assert"; import type { BrowserWindow, IpcMain as ElectronIpcMain } from "electron"; import { spawn, spawnSync } from "child_process"; import * as fsPromises from "fs/promises"; diff --git a/src/services/utils/sendMessageError.ts b/src/services/utils/sendMessageError.ts index 6b3449779..a14d7bdce 100644 --- a/src/services/utils/sendMessageError.ts +++ b/src/services/utils/sendMessageError.ts @@ -1,4 +1,4 @@ -import assert from "node:assert/strict"; +import assert from "@/utils/assert"; import type { SendMessageError } from "@/types/errors"; /** diff --git a/src/stores/WorkspaceConsumerManager.ts b/src/stores/WorkspaceConsumerManager.ts index b0cee2f20..dcbb48063 100644 --- a/src/stores/WorkspaceConsumerManager.ts +++ b/src/stores/WorkspaceConsumerManager.ts @@ -1,3 +1,4 @@ +import assert from "@/utils/assert"; import type { WorkspaceConsumersState } from "./WorkspaceStore"; import { TokenStatsWorker } from "@/utils/tokens/TokenStatsWorker"; import type { StreamingMessageAggregator } from "@/utils/messages/StreamingMessageAggregator"; @@ -48,11 +49,24 @@ export class WorkspaceConsumerManager { // Callback to bump the store when calculation completes private readonly onCalculationComplete: (workspaceId: string) => void; + // Track pending store notifications to avoid duplicate bumps within the same tick + private pendingNotifications = new Set(); + constructor(onCalculationComplete: (workspaceId: string) => void) { this.tokenWorker = new TokenStatsWorker(); this.onCalculationComplete = onCalculationComplete; } + onTokenizerReady(listener: () => void): () => void { + assert(typeof listener === "function", "Tokenizer ready listener must be a function"); + return this.tokenWorker.onTokenizerReady(listener); + } + + onTokenizerEncodingLoaded(listener: (encodingName: string) => void): () => void { + assert(typeof listener === "function", "Tokenizer encoding listener must be a function"); + return this.tokenWorker.onEncodingLoaded(listener); + } + /** * Get cached state without side effects. * Returns null if no cache exists. 
@@ -117,7 +131,7 @@ export class WorkspaceConsumerManager { // Notify store if newly scheduled (triggers UI update) if (isNewSchedule) { - this.onCalculationComplete(workspaceId); + this.notifyStoreAsync(workspaceId); } // Set new timer (150ms - imperceptible to humans, batches rapid events) @@ -143,7 +157,7 @@ export class WorkspaceConsumerManager { this.pendingCalcs.add(workspaceId); // Mark as calculating and notify store - this.onCalculationComplete(workspaceId); + this.notifyStoreAsync(workspaceId); // Run in next tick to avoid blocking caller void (async () => { @@ -170,7 +184,7 @@ export class WorkspaceConsumerManager { }); // Notify store to trigger re-render - this.onCalculationComplete(workspaceId); + this.notifyStoreAsync(workspaceId); } catch (error) { // Cancellations are expected during rapid events - don't cache, don't log // This allows lazy trigger to retry on next access @@ -186,7 +200,7 @@ export class WorkspaceConsumerManager { totalTokens: 0, isCalculating: false, }); - this.onCalculationComplete(workspaceId); + this.notifyStoreAsync(workspaceId); } finally { this.pendingCalcs.delete(workspaceId); @@ -200,6 +214,26 @@ export class WorkspaceConsumerManager { })(); } + private notifyStoreAsync(workspaceId: string): void { + if (this.pendingNotifications.has(workspaceId)) { + return; + } + + this.pendingNotifications.add(workspaceId); + + const schedule = + typeof queueMicrotask === "function" + ? queueMicrotask + : (callback: () => void) => { + void Promise.resolve().then(callback); + }; + + schedule(() => { + this.pendingNotifications.delete(workspaceId); + this.onCalculationComplete(workspaceId); + }); + } + /** * Remove workspace state and cleanup timers. */ @@ -216,6 +250,7 @@ export class WorkspaceConsumerManager { this.scheduledCalcs.delete(workspaceId); this.pendingCalcs.delete(workspaceId); this.needsRecalc.delete(workspaceId); + this.pendingNotifications.delete(workspaceId); } /** @@ -235,5 +270,7 @@ export class WorkspaceConsumerManager { this.cache.clear(); this.scheduledCalcs.clear(); this.pendingCalcs.clear(); + this.needsRecalc.clear(); + this.pendingNotifications.clear(); } } diff --git a/src/stores/WorkspaceStore.ts b/src/stores/WorkspaceStore.ts index afc265210..b91782b32 100644 --- a/src/stores/WorkspaceStore.ts +++ b/src/stores/WorkspaceStore.ts @@ -1,3 +1,4 @@ +import assert from "@/utils/assert"; import type { CmuxMessage, DisplayedMessage } from "@/types/message"; import { createCmuxMessage } from "@/types/message"; import type { FrontendWorkspaceMetadata } from "@/types/workspace"; @@ -23,7 +24,7 @@ import { isReasoningEnd, } from "@/types/ipc"; import { MapStore } from "./MapStore"; -import { createDisplayUsage } from "@/utils/tokens/tokenStatsCalculator"; +import { createDisplayUsage } from "@/utils/tokens/displayUsage"; import { WorkspaceConsumerManager } from "./WorkspaceConsumerManager"; import type { ChatUsageDisplay } from "@/utils/tokens/usageAggregator"; import type { TokenConsumer } from "@/types/chatStats"; @@ -112,6 +113,7 @@ export class WorkspaceStore { // Architecture: WorkspaceStore orchestrates (decides when), manager executes (performs calculations) // Dual-cache: consumersStore (MapStore) handles subscriptions, manager owns data cache private readonly consumerManager: WorkspaceConsumerManager; + private readonly cleanupTokenizerReady: () => void; // Supporting data structures private aggregators = new Map(); @@ -143,6 +145,31 @@ export class WorkspaceStore { this.consumersStore.bump(workspaceId); }); + const rescheduleConsumers = 
() => { + for (const [workspaceId, aggregator] of this.aggregators.entries()) { + assert( + workspaceId.length > 0, + "Workspace ID must be non-empty when rescheduling consumers" + ); + if (!this.caughtUp.get(workspaceId)) { + continue; + } + if (aggregator.getAllMessages().length === 0) { + continue; + } + this.consumerManager.scheduleCalculation(workspaceId, aggregator); + } + }; + + const cleanupReady = this.consumerManager.onTokenizerReady(rescheduleConsumers); + const cleanupEncoding = this.consumerManager.onTokenizerEncodingLoaded(() => { + rescheduleConsumers(); + }); + this.cleanupTokenizerReady = () => { + cleanupReady(); + cleanupEncoding(); + }; + // Note: We DON'T auto-check recency on every state bump. // Instead, checkAndBumpRecencyIfChanged() is called explicitly after // message completion events (not on deltas) to prevent App.tsx re-renders. @@ -714,6 +741,7 @@ export class WorkspaceStore { dispose(): void { // Clean up consumer manager this.consumerManager.dispose(); + this.cleanupTokenizerReady(); for (const unsubscribe of this.ipcUnsubscribers.values()) { unsubscribe(); diff --git a/src/utils/assert.ts b/src/utils/assert.ts new file mode 100644 index 000000000..0e061f6cb --- /dev/null +++ b/src/utils/assert.ts @@ -0,0 +1,16 @@ +// Browser-safe assertion helper for renderer and worker bundles. +// Throws immediately when invariants are violated so bugs surface early. +export class AssertionError extends Error { + constructor(message?: string) { + super(message ?? "Assertion failed"); + this.name = "AssertionError"; + } +} + +export function assert(condition: unknown, message?: string): asserts condition { + if (!condition) { + throw new AssertionError(message); + } +} + +export default assert; diff --git a/src/utils/main/tokenizer.test.ts b/src/utils/main/tokenizer.test.ts new file mode 100644 index 000000000..c93605f1e --- /dev/null +++ b/src/utils/main/tokenizer.test.ts @@ -0,0 +1,78 @@ +import { beforeEach, describe, expect, test } from "bun:test"; + +import { + __resetTokenizerForTests, + getTokenizerForModel, + loadTokenizerForModel, + loadTokenizerModules, + onTokenizerEncodingLoaded, +} from "./tokenizer"; + +beforeEach(() => { + __resetTokenizerForTests(); +}); + +describe("tokenizer caching", () => { + test("does not cache fallback approximations", async () => { + await loadTokenizerModules(); + + const model = "openai:gpt-4-turbo"; + const tokenizer = getTokenizerForModel(model); + const text = "cmux-fallback-check-" + "a".repeat(40); + + const fallbackCount = tokenizer.countTokens(text); + const approximation = Math.ceil(text.length / 4); + expect(fallbackCount).toBe(approximation); + + await loadTokenizerForModel(model); + + const accurateCount = tokenizer.countTokens(text); + + expect(accurateCount).not.toBe(fallbackCount); + expect(accurateCount).toBeGreaterThan(0); + }); + + test("replays loaded encodings for late listeners", async () => { + const model = "openai:gpt-4o"; + await loadTokenizerForModel(model); + + const received: string[] = []; + const unsubscribe = onTokenizerEncodingLoaded((encodingName) => { + received.push(encodingName); + }); + unsubscribe(); + + expect(received.length).toBeGreaterThan(0); + expect(received).toContain("o200k_base"); + }); + + test("accurate counts replace fallback approximations", async () => { + const model = "openai:gpt-4-turbo"; + const tokenizer = getTokenizerForModel(model); + const text = "cmux-accuracy-check-" + "b".repeat(80); + + let unsubscribe: () => void = () => undefined; + const encodingReady = new 
Promise((resolve) => { + unsubscribe = onTokenizerEncodingLoaded((encodingName) => { + if (encodingName === "cl100k_base") { + unsubscribe(); + resolve(); + } + }); + }); + + const fallbackCount = tokenizer.countTokens(text); + const approximation = Math.ceil(text.length / 4); + expect(fallbackCount).toBe(approximation); + + await encodingReady; + await Promise.resolve(); + + const accurateCount = tokenizer.countTokens(text); + expect(accurateCount).not.toBe(fallbackCount); + expect(accurateCount).toBeGreaterThan(0); + + const cachedCount = tokenizer.countTokens(text); + expect(cachedCount).toBe(accurateCount); + }); +}); diff --git a/src/utils/main/tokenizer.ts b/src/utils/main/tokenizer.ts index 4c8bce7c0..862e5d162 100644 --- a/src/utils/main/tokenizer.ts +++ b/src/utils/main/tokenizer.ts @@ -1,7 +1,7 @@ /** * Token calculation utilities for chat statistics */ - +import assert from "@/utils/assert"; import { LRUCache } from "lru-cache"; import CRC32 from "crc-32"; import { getToolSchemas, getAvailableTools } from "@/utils/tools/toolDefinitions"; @@ -11,58 +11,424 @@ export interface Tokenizer { countTokens: (text: string) => number; } -/** - * Lazy-loaded tokenizer modules to reduce startup time - * These are loaded on first use with /4 approximation fallback - * - * eslint-disable-next-line @typescript-eslint/consistent-type-imports -- Dynamic imports are intentional for lazy loading - */ -interface TokenizerModuleImports { +interface TokenizerBaseModules { + // Base module properties (always required) // eslint-disable-next-line @typescript-eslint/consistent-type-imports AITokenizer: typeof import("ai-tokenizer").default; // eslint-disable-next-line @typescript-eslint/consistent-type-imports models: typeof import("ai-tokenizer").models; - // eslint-disable-next-line @typescript-eslint/consistent-type-imports - o200k_base: typeof import("ai-tokenizer/encoding/o200k_base"); - // eslint-disable-next-line @typescript-eslint/consistent-type-imports - claude: typeof import("ai-tokenizer/encoding/claude"); } -let tokenizerModules: TokenizerModuleImports | null = null; +// eslint-disable-next-line @typescript-eslint/consistent-type-imports +type EncodingModule = import("ai-tokenizer").Encoding; + +const BASE_MODULE_PROPS = ["AITokenizer", "models"] as const satisfies ReadonlyArray< + keyof TokenizerBaseModules +>; + +const KNOWN_ENCODINGS = ["o200k_base", "claude"] as const; + +/** + * Dynamic imports below are deliberate to keep ~2MB encoding bundles out of the initial + * startup path. See eslint.config.mjs for the scoped override that documents this policy. 
+ */
+
+/**
+ * Module cache - stores loaded modules
+ */
+const moduleCache: {
+  base: TokenizerBaseModules | null;
+  encodings: Map<string, EncodingModule>;
+} = {
+  base: null,
+  encodings: new Map(),
+};
+
+let baseLoadPromise: Promise<TokenizerBaseModules> | null = null;
+const encodingLoadPromises = new Map<string, Promise<EncodingModule>>();
+
+type TokenizerReadyListener = () => void;
+const readyListeners = new Set<TokenizerReadyListener>();
+let tokenizerModulesReady = false;
+
+type TokenizerEncodingListener = (encodingName: string) => void;
+const encodingListeners = new Set<TokenizerEncodingListener>();
+
+function isTokenizerReady(): boolean {
+  return moduleCache.base !== null && moduleCache.encodings.size > 0;
+}
+
+function now(): number {
+  const perf = globalThis.performance;
+  if (perf && typeof perf.now === "function") {
+    return perf.now.call(perf);
+  }
+  return Date.now();
+}
+
+interface Logger {
+  info: (...args: unknown[]) => void;
+  error: (...args: unknown[]) => void;
+  debug: (...args: unknown[]) => void;
+}
+
+const consoleLogger: Logger = {
+  info: (...args) => console.log(...args),
+  error: (...args) => console.error(...args),
+  debug: (...args) => {
+    if (typeof process !== "undefined" && process.env?.CMUX_DEBUG) {
+      console.debug(...args);
+    }
+  },
+};
+
+let activeLogger: Logger = consoleLogger;
+
+// Lazy-import log.ts in the Electron main process only to keep renderer bundles small.
+if (typeof process !== "undefined" && process.type === "browser") {
+  void import("@/services/log")
+    .then((module) => {
+      activeLogger = module.log;
+    })
+    .catch(() => {
+      // Fallback to console logging when log.ts is unavailable (tests, worker builds).
+    });
+}
+
+const logger: Logger = {
+  info: (...args) => activeLogger.info(...args),
+  error: (...args) => activeLogger.error(...args),
+  debug: (...args) => activeLogger.debug(...args),
+};
+
+function notifyIfTokenizerReady(): void {
+  if (tokenizerModulesReady || !isTokenizerReady()) {
+    return;
+  }
+
+  tokenizerModulesReady = true;
+  for (const listener of readyListeners) {
+    try {
+      listener();
+    } catch (error) {
+      logger.error("[tokenizer] Ready listener threw:", error);
+    }
+  }
+  readyListeners.clear();
+}
 
-let tokenizerLoadPromise: Promise<void> | null = null;
+function notifyEncodingLoaded(encodingName: string): void {
+  assert(
+    encodingName.length > 0,
+    "Tokenizer encoding notification requires non-empty encoding name"
+  );
+  if (encodingListeners.size === 0) {
+    return;
+  }
+  for (const listener of encodingListeners) {
+    try {
+      listener(encodingName);
+    } catch (error) {
+      logger.error(`[tokenizer] Encoding listener threw for '${encodingName}':`, error);
+    }
+  }
+}
 
 /**
- * Load tokenizer modules asynchronously
+ * Registers a listener fired once the tokenizer base and at least one encoding finish loading.
+ * Prefer `onTokenizerEncodingLoaded` for UI updates that need per-encoding fidelity.
+ */
+export function onTokenizerModulesLoaded(listener: () => void): () => void {
+  if (tokenizerModulesReady || isTokenizerReady()) {
+    tokenizerModulesReady = true;
+    listener();
+    return () => undefined;
+  }
+
+  readyListeners.add(listener);
+  return () => {
+    readyListeners.delete(listener);
+  };
+}
+
+export function onTokenizerEncodingLoaded(listener: TokenizerEncodingListener): () => void {
+  assert(typeof listener === "function", "Tokenizer encoding listener must be a function");
+  encodingListeners.add(listener);
+
+  // Immediately notify about already-loaded encodings so listeners can catch up.
+ for (const encodingName of moduleCache.encodings.keys()) { + try { + listener(encodingName); + } catch (error) { + logger.error( + `[tokenizer] Encoding listener threw for '${encodingName}' during initial replay:`, + error + ); + } + } + + return () => { + encodingListeners.delete(listener); + }; +} + +function getCachedBaseModules(): TokenizerBaseModules | null { + return moduleCache.base; +} + +async function loadBaseModules(): Promise { + if (moduleCache.base) { + return moduleCache.base; + } + + if (!baseLoadPromise) { + const timerLabel = "[tokenizer] load base module"; + logger.info(`${timerLabel} started`); + baseLoadPromise = (async () => { + const startMs = now(); + try { + const module = await import("ai-tokenizer"); + + assert( + typeof module.default === "function", + "Tokenizer base module default export must be a constructor" + ); + assert( + typeof module.models === "object" && module.models !== null, + "Tokenizer base module must export models metadata" + ); + const baseModules: TokenizerBaseModules = { + AITokenizer: module.default, + models: module.models, + }; + for (const prop of BASE_MODULE_PROPS) { + assert(prop in baseModules, `Tokenizer base modules missing '${String(prop)}' property`); + } + moduleCache.base = baseModules; + notifyIfTokenizerReady(); + return baseModules; + } catch (error) { + logger.error( + "[tokenizer] Failed to load base tokenizer modules; token counts will rely on approximations until retry succeeds", + error + ); + throw error; + } finally { + const durationMs = now() - startMs; + logger.info(`${timerLabel} finished in ${durationMs.toFixed(0)}ms`); + } + })(); + } + + try { + const baseModules = await baseLoadPromise; + assert( + moduleCache.base === baseModules, + "Tokenizer base modules cache must contain the loaded modules" + ); + return baseModules; + } catch (error) { + moduleCache.base = null; + baseLoadPromise = null; + throw error; + } finally { + if (moduleCache.base) { + baseLoadPromise = null; + } + } +} + +function beginLoadBase(): void { + void loadBaseModules().catch(() => { + logger.error( + "[tokenizer] Base tokenizer modules failed to preload; token counts will stay approximate until retry succeeds" + ); + // Error already logged in loadBaseModules(); leave cache unset so callers retry. 
+  });
+}
+
+function getCachedEncoding(encodingName: string): EncodingModule | undefined {
+  assert(
+    typeof encodingName === "string" && encodingName.length > 0,
+    "Tokenizer encoding name must be a non-empty string"
+  );
+  return moduleCache.encodings.get(encodingName);
+}
+
+async function loadEncodingModule(encodingName: string): Promise<EncodingModule> {
+  const cached = getCachedEncoding(encodingName);
+  if (cached) {
+    return cached;
+  }
+
+  let promise = encodingLoadPromises.get(encodingName);
+  if (!promise) {
+    const loader = ENCODING_LOADERS[encodingName];
+    assert(loader, `Tokenizer encoding loader missing for '${encodingName}'`);
+
+    const timerLabel = `[tokenizer] load encoding: ${encodingName}`;
+    logger.info(`${timerLabel} started`);
+
+    promise = (async () => {
+      const startMs = now();
+      try {
+        const module = await loader();
+        moduleCache.encodings.set(encodingName, module);
+        notifyIfTokenizerReady();
+        notifyEncodingLoaded(encodingName);
+        return module;
+      } catch (error) {
+        logger.error(
+          `[tokenizer] Failed to load tokenizer encoding '${encodingName}'; token counts will fall back to approximations`,
+          error
+        );
+        throw error;
+      } finally {
+        const durationMs = now() - startMs;
+        logger.info(`${timerLabel} finished in ${durationMs.toFixed(0)}ms`);
+      }
+    })();
+
+    encodingLoadPromises.set(encodingName, promise);
+  }
+
+  try {
+    const encoding = await promise;
+    assert(
+      moduleCache.encodings.get(encodingName) === encoding,
+      "Tokenizer encoding cache must match the loaded encoding"
+    );
+    return encoding;
+  } catch (error) {
+    encodingLoadPromises.delete(encodingName);
+    throw error;
+  } finally {
+    if (moduleCache.encodings.has(encodingName)) {
+      encodingLoadPromises.delete(encodingName);
+    }
+  }
+}
+
+function normalizeEncodingModule(
+  encodingName: string,
+  module: Record<string, unknown>
+): EncodingModule {
+  const candidate = module as Partial<EncodingModule>;
+
+  if (typeof candidate.name !== "string" || candidate.name.length === 0) {
+    throw new Error(`Tokenizer encoding '${encodingName}' module missing name field`);
+  }
+
+  if (candidate.name !== encodingName) {
+    throw new Error(
+      `Tokenizer encoding loader mismatch: expected '${encodingName}' but received '${String(candidate.name)}'`
+    );
+  }
+
+  if (
+    typeof candidate.pat_str !== "string" ||
+    typeof candidate.special_tokens !== "object" ||
+    candidate.special_tokens === null ||
+    typeof candidate.stringEncoder !== "object" ||
+    candidate.stringEncoder === null ||
+    !Array.isArray(candidate.binaryEncoder) ||
+    typeof candidate.decoder !== "object" ||
+    candidate.decoder === null
+  ) {
+    throw new Error(`Tokenizer encoding '${encodingName}' module missing required fields`);
+  }
+
+  return {
+    name: candidate.name,
+    pat_str: candidate.pat_str,
+    special_tokens: candidate.special_tokens,
+    stringEncoder: candidate.stringEncoder,
+    binaryEncoder: candidate.binaryEncoder,
+    decoder: candidate.decoder,
+  };
+}
+
+const ENCODING_LOADERS: Record<string, () => Promise<EncodingModule>> = {
+  o200k_base: async () =>
+    normalizeEncodingModule("o200k_base", await import("ai-tokenizer/encoding/o200k_base")),
+  claude: async () =>
+    normalizeEncodingModule("claude", await import("ai-tokenizer/encoding/claude")),
+  cl100k_base: async () =>
+    normalizeEncodingModule("cl100k_base", await import("ai-tokenizer/encoding/cl100k_base")),
+  p50k_base: async () =>
+    normalizeEncodingModule("p50k_base", await import("ai-tokenizer/encoding/p50k_base")),
+};
+
+// Track if loadTokenizerModules() is already in progress
+let eagerLoadPromise: Promise<void> | null = null;
+
+/**
+ * Load tokenizer modules
asynchronously (eager mode - loads all known encodings) * Dynamic imports are intentional here to defer loading heavy tokenizer modules * until first use, reducing app startup time from ~8.8s to <1s * + * Idempotent - safe to call multiple times + * * @returns Promise that resolves when tokenizer modules are loaded */ export async function loadTokenizerModules(): Promise { - if (tokenizerModules) return; - if (tokenizerLoadPromise) return tokenizerLoadPromise; - - tokenizerLoadPromise = (async () => { - // Performance: lazy load tokenizer modules to reduce startup time from ~8.8s to <1s - /* eslint-disable no-restricted-syntax */ - const [AITokenizerModule, modelsModule, o200k_base, claude] = await Promise.all([ - import("ai-tokenizer"), - import("ai-tokenizer"), - import("ai-tokenizer/encoding/o200k_base"), - import("ai-tokenizer/encoding/claude"), - ]); - /* eslint-enable no-restricted-syntax */ - - tokenizerModules = { - AITokenizer: AITokenizerModule.default, - models: modelsModule.models, - o200k_base, - claude, - }; + const allLoaded = + moduleCache.base && KNOWN_ENCODINGS.every((enc) => moduleCache.encodings.has(enc)); + + if (allLoaded) { + return; + } + + if (eagerLoadPromise) { + return eagerLoadPromise; + } + + logger.info("[tokenizer] loadTokenizerModules() called"); + + const timerLabel = "[tokenizer] loadTokenizerModules() total"; + const work = (async () => { + logger.info("[tokenizer] Starting loads for base + encodings:", KNOWN_ENCODINGS); + const startMs = now(); + try { + const basePromise = loadBaseModules(); + const encodingPromises = KNOWN_ENCODINGS.map((enc) => loadEncodingModule(enc)); + await Promise.all([basePromise, ...encodingPromises]); + logger.info("[tokenizer] All modules loaded successfully"); + notifyIfTokenizerReady(); + } finally { + const durationMs = now() - startMs; + logger.info(`${timerLabel} finished in ${durationMs.toFixed(0)}ms`); + } })(); - return tokenizerLoadPromise; + eagerLoadPromise = work + .catch((error) => { + logger.error("[tokenizer] loadTokenizerModules() failed", error); + throw error; + }) + .finally(() => { + eagerLoadPromise = null; + }); + + return eagerLoadPromise; +} + +/** + * Load only the tokenizer modules needed for a specific model + * More efficient than loadTokenizerModules() if you know the model upfront + * + * This loads ~50% faster than loadTokenizerModules() since it only loads + * the base module + one encoding instead of all encodings. 
+ * + * @param modelString - Model identifier (e.g., "anthropic:claude-opus-4-1", "openai:gpt-4") + */ +export async function loadTokenizerForModel(modelString: string): Promise { + const baseModules = await loadBaseModules(); + assert(baseModules, "Tokenizer base modules must be loaded before selecting encodings"); + + const encodingName = getTokenizerEncoding(modelString, baseModules); + await loadEncodingModule(encodingName); + notifyIfTokenizerReady(); } /** @@ -79,6 +445,41 @@ const tokenCountCache = new LRUCache({ }, }); +interface TokenCountCacheEntry { + value: number; + cache: boolean; +} + +type TokenCountResult = number | TokenCountCacheEntry; + +function normalizeTokenCountResult(result: TokenCountResult): TokenCountCacheEntry { + if (typeof result === "number") { + assert(Number.isFinite(result), "Token count must be a finite number"); + assert(result >= 0, "Token count cannot be negative"); + return { value: result, cache: true }; + } + + assert(Number.isFinite(result.value), "Token count must be a finite number"); + assert(result.value >= 0, "Token count cannot be negative"); + assert(typeof result.cache === "boolean", "Token count cache flag must be boolean"); + return result; +} + +function isPromiseLike(value: unknown): value is Promise { + return ( + typeof value === "object" && + value !== null && + "then" in (value as Record) && + typeof (value as PromiseLike).then === "function" + ); +} + +function fallbackTokenCount(text: string): TokenCountCacheEntry { + const approximation = Math.ceil(text.length / 4); + assert(Number.isFinite(approximation), "Token count approximation must be finite"); + return { value: approximation, cache: false }; +} + /** * Count tokens with caching via CRC32 checksum * Avoids re-tokenizing identical strings (system messages, tool definitions, etc.) @@ -86,7 +487,10 @@ const tokenCountCache = new LRUCache({ * NOTE: For async tokenization, this returns an approximation immediately and caches * the accurate count in the background. Subsequent calls will use the cached accurate count. 
*/ -function countTokensCached(text: string, tokenizeFn: () => number | Promise): number { +function countTokensCached( + text: string, + tokenizeFn: () => TokenCountResult | Promise +): number { const checksum = CRC32.str(text); const cached = tokenCountCache.get(checksum); if (cached !== undefined) { @@ -95,23 +499,36 @@ function countTokensCached(text: string, tokenizeFn: () => number | Promise(result)) { + const normalized = normalizeTokenCountResult(result); + if (normalized.cache) { + tokenCountCache.set(checksum, normalized.value); + } + return normalized.value; } // Async case: return approximation now, cache accurate value when ready const approximation = Math.ceil(text.length / 4); - void result.then((count) => tokenCountCache.set(checksum, count)); + void result + .then((resolved) => { + const normalized = normalizeTokenCountResult(resolved); + if (normalized.cache) { + tokenCountCache.set(checksum, normalized.value); + } + }) + .catch((error) => { + logger.error("[tokenizer] Async tokenization failed", error); + }); return approximation; } -type TokenizerModules = TokenizerModuleImports; +type TokenizerModules = TokenizerBaseModules; type TokenizerModelRecord = Record; const FALLBACK_MODEL_KEY = "openai/gpt-4o"; const FALLBACK_ENCODING = "o200k_base"; +const TOKENIZATION_FALLBACK_MESSAGE = + "[tokenizer] Failed to tokenize with loaded modules; returning fallback approximation"; const MODEL_KEY_OVERRIDES: Record = { "anthropic:claude-sonnet-4-5": "anthropic/claude-sonnet-4.5", @@ -150,7 +567,8 @@ function resolveTokenizerEncoding(modelString: string, modules: TokenizerModules function getTokenizerEncoding(modelString: string, modules: TokenizerModules | null): string { if (!modules) { - return normalizeModelKey(modelString); + beginLoadBase(); + return FALLBACK_ENCODING; } return resolveTokenizerEncoding(modelString, modules); @@ -158,18 +576,43 @@ function getTokenizerEncoding(modelString: string, modules: TokenizerModules | n /** * Count tokens using loaded tokenizer modules - * Assumes tokenizerModules is not null + * Assumes base module is loaded; encoding will be loaded on-demand via Proxy if needed */ function countTokensWithLoadedModules( text: string, - modelString: string, - modules: NonNullable -): number { - const encodingName = getTokenizerEncoding(modelString, modules); + modelString: string +): TokenCountResult | Promise { + const cachedBase = getCachedBaseModules(); + if (!cachedBase) { + return (async () => { + const baseModules = await loadBaseModules(); + const encodingName = getTokenizerEncoding(modelString, baseModules); + const encoding = await loadEncodingModule(encodingName); + const tokenizer = new baseModules.AITokenizer(encoding); + const value = tokenizer.count(text); + assert(Number.isFinite(value) && value >= 0, "Tokenizer must return a non-negative number"); + return { value, cache: true } satisfies TokenCountCacheEntry; + })(); + } + + const encodingName = getTokenizerEncoding(modelString, cachedBase); + const cachedEncoding = getCachedEncoding(encodingName); + if (cachedEncoding) { + const tokenizer = new cachedBase.AITokenizer(cachedEncoding); + const value = tokenizer.count(text); + assert(Number.isFinite(value) && value >= 0, "Tokenizer must return a non-negative number"); + return { value, cache: true } satisfies TokenCountCacheEntry; + } - const encoding = encodingName === "claude" ? 
modules.claude : modules.o200k_base; - const tokenizer = new modules.AITokenizer(encoding); - return tokenizer.count(text); + return (async () => { + const encoding = await loadEncodingModule(encodingName); + const activeBase = getCachedBaseModules(); + assert(activeBase, "Tokenizer base modules must be available after loading encoding"); + const tokenizer = new activeBase.AITokenizer(encoding); + const value = tokenizer.count(text); + assert(Number.isFinite(value) && value >= 0, "Tokenizer must return a non-negative number"); + return { value, cache: true } satisfies TokenCountCacheEntry; + })(); } /** @@ -180,35 +623,30 @@ function countTokensWithLoadedModules( */ export function getTokenizerForModel(modelString: string): Tokenizer { // Start loading tokenizer modules in background (idempotent) - void loadTokenizerModules(); + void loadTokenizerModules().catch((error) => { + logger.error("[tokenizer] Failed to eagerly load tokenizer modules", error); + }); return { get encoding() { - return getTokenizerEncoding(modelString, tokenizerModules); + // NOTE: This Proxy-style getter runs before encodings finish loading; callers must tolerate + // fallback values (and potential transient undefined) until onTokenizerEncodingLoaded fires. + return getTokenizerEncoding(modelString, moduleCache.base); }, countTokens: (text: string) => { - // If tokenizer already loaded, use synchronous path for accurate counts - if (tokenizerModules) { - return countTokensCached(text, () => { - try { - return countTokensWithLoadedModules(text, modelString, tokenizerModules!); - } catch (error) { - // Unexpected error during tokenization, fallback to approximation - console.error("Failed to tokenize, falling back to approximation:", error); - return Math.ceil(text.length / 4); - } - }); - } - - // Tokenizer not yet loaded - use async path (returns approximation immediately) - return countTokensCached(text, async () => { - await loadTokenizerModules(); + return countTokensCached(text, () => { try { - return countTokensWithLoadedModules(text, modelString, tokenizerModules!); + const result = countTokensWithLoadedModules(text, modelString); + if (isPromiseLike(result)) { + return result.catch((error) => { + logger.error(TOKENIZATION_FALLBACK_MESSAGE, error); + return fallbackTokenCount(text); + }); + } + return result; } catch (error) { - // Unexpected error during tokenization, fallback to approximation - console.error("Failed to tokenize, falling back to approximation:", error); - return Math.ceil(text.length / 4); + logger.error(TOKENIZATION_FALLBACK_MESSAGE, error); + return fallbackTokenCount(text); } }); }, @@ -223,6 +661,22 @@ export function countTokensForData(data: unknown, tokenizer: Tokenizer): number return tokenizer.countTokens(serialized); } +/** + * Test helper to fully reset tokenizer state between test cases. + * Do NOT call from production code. 
+ */ +export function __resetTokenizerForTests(): void { + moduleCache.base = null; + moduleCache.encodings.clear(); + baseLoadPromise = null; + encodingLoadPromises.clear(); + readyListeners.clear(); + tokenizerModulesReady = false; + encodingListeners.clear(); + eagerLoadPromise = null; + tokenCountCache.clear(); +} + /** * Get estimated token count for tool definitions * These are the schemas sent to the API for each tool diff --git a/src/utils/messages/StreamingMessageAggregator.ts b/src/utils/messages/StreamingMessageAggregator.ts index 1e8b2efb5..b7ea83a1d 100644 --- a/src/utils/messages/StreamingMessageAggregator.ts +++ b/src/utils/messages/StreamingMessageAggregator.ts @@ -131,6 +131,17 @@ export class StreamingMessageAggregator { } addMessage(message: CmuxMessage): void { + const existing = this.messages.get(message.id); + if (existing) { + const existingParts = Array.isArray(existing.parts) ? existing.parts.length : 0; + const incomingParts = Array.isArray(message.parts) ? message.parts.length : 0; + + // Prefer richer content when duplicates arrive (e.g., placeholder vs completed message) + if (incomingParts < existingParts) { + return; + } + } + // Just store the message - backend assigns historySequence this.messages.set(message.id, message); this.invalidateCache(); diff --git a/src/utils/tokens/TokenStatsWorker.ts b/src/utils/tokens/TokenStatsWorker.ts index b35c11692..a399badfc 100644 --- a/src/utils/tokens/TokenStatsWorker.ts +++ b/src/utils/tokens/TokenStatsWorker.ts @@ -3,9 +3,17 @@ * Provides a clean async API for calculating stats off the main thread */ +import assert from "@/utils/assert"; import type { CmuxMessage } from "@/types/message"; import type { ChatStats } from "@/types/chatStats"; -import type { WorkerRequest, WorkerResponse, WorkerError } from "./tokenStats.worker"; +import type { + WorkerRequest, + WorkerResponse, + WorkerError, + WorkerNotification, +} from "./tokenStats.worker"; + +type WorkerMessage = WorkerResponse | WorkerError | WorkerNotification; /** * TokenStatsWorker manages a dedicated Web Worker for calculating token statistics @@ -19,6 +27,10 @@ export class TokenStatsWorker { resolve: (stats: ChatStats) => void; reject: (error: Error) => void; } | null = null; + private readonly tokenizerReadyListeners = new Set<() => void>(); + private readonly encodingListeners = new Set<(encodingName: string) => void>(); + private tokenizerReady = false; + private readonly loadedEncodings = new Set(); constructor() { // Create worker using Vite's Web Worker support @@ -31,6 +43,41 @@ export class TokenStatsWorker { this.worker.onerror = this.handleError.bind(this); } + onTokenizerReady(listener: () => void): () => void { + assert(typeof listener === "function", "Tokenizer ready listener must be a function"); + this.tokenizerReadyListeners.add(listener); + if (this.tokenizerReady) { + try { + listener(); + } catch (error) { + console.error("[TokenStatsWorker] Tokenizer ready listener threw", error); + } + } + return () => { + this.tokenizerReadyListeners.delete(listener); + }; + } + + onEncodingLoaded(listener: (encodingName: string) => void): () => void { + assert(typeof listener === "function", "Tokenizer encoding listener must be a function"); + this.encodingListeners.add(listener); + if (this.loadedEncodings.size > 0) { + for (const encodingName of this.loadedEncodings) { + try { + listener(encodingName); + } catch (error) { + console.error( + `[TokenStatsWorker] Tokenizer encoding listener threw for '${encodingName}' during replay`, + error + ); + } + 
} + } + return () => { + this.encodingListeners.delete(listener); + }; + } + /** * Calculate token statistics for the given messages * Cancels any pending calculation and starts a new one @@ -67,9 +114,22 @@ export class TokenStatsWorker { /** * Handle successful or error responses from worker */ - private handleMessage(e: MessageEvent) { + private handleMessage(e: MessageEvent) { const response = e.data; + if ("type" in response) { + if (response.type === "tokenizer-ready") { + this.notifyTokenizerReady(); + return; + } + if (response.type === "encoding-loaded") { + this.notifyEncodingLoaded(response.encodingName); + return; + } + assert(false, "Received unknown worker notification type"); + return; + } + // Ignore responses for cancelled requests if (!this.pendingRequest || this.pendingRequest.id !== response.id) { return; @@ -104,5 +164,44 @@ export class TokenStatsWorker { this.pendingRequest = null; } this.worker.terminate(); + this.tokenizerReadyListeners.clear(); + this.encodingListeners.clear(); + this.loadedEncodings.clear(); + this.tokenizerReady = false; + } + + private notifyTokenizerReady(): void { + this.tokenizerReady = true; + if (this.tokenizerReadyListeners.size === 0) { + return; + } + for (const listener of this.tokenizerReadyListeners) { + try { + listener(); + } catch (error) { + console.error("[TokenStatsWorker] Tokenizer ready listener threw", error); + } + } + } + + private notifyEncodingLoaded(encodingName: string): void { + assert( + typeof encodingName === "string" && encodingName.length > 0, + "Tokenizer encoding notifications require a non-empty encoding name" + ); + this.loadedEncodings.add(encodingName); + if (this.encodingListeners.size === 0) { + return; + } + for (const listener of this.encodingListeners) { + try { + listener(encodingName); + } catch (error) { + console.error( + `[TokenStatsWorker] Tokenizer encoding listener threw for '${encodingName}'`, + error + ); + } + } } } diff --git a/src/utils/tokens/displayUsage.ts b/src/utils/tokens/displayUsage.ts new file mode 100644 index 000000000..b98c5e771 --- /dev/null +++ b/src/utils/tokens/displayUsage.ts @@ -0,0 +1,92 @@ +/** + * Display usage utilities for renderer + * + * IMPORTANT: This file must NOT import tokenizer to avoid pulling Node.js + * dependencies into the renderer bundle. + */ + +import type { LanguageModelV2Usage } from "@ai-sdk/provider"; +import { getModelStats } from "./modelStats"; +import type { ChatUsageDisplay } from "./usageAggregator"; + +/** + * Create a display-friendly usage object from AI SDK usage + * + * This function transforms raw AI SDK usage data into a format suitable + * for display in the UI. It does NOT require the tokenizer. + */ +export function createDisplayUsage( + usage: LanguageModelV2Usage | undefined, + model: string, + providerMetadata?: Record +): ChatUsageDisplay | undefined { + if (!usage) return undefined; + + // Provider-specific token handling: + // - OpenAI: inputTokens is INCLUSIVE of cachedInputTokens + // - Anthropic: inputTokens EXCLUDES cachedInputTokens + const cachedTokens = usage.cachedInputTokens ?? 0; + const rawInputTokens = usage.inputTokens ?? 0; + + // Detect provider from model string + const isOpenAI = model.startsWith("openai:"); + + // For OpenAI, subtract cached tokens to get uncached input tokens + const inputTokens = isOpenAI ? 
Math.max(0, rawInputTokens - cachedTokens) : rawInputTokens; + + // Extract cache creation tokens from provider metadata (Anthropic-specific) + const cacheCreateTokens = + (providerMetadata?.anthropic as { cacheCreationInputTokens?: number } | undefined) + ?.cacheCreationInputTokens ?? 0; + + // Extract reasoning tokens with fallback to provider metadata (OpenAI-specific) + const reasoningTokens = + usage.reasoningTokens ?? + (providerMetadata?.openai as { reasoningTokens?: number } | undefined)?.reasoningTokens ?? + 0; + + // Calculate output tokens excluding reasoning + const outputWithoutReasoning = Math.max(0, (usage.outputTokens ?? 0) - reasoningTokens); + + // Get model stats for cost calculation + const modelStats = getModelStats(model); + + // Calculate costs based on model stats (undefined if model unknown) + let inputCost: number | undefined; + let cachedCost: number | undefined; + let cacheCreateCost: number | undefined; + let outputCost: number | undefined; + let reasoningCost: number | undefined; + + if (modelStats) { + inputCost = inputTokens * modelStats.input_cost_per_token; + cachedCost = cachedTokens * (modelStats.cache_read_input_token_cost ?? 0); + cacheCreateCost = cacheCreateTokens * (modelStats.cache_creation_input_token_cost ?? 0); + outputCost = outputWithoutReasoning * modelStats.output_cost_per_token; + reasoningCost = reasoningTokens * modelStats.output_cost_per_token; + } + + return { + input: { + tokens: inputTokens, + cost_usd: inputCost, + }, + cached: { + tokens: cachedTokens, + cost_usd: cachedCost, + }, + cacheCreate: { + tokens: cacheCreateTokens, + cost_usd: cacheCreateCost, + }, + output: { + tokens: outputWithoutReasoning, + cost_usd: outputCost, + }, + reasoning: { + tokens: reasoningTokens, + cost_usd: reasoningCost, + }, + model, // Include model for display purposes + }; +} diff --git a/src/utils/tokens/tokenStats.worker.ts b/src/utils/tokens/tokenStats.worker.ts index ce401e19d..4be5e0b7a 100644 --- a/src/utils/tokens/tokenStats.worker.ts +++ b/src/utils/tokens/tokenStats.worker.ts @@ -5,6 +5,7 @@ import type { CmuxMessage } from "@/types/message"; import type { ChatStats } from "@/types/chatStats"; +import { onTokenizerEncodingLoaded, onTokenizerModulesLoaded } from "@/utils/main/tokenizer"; import { calculateTokenStats } from "./tokenStatsCalculator"; export interface WorkerRequest { @@ -25,6 +26,10 @@ export interface WorkerError { error: string; } +export type WorkerNotification = + | { type: "tokenizer-ready" } + | { type: "encoding-loaded"; encodingName: string }; + // Handle incoming calculation requests self.onmessage = (e: MessageEvent) => { const { id, messages, model } = e.data; @@ -46,3 +51,19 @@ self.onmessage = (e: MessageEvent) => { self.postMessage(errorResponse); } }; + +onTokenizerModulesLoaded(() => { + const notification: WorkerNotification = { type: "tokenizer-ready" }; + self.postMessage(notification); +}); + +onTokenizerEncodingLoaded((encodingName) => { + if (typeof encodingName !== "string" || encodingName.length === 0) { + throw new Error("Worker received invalid tokenizer encoding name"); + } + const notification: WorkerNotification = { + type: "encoding-loaded", + encodingName, + }; + self.postMessage(notification); +}); diff --git a/src/utils/tokens/tokenStatsCalculator.ts b/src/utils/tokens/tokenStatsCalculator.ts index 8507be873..3f1542507 100644 --- a/src/utils/tokens/tokenStatsCalculator.ts +++ b/src/utils/tokens/tokenStatsCalculator.ts @@ -1,101 +1,23 @@ /** - * Shared token statistics calculation logic - * Used 
by both frontend (WorkspaceStore) and backend (debug commands) + * Main-process-only token statistics calculation logic + * Used by backend (debug commands) and worker threads * - * IMPORTANT: This utility is intentionally abstracted so that the debug command - * (`bun debug costs`) has exact parity with the UI display in the Costs tab. - * Any changes to token calculation logic should be made here to maintain consistency. + * IMPORTANT: This file imports tokenizer and should ONLY be used in main process. + * For renderer-safe usage utilities, use displayUsage.ts instead. */ import type { CmuxMessage } from "@/types/message"; import type { ChatStats, TokenConsumer } from "@/types/chatStats"; -import type { LanguageModelV2Usage } from "@ai-sdk/provider"; import { getTokenizerForModel, countTokensForData, getToolDefinitionTokens, } from "@/utils/main/tokenizer"; -import { getModelStats } from "./modelStats"; +import { createDisplayUsage } from "./displayUsage"; import type { ChatUsageDisplay } from "./usageAggregator"; -/** - * Create a display-friendly usage object from AI SDK usage - */ -export function createDisplayUsage( - usage: LanguageModelV2Usage | undefined, - model: string, - providerMetadata?: Record -): ChatUsageDisplay | undefined { - if (!usage) return undefined; - - // Provider-specific token handling: - // - OpenAI: inputTokens is INCLUSIVE of cachedInputTokens - // - Anthropic: inputTokens EXCLUDES cachedInputTokens - const cachedTokens = usage.cachedInputTokens ?? 0; - const rawInputTokens = usage.inputTokens ?? 0; - - // Detect provider from model string - const isOpenAI = model.startsWith("openai:"); - - // For OpenAI, subtract cached tokens to get uncached input tokens - const inputTokens = isOpenAI ? Math.max(0, rawInputTokens - cachedTokens) : rawInputTokens; - - // Extract cache creation tokens from provider metadata (Anthropic-specific) - const cacheCreateTokens = - (providerMetadata?.anthropic as { cacheCreationInputTokens?: number } | undefined) - ?.cacheCreationInputTokens ?? 0; - - // Extract reasoning tokens with fallback to provider metadata (OpenAI-specific) - const reasoningTokens = - usage.reasoningTokens ?? - (providerMetadata?.openai as { reasoningTokens?: number } | undefined)?.reasoningTokens ?? - 0; - - // Calculate output tokens excluding reasoning - const outputWithoutReasoning = Math.max(0, (usage.outputTokens ?? 0) - reasoningTokens); - - // Get model stats for cost calculation - const modelStats = getModelStats(model); - - // Calculate costs based on model stats (undefined if model unknown) - let inputCost: number | undefined; - let cachedCost: number | undefined; - let cacheCreateCost: number | undefined; - let outputCost: number | undefined; - let reasoningCost: number | undefined; - - if (modelStats) { - inputCost = inputTokens * modelStats.input_cost_per_token; - cachedCost = cachedTokens * (modelStats.cache_read_input_token_cost ?? 0); - cacheCreateCost = cacheCreateTokens * (modelStats.cache_creation_input_token_cost ?? 
0); - outputCost = outputWithoutReasoning * modelStats.output_cost_per_token; - reasoningCost = reasoningTokens * modelStats.output_cost_per_token; - } - - return { - input: { - tokens: inputTokens, - cost_usd: inputCost, - }, - cached: { - tokens: cachedTokens, - cost_usd: cachedCost, - }, - cacheCreate: { - tokens: cacheCreateTokens, - cost_usd: cacheCreateCost, - }, - output: { - tokens: outputWithoutReasoning, - cost_usd: outputCost, - }, - reasoning: { - tokens: reasoningTokens, - cost_usd: reasoningCost, - }, - model, // Include model for display purposes - }; -} +// Re-export for backward compatibility +export { createDisplayUsage }; /** * Calculate token statistics from raw CmuxMessages diff --git a/tests/e2e/utils/ui.ts b/tests/e2e/utils/ui.ts index dec476329..3c504fbb8 100644 --- a/tests/e2e/utils/ui.ts +++ b/tests/e2e/utils/ui.ts @@ -165,7 +165,7 @@ export function createWorkspaceUI(page: Page, context: DemoProjectConfig): Works action: () => Promise, options?: { timeoutMs?: number } ): Promise { - const timeoutMs = options?.timeoutMs ?? 12_000; + const timeoutMs = options?.timeoutMs ?? 20_000; const workspaceId = context.workspaceId; await page.evaluate((id: string) => { type StreamCaptureEvent = { diff --git a/vite.config.ts b/vite.config.ts index 5be854261..57d4b34b7 100644 --- a/vite.config.ts +++ b/vite.config.ts @@ -43,10 +43,7 @@ const basePlugins = [ export default defineConfig(({ mode }) => ({ // This prevents mermaid initialization errors in production while allowing dev to work - plugins: - mode === "development" - ? [...basePlugins, topLevelAwait()] - : basePlugins, + plugins: mode === "development" ? [...basePlugins, topLevelAwait()] : basePlugins, resolve: { alias, }, @@ -62,6 +59,17 @@ export default defineConfig(({ mode }) => ({ format: "es", inlineDynamicImports: false, sourcemapExcludeSources: false, + manualChunks(id) { + const normalizedId = id.split(path.sep).join("/"); + if (normalizedId.includes("node_modules/ai-tokenizer/encoding/")) { + const chunkName = path.basename(id, path.extname(id)); + return `tokenizer-encoding-${chunkName}`; + } + if (normalizedId.includes("node_modules/ai-tokenizer/")) { + return "tokenizer-base"; + } + return undefined; + }, }, }, chunkSizeWarningLimit: 2000,
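
Usage sketch (not part of the patch above): how the lazy tokenizer API introduced in src/utils/main/tokenizer.ts is meant to compose, using only the exports added in this diff. The helper name, model string, and logging are illustrative; counts stay at the length/4 approximation until the relevant encoding chunk finishes loading.

import {
  getTokenizerForModel,
  loadTokenizerForModel,
  onTokenizerEncodingLoaded,
} from "@/utils/main/tokenizer";

// Count tokens for a prompt, upgrading from the fallback approximation to an accurate count.
async function logPromptTokens(model: string, prompt: string): Promise<void> {
  const tokenizer = getTokenizerForModel(model);

  // May be a length/4 approximation if the encoding has not loaded yet.
  console.log("first count:", tokenizer.countTokens(prompt));

  // Fires for encodings that are already cached and again as new ones finish loading.
  const stop = onTokenizerEncodingLoaded((encodingName) => {
    console.log(`encoding ${encodingName} loaded:`, tokenizer.countTokens(prompt));
  });

  // Block until the encoding for this model is cached (what AgentSession.sendMessage now does).
  await loadTokenizerForModel(model);
  console.log("accurate count:", tokenizer.countTokens(prompt));
  stop();
}

void logPromptTokens("anthropic:claude-opus-4-1", "How many tokens is this prompt?");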