diff --git a/eslint.config.mjs b/eslint.config.mjs
index 65045f056..15ab04f76 100644
--- a/eslint.config.mjs
+++ b/eslint.config.mjs
@@ -317,6 +317,7 @@ export default defineConfig([
       "src/services/aiService.ts",
       "src/utils/tools/tools.ts",
       "src/utils/ai/providerFactory.ts",
+      "src/utils/main/tokenizer.ts",
     ],
     rules: {
       "no-restricted-syntax": "off",
diff --git a/src/components/ChatMetaSidebar.tsx b/src/components/ChatMetaSidebar.tsx
new file mode 100644
index 000000000..f6e1ec648
--- /dev/null
+++ b/src/components/ChatMetaSidebar.tsx
@@ -0,0 +1,121 @@
+import React from "react";
+import styled from "@emotion/styled";
+import { usePersistedState } from "@/hooks/usePersistedState";
+import { useWorkspaceUsage } from "@/stores/WorkspaceStore";
+import { use1MContext } from "@/hooks/use1MContext";
+import { useResizeObserver } from "@/hooks/useResizeObserver";
+import { CostsTab } from "./RightSidebar/CostsTab";
+import { VerticalTokenMeter } from "./RightSidebar/VerticalTokenMeter";
+import { calculateTokenMeterData } from "@/utils/tokens/tokenMeterUtils";
+
+interface SidebarContainerProps {
+  collapsed: boolean;
+}
+
+const SidebarContainer = styled.div<SidebarContainerProps>`
+  width: ${(props) => (props.collapsed ? "20px" : "300px")};
+  background: #252526;
+  border-left: 1px solid #3e3e42;
+  display: flex;
+  flex-direction: column;
+  overflow: hidden;
+  transition: width 0.2s ease;
+  flex-shrink: 0;
+
+  /* Keep vertical bar always visible when collapsed */
+  ${(props) =>
+    props.collapsed &&
+    `
+    position: sticky;
+    right: 0;
+    z-index: 10;
+    box-shadow: -2px 0 4px rgba(0, 0, 0, 0.2);
+  `}
+`;
+
+const FullView = styled.div<{ visible: boolean }>`
+  display: ${(props) => (props.visible ? "flex" : "none")};
+  flex-direction: column;
+  height: 100%;
+`;
+
+const CollapsedView = styled.div<{ visible: boolean }>`
+  display: ${(props) => (props.visible ? "flex" : "none")};
+  height: 100%;
+`;
+
+const ContentScroll = styled.div`
+  flex: 1;
+  overflow-y: auto;
+  padding: 15px;
+`;
+
+interface ChatMetaSidebarProps {
+  workspaceId: string;
+  chatAreaRef: React.RefObject<HTMLDivElement>;
+}
+
+const ChatMetaSidebarComponent: React.FC<ChatMetaSidebarProps> = ({ workspaceId, chatAreaRef }) => {
+  const usage = useWorkspaceUsage(workspaceId);
+  const [use1M] = use1MContext();
+  const chatAreaSize = useResizeObserver(chatAreaRef);
+
+  const lastUsage = usage?.usageHistory[usage.usageHistory.length - 1];
+
+  // Memoize vertical meter data calculation to prevent unnecessary re-renders
+  const verticalMeterData = React.useMemo(() => {
+    // Get model from last usage
+    const model = lastUsage?.model ?? "unknown";
+    return lastUsage
+      ? calculateTokenMeterData(lastUsage, model, use1M, true)
+      : { segments: [], totalTokens: 0, totalPercentage: 0 };
+  }, [lastUsage, use1M]);
+
+  // Calculate if we should show collapsed view with hysteresis
+  // Strategy: Observe ChatArea width directly (independent of sidebar width)
+  // - ChatArea has min-width: 750px and flex: 1
+  // - Use hysteresis to prevent oscillation:
+  //   * Collapse when chatAreaWidth <= 800px (tight space)
+  //   * Expand when chatAreaWidth >= 1100px (lots of space)
+  //   * Between 800-1100: maintain current state (dead zone)
+  const COLLAPSE_THRESHOLD = 800; // Collapse below this
+  const EXPAND_THRESHOLD = 1100; // Expand above this
+  const chatAreaWidth = chatAreaSize?.width ??
1000; // Default to large to avoid flash + + // Persist collapsed state globally (not per-workspace) since chat area width is shared + // This prevents animation flash when switching workspaces - sidebar maintains its state + const [showCollapsed, setShowCollapsed] = usePersistedState( + "chat-meta-sidebar:collapsed", + false + ); + + React.useEffect(() => { + if (chatAreaWidth <= COLLAPSE_THRESHOLD) { + setShowCollapsed(true); + } else if (chatAreaWidth >= EXPAND_THRESHOLD) { + setShowCollapsed(false); + } + // Between thresholds: maintain current state (no change) + }, [chatAreaWidth, setShowCollapsed]); + + return ( + + + + + + + + + + + ); +}; + +// Memoize to prevent re-renders when parent (AIView) re-renders during streaming +// Only re-renders when workspaceId or chatAreaRef changes, or internal state updates +export const ChatMetaSidebar = React.memo(ChatMetaSidebarComponent); diff --git a/src/debug/agentSessionCli.ts b/src/debug/agentSessionCli.ts index e4cc27ce8..ab2ef5f1f 100644 --- a/src/debug/agentSessionCli.ts +++ b/src/debug/agentSessionCli.ts @@ -1,6 +1,6 @@ #!/usr/bin/env bun -import assert from "node:assert/strict"; +import assert from "@/utils/assert"; import * as fs from "fs/promises"; import * as path from "path"; import { parseArgs } from "util"; diff --git a/src/debug/chatExtractors.ts b/src/debug/chatExtractors.ts index 1650cf31d..413daf867 100644 --- a/src/debug/chatExtractors.ts +++ b/src/debug/chatExtractors.ts @@ -1,4 +1,4 @@ -import assert from "node:assert/strict"; +import assert from "@/utils/assert"; import type { CmuxReasoningPart, CmuxTextPart, CmuxToolPart } from "@/types/message"; export function extractAssistantText(parts: unknown): string { diff --git a/src/main-desktop.ts b/src/main-desktop.ts index 58ed794dc..7209ddebb 100644 --- a/src/main-desktop.ts +++ b/src/main-desktop.ts @@ -353,6 +353,9 @@ function createWindow() { const windowWidth = Math.max(1200, Math.floor(screenWidth * 0.8)); const windowHeight = Math.max(800, Math.floor(screenHeight * 0.8)); + console.log(`[${timestamp()}] [window] Creating BrowserWindow...`); + console.time("[window] BrowserWindow creation"); + mainWindow = new BrowserWindow({ width: windowWidth, height: windowHeight, @@ -368,8 +371,13 @@ function createWindow() { show: false, // Don't show until ready-to-show event }); + console.timeEnd("[window] BrowserWindow creation"); + // Register IPC handlers with the main window + console.log(`[${timestamp()}] [window] Registering IPC handlers...`); + console.time("[window] IPC registration"); ipcMain.register(electronIpcMain, mainWindow); + console.timeEnd("[window] IPC registration"); // Register updater IPC handlers (available in both dev and prod) electronIpcMain.handle(IPC_CHANNELS.UPDATE_CHECK, () => { @@ -415,10 +423,12 @@ function createWindow() { } // Show window once it's ready and close splash + console.time("main window startup"); mainWindow.once("ready-to-show", () => { console.log(`[${timestamp()}] Main window ready to show`); mainWindow?.show(); closeSplashScreen(); + console.timeEnd("main window startup"); }); // Open all external links in default browser @@ -439,10 +449,14 @@ function createWindow() { // Load from dev server in development, built files in production // app.isPackaged is true when running from a built .app/.exe, false in development + console.log(`[${timestamp()}] [window] Loading content...`); + console.time("[window] Content load"); if ((isE2ETest && !forceDistLoad) || (!app.isPackaged && !forceDistLoad)) { // Development mode: load from vite 
dev server const devHost = process.env.CMUX_DEVSERVER_HOST ?? "127.0.0.1"; - void mainWindow.loadURL(`http://${devHost}:${devServerPort}`); + const url = `http://${devHost}:${devServerPort}`; + console.log(`[${timestamp()}] [window] Loading from dev server: ${url}`); + void mainWindow.loadURL(url); if (!isE2ETest) { mainWindow.webContents.once("did-finish-load", () => { mainWindow?.webContents.openDevTools(); @@ -450,9 +464,22 @@ function createWindow() { } } else { // Production mode: load built files - void mainWindow.loadFile(path.join(__dirname, "index.html")); + const htmlPath = path.join(__dirname, "index.html"); + console.log(`[${timestamp()}] [window] Loading from file: ${htmlPath}`); + void mainWindow.loadFile(htmlPath); } + // Track when content finishes loading + mainWindow.webContents.once("did-finish-load", () => { + console.timeEnd("[window] Content load"); + console.log(`[${timestamp()}] [window] Content finished loading`); + + // NOTE: Tokenizer modules are NOT loaded at startup anymore! + // The Proxy in tokenizer.ts loads them on-demand when first accessed. + // This reduces startup time from ~8s to <1s. + // First token count will use approximation, accurate count caches in background. + }); + mainWindow.on("closed", () => { mainWindow = null; }); @@ -492,15 +519,7 @@ if (gotTheLock) { createWindow(); // Note: splash closes in ready-to-show event handler - // Start loading tokenizer modules in background after window is created - // This ensures accurate token counts for first API calls (especially in e2e tests) - // Loading happens asynchronously and won't block the UI - if (loadTokenizerModulesFn) { - void loadTokenizerModulesFn().then(() => { - console.log(`[${timestamp()}] Tokenizer modules loaded`); - }); - } - // No need to auto-start workspaces anymore - they start on demand + // Tokenizer modules load in background after did-finish-load event (see createWindow()) } catch (error) { console.error(`[${timestamp()}] Startup failed:`, error); diff --git a/src/services/agentSession.ts b/src/services/agentSession.ts index 23b2c653a..394d631cd 100644 --- a/src/services/agentSession.ts +++ b/src/services/agentSession.ts @@ -1,4 +1,4 @@ -import assert from "node:assert/strict"; +import assert from "@/utils/assert"; import { EventEmitter } from "events"; import * as path from "path"; import { createCmuxMessage } from "@/types/message"; @@ -13,6 +13,7 @@ import { createUnknownSendMessageError } from "@/services/utils/sendMessageError import type { Result } from "@/types/result"; import { Ok, Err } from "@/types/result"; import { enforceThinkingPolicy } from "@/utils/thinking/policy"; +import { loadTokenizerForModel } from "@/utils/main/tokenizer"; interface ImagePart { url: string; @@ -302,6 +303,19 @@ export class AgentSession { modelString: string, options?: SendMessageOptions ): Promise> { + try { + assert( + typeof modelString === "string" && modelString.trim().length > 0, + "modelString must be a non-empty string" + ); + await loadTokenizerForModel(modelString); + } catch (error) { + const reason = error instanceof Error ? 
error.message : String(error); + return Err( + createUnknownSendMessageError(`Failed to preload tokenizer for ${modelString}: ${reason}`) + ); + } + const commitResult = await this.partialService.commitToHistory(this.workspaceId); if (!commitResult.success) { return Err(createUnknownSendMessageError(commitResult.error)); diff --git a/src/services/ipcMain.ts b/src/services/ipcMain.ts index b261e8014..465d9ad21 100644 --- a/src/services/ipcMain.ts +++ b/src/services/ipcMain.ts @@ -1,4 +1,4 @@ -import assert from "node:assert/strict"; +import assert from "@/utils/assert"; import type { BrowserWindow, IpcMain as ElectronIpcMain } from "electron"; import { spawn, spawnSync } from "child_process"; import * as fsPromises from "fs/promises"; diff --git a/src/services/utils/sendMessageError.ts b/src/services/utils/sendMessageError.ts index 6b3449779..a14d7bdce 100644 --- a/src/services/utils/sendMessageError.ts +++ b/src/services/utils/sendMessageError.ts @@ -1,4 +1,4 @@ -import assert from "node:assert/strict"; +import assert from "@/utils/assert"; import type { SendMessageError } from "@/types/errors"; /** diff --git a/src/stores/WorkspaceConsumerManager.ts b/src/stores/WorkspaceConsumerManager.ts index b0cee2f20..dcbb48063 100644 --- a/src/stores/WorkspaceConsumerManager.ts +++ b/src/stores/WorkspaceConsumerManager.ts @@ -1,3 +1,4 @@ +import assert from "@/utils/assert"; import type { WorkspaceConsumersState } from "./WorkspaceStore"; import { TokenStatsWorker } from "@/utils/tokens/TokenStatsWorker"; import type { StreamingMessageAggregator } from "@/utils/messages/StreamingMessageAggregator"; @@ -48,11 +49,24 @@ export class WorkspaceConsumerManager { // Callback to bump the store when calculation completes private readonly onCalculationComplete: (workspaceId: string) => void; + // Track pending store notifications to avoid duplicate bumps within the same tick + private pendingNotifications = new Set(); + constructor(onCalculationComplete: (workspaceId: string) => void) { this.tokenWorker = new TokenStatsWorker(); this.onCalculationComplete = onCalculationComplete; } + onTokenizerReady(listener: () => void): () => void { + assert(typeof listener === "function", "Tokenizer ready listener must be a function"); + return this.tokenWorker.onTokenizerReady(listener); + } + + onTokenizerEncodingLoaded(listener: (encodingName: string) => void): () => void { + assert(typeof listener === "function", "Tokenizer encoding listener must be a function"); + return this.tokenWorker.onEncodingLoaded(listener); + } + /** * Get cached state without side effects. * Returns null if no cache exists. 
@@ -117,7 +131,7 @@ export class WorkspaceConsumerManager { // Notify store if newly scheduled (triggers UI update) if (isNewSchedule) { - this.onCalculationComplete(workspaceId); + this.notifyStoreAsync(workspaceId); } // Set new timer (150ms - imperceptible to humans, batches rapid events) @@ -143,7 +157,7 @@ export class WorkspaceConsumerManager { this.pendingCalcs.add(workspaceId); // Mark as calculating and notify store - this.onCalculationComplete(workspaceId); + this.notifyStoreAsync(workspaceId); // Run in next tick to avoid blocking caller void (async () => { @@ -170,7 +184,7 @@ export class WorkspaceConsumerManager { }); // Notify store to trigger re-render - this.onCalculationComplete(workspaceId); + this.notifyStoreAsync(workspaceId); } catch (error) { // Cancellations are expected during rapid events - don't cache, don't log // This allows lazy trigger to retry on next access @@ -186,7 +200,7 @@ export class WorkspaceConsumerManager { totalTokens: 0, isCalculating: false, }); - this.onCalculationComplete(workspaceId); + this.notifyStoreAsync(workspaceId); } finally { this.pendingCalcs.delete(workspaceId); @@ -200,6 +214,26 @@ export class WorkspaceConsumerManager { })(); } + private notifyStoreAsync(workspaceId: string): void { + if (this.pendingNotifications.has(workspaceId)) { + return; + } + + this.pendingNotifications.add(workspaceId); + + const schedule = + typeof queueMicrotask === "function" + ? queueMicrotask + : (callback: () => void) => { + void Promise.resolve().then(callback); + }; + + schedule(() => { + this.pendingNotifications.delete(workspaceId); + this.onCalculationComplete(workspaceId); + }); + } + /** * Remove workspace state and cleanup timers. */ @@ -216,6 +250,7 @@ export class WorkspaceConsumerManager { this.scheduledCalcs.delete(workspaceId); this.pendingCalcs.delete(workspaceId); this.needsRecalc.delete(workspaceId); + this.pendingNotifications.delete(workspaceId); } /** @@ -235,5 +270,7 @@ export class WorkspaceConsumerManager { this.cache.clear(); this.scheduledCalcs.clear(); this.pendingCalcs.clear(); + this.needsRecalc.clear(); + this.pendingNotifications.clear(); } } diff --git a/src/stores/WorkspaceStore.ts b/src/stores/WorkspaceStore.ts index afc265210..b91782b32 100644 --- a/src/stores/WorkspaceStore.ts +++ b/src/stores/WorkspaceStore.ts @@ -1,3 +1,4 @@ +import assert from "@/utils/assert"; import type { CmuxMessage, DisplayedMessage } from "@/types/message"; import { createCmuxMessage } from "@/types/message"; import type { FrontendWorkspaceMetadata } from "@/types/workspace"; @@ -23,7 +24,7 @@ import { isReasoningEnd, } from "@/types/ipc"; import { MapStore } from "./MapStore"; -import { createDisplayUsage } from "@/utils/tokens/tokenStatsCalculator"; +import { createDisplayUsage } from "@/utils/tokens/displayUsage"; import { WorkspaceConsumerManager } from "./WorkspaceConsumerManager"; import type { ChatUsageDisplay } from "@/utils/tokens/usageAggregator"; import type { TokenConsumer } from "@/types/chatStats"; @@ -112,6 +113,7 @@ export class WorkspaceStore { // Architecture: WorkspaceStore orchestrates (decides when), manager executes (performs calculations) // Dual-cache: consumersStore (MapStore) handles subscriptions, manager owns data cache private readonly consumerManager: WorkspaceConsumerManager; + private readonly cleanupTokenizerReady: () => void; // Supporting data structures private aggregators = new Map(); @@ -143,6 +145,31 @@ export class WorkspaceStore { this.consumersStore.bump(workspaceId); }); + const rescheduleConsumers = 
() => { + for (const [workspaceId, aggregator] of this.aggregators.entries()) { + assert( + workspaceId.length > 0, + "Workspace ID must be non-empty when rescheduling consumers" + ); + if (!this.caughtUp.get(workspaceId)) { + continue; + } + if (aggregator.getAllMessages().length === 0) { + continue; + } + this.consumerManager.scheduleCalculation(workspaceId, aggregator); + } + }; + + const cleanupReady = this.consumerManager.onTokenizerReady(rescheduleConsumers); + const cleanupEncoding = this.consumerManager.onTokenizerEncodingLoaded(() => { + rescheduleConsumers(); + }); + this.cleanupTokenizerReady = () => { + cleanupReady(); + cleanupEncoding(); + }; + // Note: We DON'T auto-check recency on every state bump. // Instead, checkAndBumpRecencyIfChanged() is called explicitly after // message completion events (not on deltas) to prevent App.tsx re-renders. @@ -714,6 +741,7 @@ export class WorkspaceStore { dispose(): void { // Clean up consumer manager this.consumerManager.dispose(); + this.cleanupTokenizerReady(); for (const unsubscribe of this.ipcUnsubscribers.values()) { unsubscribe(); diff --git a/src/utils/assert.ts b/src/utils/assert.ts new file mode 100644 index 000000000..0e061f6cb --- /dev/null +++ b/src/utils/assert.ts @@ -0,0 +1,16 @@ +// Browser-safe assertion helper for renderer and worker bundles. +// Throws immediately when invariants are violated so bugs surface early. +export class AssertionError extends Error { + constructor(message?: string) { + super(message ?? "Assertion failed"); + this.name = "AssertionError"; + } +} + +export function assert(condition: unknown, message?: string): asserts condition { + if (!condition) { + throw new AssertionError(message); + } +} + +export default assert; diff --git a/src/utils/main/tokenizer.test.ts b/src/utils/main/tokenizer.test.ts new file mode 100644 index 000000000..c93605f1e --- /dev/null +++ b/src/utils/main/tokenizer.test.ts @@ -0,0 +1,78 @@ +import { beforeEach, describe, expect, test } from "bun:test"; + +import { + __resetTokenizerForTests, + getTokenizerForModel, + loadTokenizerForModel, + loadTokenizerModules, + onTokenizerEncodingLoaded, +} from "./tokenizer"; + +beforeEach(() => { + __resetTokenizerForTests(); +}); + +describe("tokenizer caching", () => { + test("does not cache fallback approximations", async () => { + await loadTokenizerModules(); + + const model = "openai:gpt-4-turbo"; + const tokenizer = getTokenizerForModel(model); + const text = "cmux-fallback-check-" + "a".repeat(40); + + const fallbackCount = tokenizer.countTokens(text); + const approximation = Math.ceil(text.length / 4); + expect(fallbackCount).toBe(approximation); + + await loadTokenizerForModel(model); + + const accurateCount = tokenizer.countTokens(text); + + expect(accurateCount).not.toBe(fallbackCount); + expect(accurateCount).toBeGreaterThan(0); + }); + + test("replays loaded encodings for late listeners", async () => { + const model = "openai:gpt-4o"; + await loadTokenizerForModel(model); + + const received: string[] = []; + const unsubscribe = onTokenizerEncodingLoaded((encodingName) => { + received.push(encodingName); + }); + unsubscribe(); + + expect(received.length).toBeGreaterThan(0); + expect(received).toContain("o200k_base"); + }); + + test("accurate counts replace fallback approximations", async () => { + const model = "openai:gpt-4-turbo"; + const tokenizer = getTokenizerForModel(model); + const text = "cmux-accuracy-check-" + "b".repeat(80); + + let unsubscribe: () => void = () => undefined; + const encodingReady = new 
Promise((resolve) => { + unsubscribe = onTokenizerEncodingLoaded((encodingName) => { + if (encodingName === "cl100k_base") { + unsubscribe(); + resolve(); + } + }); + }); + + const fallbackCount = tokenizer.countTokens(text); + const approximation = Math.ceil(text.length / 4); + expect(fallbackCount).toBe(approximation); + + await encodingReady; + await Promise.resolve(); + + const accurateCount = tokenizer.countTokens(text); + expect(accurateCount).not.toBe(fallbackCount); + expect(accurateCount).toBeGreaterThan(0); + + const cachedCount = tokenizer.countTokens(text); + expect(cachedCount).toBe(accurateCount); + }); +}); diff --git a/src/utils/main/tokenizer.ts b/src/utils/main/tokenizer.ts index 4c8bce7c0..862e5d162 100644 --- a/src/utils/main/tokenizer.ts +++ b/src/utils/main/tokenizer.ts @@ -1,7 +1,7 @@ /** * Token calculation utilities for chat statistics */ - +import assert from "@/utils/assert"; import { LRUCache } from "lru-cache"; import CRC32 from "crc-32"; import { getToolSchemas, getAvailableTools } from "@/utils/tools/toolDefinitions"; @@ -11,58 +11,424 @@ export interface Tokenizer { countTokens: (text: string) => number; } -/** - * Lazy-loaded tokenizer modules to reduce startup time - * These are loaded on first use with /4 approximation fallback - * - * eslint-disable-next-line @typescript-eslint/consistent-type-imports -- Dynamic imports are intentional for lazy loading - */ -interface TokenizerModuleImports { +interface TokenizerBaseModules { + // Base module properties (always required) // eslint-disable-next-line @typescript-eslint/consistent-type-imports AITokenizer: typeof import("ai-tokenizer").default; // eslint-disable-next-line @typescript-eslint/consistent-type-imports models: typeof import("ai-tokenizer").models; - // eslint-disable-next-line @typescript-eslint/consistent-type-imports - o200k_base: typeof import("ai-tokenizer/encoding/o200k_base"); - // eslint-disable-next-line @typescript-eslint/consistent-type-imports - claude: typeof import("ai-tokenizer/encoding/claude"); } -let tokenizerModules: TokenizerModuleImports | null = null; +// eslint-disable-next-line @typescript-eslint/consistent-type-imports +type EncodingModule = import("ai-tokenizer").Encoding; + +const BASE_MODULE_PROPS = ["AITokenizer", "models"] as const satisfies ReadonlyArray< + keyof TokenizerBaseModules +>; + +const KNOWN_ENCODINGS = ["o200k_base", "claude"] as const; + +/** + * Dynamic imports below are deliberate to keep ~2MB encoding bundles out of the initial + * startup path. See eslint.config.mjs for the scoped override that documents this policy. 
+ */
+
+/**
+ * Module cache - stores loaded modules
+ */
+const moduleCache: {
+  base: TokenizerBaseModules | null;
+  encodings: Map<string, EncodingModule>;
+} = {
+  base: null,
+  encodings: new Map(),
+};
+
+let baseLoadPromise: Promise<TokenizerBaseModules> | null = null;
+const encodingLoadPromises = new Map<string, Promise<EncodingModule>>();
+
+type TokenizerReadyListener = () => void;
+const readyListeners = new Set<TokenizerReadyListener>();
+let tokenizerModulesReady = false;
+
+type TokenizerEncodingListener = (encodingName: string) => void;
+const encodingListeners = new Set<TokenizerEncodingListener>();
+
+function isTokenizerReady(): boolean {
+  return moduleCache.base !== null && moduleCache.encodings.size > 0;
+}
+
+function now(): number {
+  const perf = globalThis.performance;
+  if (perf && typeof perf.now === "function") {
+    return perf.now.call(perf);
+  }
+  return Date.now();
+}
+
+interface Logger {
+  info: (...args: unknown[]) => void;
+  error: (...args: unknown[]) => void;
+  debug: (...args: unknown[]) => void;
+}
+
+const consoleLogger: Logger = {
+  info: (...args) => console.log(...args),
+  error: (...args) => console.error(...args),
+  debug: (...args) => {
+    if (typeof process !== "undefined" && process.env?.CMUX_DEBUG) {
+      console.debug(...args);
+    }
+  },
+};
+
+let activeLogger: Logger = consoleLogger;
+
+// Lazy-import log.ts in the Electron main process only to keep renderer bundles small.
+if (typeof process !== "undefined" && process.type === "browser") {
+  void import("@/services/log")
+    .then((module) => {
+      activeLogger = module.log;
+    })
+    .catch(() => {
+      // Fallback to console logging when log.ts is unavailable (tests, worker builds).
+    });
+}
+
+const logger: Logger = {
+  info: (...args) => activeLogger.info(...args),
+  error: (...args) => activeLogger.error(...args),
+  debug: (...args) => activeLogger.debug(...args),
+};
+
+function notifyIfTokenizerReady(): void {
+  if (tokenizerModulesReady || !isTokenizerReady()) {
+    return;
+  }
+
+  tokenizerModulesReady = true;
+  for (const listener of readyListeners) {
+    try {
+      listener();
+    } catch (error) {
+      logger.error("[tokenizer] Ready listener threw:", error);
+    }
+  }
+  readyListeners.clear();
+}
 
-let tokenizerLoadPromise: Promise<void> | null = null;
+function notifyEncodingLoaded(encodingName: string): void {
+  assert(
+    encodingName.length > 0,
+    "Tokenizer encoding notification requires non-empty encoding name"
+  );
+  if (encodingListeners.size === 0) {
+    return;
+  }
+  for (const listener of encodingListeners) {
+    try {
+      listener(encodingName);
+    } catch (error) {
+      logger.error(`[tokenizer] Encoding listener threw for '${encodingName}':`, error);
+    }
+  }
+}
 
 /**
- * Load tokenizer modules asynchronously
+ * Registers a listener fired once the tokenizer base and at least one encoding finish loading.
+ * Prefer `onTokenizerEncodingLoaded` for UI updates that need per-encoding fidelity.
+ */
+export function onTokenizerModulesLoaded(listener: () => void): () => void {
+  if (tokenizerModulesReady || isTokenizerReady()) {
+    tokenizerModulesReady = true;
+    listener();
+    return () => undefined;
+  }
+
+  readyListeners.add(listener);
+  return () => {
+    readyListeners.delete(listener);
+  };
+}
+
+export function onTokenizerEncodingLoaded(listener: TokenizerEncodingListener): () => void {
+  assert(typeof listener === "function", "Tokenizer encoding listener must be a function");
+  encodingListeners.add(listener);
+
+  // Immediately notify about already-loaded encodings so listeners can catch up.
+ for (const encodingName of moduleCache.encodings.keys()) { + try { + listener(encodingName); + } catch (error) { + logger.error( + `[tokenizer] Encoding listener threw for '${encodingName}' during initial replay:`, + error + ); + } + } + + return () => { + encodingListeners.delete(listener); + }; +} + +function getCachedBaseModules(): TokenizerBaseModules | null { + return moduleCache.base; +} + +async function loadBaseModules(): Promise { + if (moduleCache.base) { + return moduleCache.base; + } + + if (!baseLoadPromise) { + const timerLabel = "[tokenizer] load base module"; + logger.info(`${timerLabel} started`); + baseLoadPromise = (async () => { + const startMs = now(); + try { + const module = await import("ai-tokenizer"); + + assert( + typeof module.default === "function", + "Tokenizer base module default export must be a constructor" + ); + assert( + typeof module.models === "object" && module.models !== null, + "Tokenizer base module must export models metadata" + ); + const baseModules: TokenizerBaseModules = { + AITokenizer: module.default, + models: module.models, + }; + for (const prop of BASE_MODULE_PROPS) { + assert(prop in baseModules, `Tokenizer base modules missing '${String(prop)}' property`); + } + moduleCache.base = baseModules; + notifyIfTokenizerReady(); + return baseModules; + } catch (error) { + logger.error( + "[tokenizer] Failed to load base tokenizer modules; token counts will rely on approximations until retry succeeds", + error + ); + throw error; + } finally { + const durationMs = now() - startMs; + logger.info(`${timerLabel} finished in ${durationMs.toFixed(0)}ms`); + } + })(); + } + + try { + const baseModules = await baseLoadPromise; + assert( + moduleCache.base === baseModules, + "Tokenizer base modules cache must contain the loaded modules" + ); + return baseModules; + } catch (error) { + moduleCache.base = null; + baseLoadPromise = null; + throw error; + } finally { + if (moduleCache.base) { + baseLoadPromise = null; + } + } +} + +function beginLoadBase(): void { + void loadBaseModules().catch(() => { + logger.error( + "[tokenizer] Base tokenizer modules failed to preload; token counts will stay approximate until retry succeeds" + ); + // Error already logged in loadBaseModules(); leave cache unset so callers retry. 
+  });
+}
+
+function getCachedEncoding(encodingName: string): EncodingModule | undefined {
+  assert(
+    typeof encodingName === "string" && encodingName.length > 0,
+    "Tokenizer encoding name must be a non-empty string"
+  );
+  return moduleCache.encodings.get(encodingName);
+}
+
+async function loadEncodingModule(encodingName: string): Promise<EncodingModule> {
+  const cached = getCachedEncoding(encodingName);
+  if (cached) {
+    return cached;
+  }
+
+  let promise = encodingLoadPromises.get(encodingName);
+  if (!promise) {
+    const loader = ENCODING_LOADERS[encodingName];
+    assert(loader, `Tokenizer encoding loader missing for '${encodingName}'`);
+
+    const timerLabel = `[tokenizer] load encoding: ${encodingName}`;
+    logger.info(`${timerLabel} started`);
+
+    promise = (async () => {
+      const startMs = now();
+      try {
+        const module = await loader();
+        moduleCache.encodings.set(encodingName, module);
+        notifyIfTokenizerReady();
+        notifyEncodingLoaded(encodingName);
+        return module;
+      } catch (error) {
+        logger.error(
+          `[tokenizer] Failed to load tokenizer encoding '${encodingName}'; token counts will fall back to approximations`,
+          error
+        );
+        throw error;
+      } finally {
+        const durationMs = now() - startMs;
+        logger.info(`${timerLabel} finished in ${durationMs.toFixed(0)}ms`);
+      }
+    })();
+
+    encodingLoadPromises.set(encodingName, promise);
+  }
+
+  try {
+    const encoding = await promise;
+    assert(
+      moduleCache.encodings.get(encodingName) === encoding,
+      "Tokenizer encoding cache must match the loaded encoding"
+    );
+    return encoding;
+  } catch (error) {
+    encodingLoadPromises.delete(encodingName);
+    throw error;
+  } finally {
+    if (moduleCache.encodings.has(encodingName)) {
+      encodingLoadPromises.delete(encodingName);
+    }
+  }
+}
+
+function normalizeEncodingModule(
+  encodingName: string,
+  module: Record<string, unknown>
+): EncodingModule {
+  const candidate = module as Partial<EncodingModule>;
+
+  if (typeof candidate.name !== "string" || candidate.name.length === 0) {
+    throw new Error(`Tokenizer encoding '${encodingName}' module missing name field`);
+  }
+
+  if (candidate.name !== encodingName) {
+    throw new Error(
+      `Tokenizer encoding loader mismatch: expected '${encodingName}' but received '${String(candidate.name)}'`
+    );
+  }
+
+  if (
+    typeof candidate.pat_str !== "string" ||
+    typeof candidate.special_tokens !== "object" ||
+    candidate.special_tokens === null ||
+    typeof candidate.stringEncoder !== "object" ||
+    candidate.stringEncoder === null ||
+    !Array.isArray(candidate.binaryEncoder) ||
+    typeof candidate.decoder !== "object" ||
+    candidate.decoder === null
+  ) {
+    throw new Error(`Tokenizer encoding '${encodingName}' module missing required fields`);
+  }
+
+  return {
+    name: candidate.name,
+    pat_str: candidate.pat_str,
+    special_tokens: candidate.special_tokens,
+    stringEncoder: candidate.stringEncoder,
+    binaryEncoder: candidate.binaryEncoder,
+    decoder: candidate.decoder,
+  };
+}
+
+const ENCODING_LOADERS: Record<string, () => Promise<EncodingModule>> = {
+  o200k_base: async () =>
+    normalizeEncodingModule("o200k_base", await import("ai-tokenizer/encoding/o200k_base")),
+  claude: async () =>
+    normalizeEncodingModule("claude", await import("ai-tokenizer/encoding/claude")),
+  cl100k_base: async () =>
+    normalizeEncodingModule("cl100k_base", await import("ai-tokenizer/encoding/cl100k_base")),
+  p50k_base: async () =>
+    normalizeEncodingModule("p50k_base", await import("ai-tokenizer/encoding/p50k_base")),
+};
+
+// Track if loadTokenizerModules() is already in progress
+let eagerLoadPromise: Promise<void> | null = null;
+
+/**
+ * Load tokenizer modules
asynchronously (eager mode - loads all known encodings) * Dynamic imports are intentional here to defer loading heavy tokenizer modules * until first use, reducing app startup time from ~8.8s to <1s * + * Idempotent - safe to call multiple times + * * @returns Promise that resolves when tokenizer modules are loaded */ export async function loadTokenizerModules(): Promise { - if (tokenizerModules) return; - if (tokenizerLoadPromise) return tokenizerLoadPromise; - - tokenizerLoadPromise = (async () => { - // Performance: lazy load tokenizer modules to reduce startup time from ~8.8s to <1s - /* eslint-disable no-restricted-syntax */ - const [AITokenizerModule, modelsModule, o200k_base, claude] = await Promise.all([ - import("ai-tokenizer"), - import("ai-tokenizer"), - import("ai-tokenizer/encoding/o200k_base"), - import("ai-tokenizer/encoding/claude"), - ]); - /* eslint-enable no-restricted-syntax */ - - tokenizerModules = { - AITokenizer: AITokenizerModule.default, - models: modelsModule.models, - o200k_base, - claude, - }; + const allLoaded = + moduleCache.base && KNOWN_ENCODINGS.every((enc) => moduleCache.encodings.has(enc)); + + if (allLoaded) { + return; + } + + if (eagerLoadPromise) { + return eagerLoadPromise; + } + + logger.info("[tokenizer] loadTokenizerModules() called"); + + const timerLabel = "[tokenizer] loadTokenizerModules() total"; + const work = (async () => { + logger.info("[tokenizer] Starting loads for base + encodings:", KNOWN_ENCODINGS); + const startMs = now(); + try { + const basePromise = loadBaseModules(); + const encodingPromises = KNOWN_ENCODINGS.map((enc) => loadEncodingModule(enc)); + await Promise.all([basePromise, ...encodingPromises]); + logger.info("[tokenizer] All modules loaded successfully"); + notifyIfTokenizerReady(); + } finally { + const durationMs = now() - startMs; + logger.info(`${timerLabel} finished in ${durationMs.toFixed(0)}ms`); + } })(); - return tokenizerLoadPromise; + eagerLoadPromise = work + .catch((error) => { + logger.error("[tokenizer] loadTokenizerModules() failed", error); + throw error; + }) + .finally(() => { + eagerLoadPromise = null; + }); + + return eagerLoadPromise; +} + +/** + * Load only the tokenizer modules needed for a specific model + * More efficient than loadTokenizerModules() if you know the model upfront + * + * This loads ~50% faster than loadTokenizerModules() since it only loads + * the base module + one encoding instead of all encodings. 
+ * + * @param modelString - Model identifier (e.g., "anthropic:claude-opus-4-1", "openai:gpt-4") + */ +export async function loadTokenizerForModel(modelString: string): Promise { + const baseModules = await loadBaseModules(); + assert(baseModules, "Tokenizer base modules must be loaded before selecting encodings"); + + const encodingName = getTokenizerEncoding(modelString, baseModules); + await loadEncodingModule(encodingName); + notifyIfTokenizerReady(); } /** @@ -79,6 +445,41 @@ const tokenCountCache = new LRUCache({ }, }); +interface TokenCountCacheEntry { + value: number; + cache: boolean; +} + +type TokenCountResult = number | TokenCountCacheEntry; + +function normalizeTokenCountResult(result: TokenCountResult): TokenCountCacheEntry { + if (typeof result === "number") { + assert(Number.isFinite(result), "Token count must be a finite number"); + assert(result >= 0, "Token count cannot be negative"); + return { value: result, cache: true }; + } + + assert(Number.isFinite(result.value), "Token count must be a finite number"); + assert(result.value >= 0, "Token count cannot be negative"); + assert(typeof result.cache === "boolean", "Token count cache flag must be boolean"); + return result; +} + +function isPromiseLike(value: unknown): value is Promise { + return ( + typeof value === "object" && + value !== null && + "then" in (value as Record) && + typeof (value as PromiseLike).then === "function" + ); +} + +function fallbackTokenCount(text: string): TokenCountCacheEntry { + const approximation = Math.ceil(text.length / 4); + assert(Number.isFinite(approximation), "Token count approximation must be finite"); + return { value: approximation, cache: false }; +} + /** * Count tokens with caching via CRC32 checksum * Avoids re-tokenizing identical strings (system messages, tool definitions, etc.) @@ -86,7 +487,10 @@ const tokenCountCache = new LRUCache({ * NOTE: For async tokenization, this returns an approximation immediately and caches * the accurate count in the background. Subsequent calls will use the cached accurate count. 
*/ -function countTokensCached(text: string, tokenizeFn: () => number | Promise): number { +function countTokensCached( + text: string, + tokenizeFn: () => TokenCountResult | Promise +): number { const checksum = CRC32.str(text); const cached = tokenCountCache.get(checksum); if (cached !== undefined) { @@ -95,23 +499,36 @@ function countTokensCached(text: string, tokenizeFn: () => number | Promise(result)) { + const normalized = normalizeTokenCountResult(result); + if (normalized.cache) { + tokenCountCache.set(checksum, normalized.value); + } + return normalized.value; } // Async case: return approximation now, cache accurate value when ready const approximation = Math.ceil(text.length / 4); - void result.then((count) => tokenCountCache.set(checksum, count)); + void result + .then((resolved) => { + const normalized = normalizeTokenCountResult(resolved); + if (normalized.cache) { + tokenCountCache.set(checksum, normalized.value); + } + }) + .catch((error) => { + logger.error("[tokenizer] Async tokenization failed", error); + }); return approximation; } -type TokenizerModules = TokenizerModuleImports; +type TokenizerModules = TokenizerBaseModules; type TokenizerModelRecord = Record; const FALLBACK_MODEL_KEY = "openai/gpt-4o"; const FALLBACK_ENCODING = "o200k_base"; +const TOKENIZATION_FALLBACK_MESSAGE = + "[tokenizer] Failed to tokenize with loaded modules; returning fallback approximation"; const MODEL_KEY_OVERRIDES: Record = { "anthropic:claude-sonnet-4-5": "anthropic/claude-sonnet-4.5", @@ -150,7 +567,8 @@ function resolveTokenizerEncoding(modelString: string, modules: TokenizerModules function getTokenizerEncoding(modelString: string, modules: TokenizerModules | null): string { if (!modules) { - return normalizeModelKey(modelString); + beginLoadBase(); + return FALLBACK_ENCODING; } return resolveTokenizerEncoding(modelString, modules); @@ -158,18 +576,43 @@ function getTokenizerEncoding(modelString: string, modules: TokenizerModules | n /** * Count tokens using loaded tokenizer modules - * Assumes tokenizerModules is not null + * Assumes base module is loaded; encoding will be loaded on-demand via Proxy if needed */ function countTokensWithLoadedModules( text: string, - modelString: string, - modules: NonNullable -): number { - const encodingName = getTokenizerEncoding(modelString, modules); + modelString: string +): TokenCountResult | Promise { + const cachedBase = getCachedBaseModules(); + if (!cachedBase) { + return (async () => { + const baseModules = await loadBaseModules(); + const encodingName = getTokenizerEncoding(modelString, baseModules); + const encoding = await loadEncodingModule(encodingName); + const tokenizer = new baseModules.AITokenizer(encoding); + const value = tokenizer.count(text); + assert(Number.isFinite(value) && value >= 0, "Tokenizer must return a non-negative number"); + return { value, cache: true } satisfies TokenCountCacheEntry; + })(); + } + + const encodingName = getTokenizerEncoding(modelString, cachedBase); + const cachedEncoding = getCachedEncoding(encodingName); + if (cachedEncoding) { + const tokenizer = new cachedBase.AITokenizer(cachedEncoding); + const value = tokenizer.count(text); + assert(Number.isFinite(value) && value >= 0, "Tokenizer must return a non-negative number"); + return { value, cache: true } satisfies TokenCountCacheEntry; + } - const encoding = encodingName === "claude" ? 
modules.claude : modules.o200k_base; - const tokenizer = new modules.AITokenizer(encoding); - return tokenizer.count(text); + return (async () => { + const encoding = await loadEncodingModule(encodingName); + const activeBase = getCachedBaseModules(); + assert(activeBase, "Tokenizer base modules must be available after loading encoding"); + const tokenizer = new activeBase.AITokenizer(encoding); + const value = tokenizer.count(text); + assert(Number.isFinite(value) && value >= 0, "Tokenizer must return a non-negative number"); + return { value, cache: true } satisfies TokenCountCacheEntry; + })(); } /** @@ -180,35 +623,30 @@ function countTokensWithLoadedModules( */ export function getTokenizerForModel(modelString: string): Tokenizer { // Start loading tokenizer modules in background (idempotent) - void loadTokenizerModules(); + void loadTokenizerModules().catch((error) => { + logger.error("[tokenizer] Failed to eagerly load tokenizer modules", error); + }); return { get encoding() { - return getTokenizerEncoding(modelString, tokenizerModules); + // NOTE: This Proxy-style getter runs before encodings finish loading; callers must tolerate + // fallback values (and potential transient undefined) until onTokenizerEncodingLoaded fires. + return getTokenizerEncoding(modelString, moduleCache.base); }, countTokens: (text: string) => { - // If tokenizer already loaded, use synchronous path for accurate counts - if (tokenizerModules) { - return countTokensCached(text, () => { - try { - return countTokensWithLoadedModules(text, modelString, tokenizerModules!); - } catch (error) { - // Unexpected error during tokenization, fallback to approximation - console.error("Failed to tokenize, falling back to approximation:", error); - return Math.ceil(text.length / 4); - } - }); - } - - // Tokenizer not yet loaded - use async path (returns approximation immediately) - return countTokensCached(text, async () => { - await loadTokenizerModules(); + return countTokensCached(text, () => { try { - return countTokensWithLoadedModules(text, modelString, tokenizerModules!); + const result = countTokensWithLoadedModules(text, modelString); + if (isPromiseLike(result)) { + return result.catch((error) => { + logger.error(TOKENIZATION_FALLBACK_MESSAGE, error); + return fallbackTokenCount(text); + }); + } + return result; } catch (error) { - // Unexpected error during tokenization, fallback to approximation - console.error("Failed to tokenize, falling back to approximation:", error); - return Math.ceil(text.length / 4); + logger.error(TOKENIZATION_FALLBACK_MESSAGE, error); + return fallbackTokenCount(text); } }); }, @@ -223,6 +661,22 @@ export function countTokensForData(data: unknown, tokenizer: Tokenizer): number return tokenizer.countTokens(serialized); } +/** + * Test helper to fully reset tokenizer state between test cases. + * Do NOT call from production code. 
+ */ +export function __resetTokenizerForTests(): void { + moduleCache.base = null; + moduleCache.encodings.clear(); + baseLoadPromise = null; + encodingLoadPromises.clear(); + readyListeners.clear(); + tokenizerModulesReady = false; + encodingListeners.clear(); + eagerLoadPromise = null; + tokenCountCache.clear(); +} + /** * Get estimated token count for tool definitions * These are the schemas sent to the API for each tool diff --git a/src/utils/messages/StreamingMessageAggregator.ts b/src/utils/messages/StreamingMessageAggregator.ts index 1e8b2efb5..b7ea83a1d 100644 --- a/src/utils/messages/StreamingMessageAggregator.ts +++ b/src/utils/messages/StreamingMessageAggregator.ts @@ -131,6 +131,17 @@ export class StreamingMessageAggregator { } addMessage(message: CmuxMessage): void { + const existing = this.messages.get(message.id); + if (existing) { + const existingParts = Array.isArray(existing.parts) ? existing.parts.length : 0; + const incomingParts = Array.isArray(message.parts) ? message.parts.length : 0; + + // Prefer richer content when duplicates arrive (e.g., placeholder vs completed message) + if (incomingParts < existingParts) { + return; + } + } + // Just store the message - backend assigns historySequence this.messages.set(message.id, message); this.invalidateCache(); diff --git a/src/utils/tokens/TokenStatsWorker.ts b/src/utils/tokens/TokenStatsWorker.ts index b35c11692..a399badfc 100644 --- a/src/utils/tokens/TokenStatsWorker.ts +++ b/src/utils/tokens/TokenStatsWorker.ts @@ -3,9 +3,17 @@ * Provides a clean async API for calculating stats off the main thread */ +import assert from "@/utils/assert"; import type { CmuxMessage } from "@/types/message"; import type { ChatStats } from "@/types/chatStats"; -import type { WorkerRequest, WorkerResponse, WorkerError } from "./tokenStats.worker"; +import type { + WorkerRequest, + WorkerResponse, + WorkerError, + WorkerNotification, +} from "./tokenStats.worker"; + +type WorkerMessage = WorkerResponse | WorkerError | WorkerNotification; /** * TokenStatsWorker manages a dedicated Web Worker for calculating token statistics @@ -19,6 +27,10 @@ export class TokenStatsWorker { resolve: (stats: ChatStats) => void; reject: (error: Error) => void; } | null = null; + private readonly tokenizerReadyListeners = new Set<() => void>(); + private readonly encodingListeners = new Set<(encodingName: string) => void>(); + private tokenizerReady = false; + private readonly loadedEncodings = new Set(); constructor() { // Create worker using Vite's Web Worker support @@ -31,6 +43,41 @@ export class TokenStatsWorker { this.worker.onerror = this.handleError.bind(this); } + onTokenizerReady(listener: () => void): () => void { + assert(typeof listener === "function", "Tokenizer ready listener must be a function"); + this.tokenizerReadyListeners.add(listener); + if (this.tokenizerReady) { + try { + listener(); + } catch (error) { + console.error("[TokenStatsWorker] Tokenizer ready listener threw", error); + } + } + return () => { + this.tokenizerReadyListeners.delete(listener); + }; + } + + onEncodingLoaded(listener: (encodingName: string) => void): () => void { + assert(typeof listener === "function", "Tokenizer encoding listener must be a function"); + this.encodingListeners.add(listener); + if (this.loadedEncodings.size > 0) { + for (const encodingName of this.loadedEncodings) { + try { + listener(encodingName); + } catch (error) { + console.error( + `[TokenStatsWorker] Tokenizer encoding listener threw for '${encodingName}' during replay`, + error + ); + } + 
} + } + return () => { + this.encodingListeners.delete(listener); + }; + } + /** * Calculate token statistics for the given messages * Cancels any pending calculation and starts a new one @@ -67,9 +114,22 @@ export class TokenStatsWorker { /** * Handle successful or error responses from worker */ - private handleMessage(e: MessageEvent) { + private handleMessage(e: MessageEvent) { const response = e.data; + if ("type" in response) { + if (response.type === "tokenizer-ready") { + this.notifyTokenizerReady(); + return; + } + if (response.type === "encoding-loaded") { + this.notifyEncodingLoaded(response.encodingName); + return; + } + assert(false, "Received unknown worker notification type"); + return; + } + // Ignore responses for cancelled requests if (!this.pendingRequest || this.pendingRequest.id !== response.id) { return; @@ -104,5 +164,44 @@ export class TokenStatsWorker { this.pendingRequest = null; } this.worker.terminate(); + this.tokenizerReadyListeners.clear(); + this.encodingListeners.clear(); + this.loadedEncodings.clear(); + this.tokenizerReady = false; + } + + private notifyTokenizerReady(): void { + this.tokenizerReady = true; + if (this.tokenizerReadyListeners.size === 0) { + return; + } + for (const listener of this.tokenizerReadyListeners) { + try { + listener(); + } catch (error) { + console.error("[TokenStatsWorker] Tokenizer ready listener threw", error); + } + } + } + + private notifyEncodingLoaded(encodingName: string): void { + assert( + typeof encodingName === "string" && encodingName.length > 0, + "Tokenizer encoding notifications require a non-empty encoding name" + ); + this.loadedEncodings.add(encodingName); + if (this.encodingListeners.size === 0) { + return; + } + for (const listener of this.encodingListeners) { + try { + listener(encodingName); + } catch (error) { + console.error( + `[TokenStatsWorker] Tokenizer encoding listener threw for '${encodingName}'`, + error + ); + } + } } } diff --git a/src/utils/tokens/displayUsage.ts b/src/utils/tokens/displayUsage.ts new file mode 100644 index 000000000..b98c5e771 --- /dev/null +++ b/src/utils/tokens/displayUsage.ts @@ -0,0 +1,92 @@ +/** + * Display usage utilities for renderer + * + * IMPORTANT: This file must NOT import tokenizer to avoid pulling Node.js + * dependencies into the renderer bundle. + */ + +import type { LanguageModelV2Usage } from "@ai-sdk/provider"; +import { getModelStats } from "./modelStats"; +import type { ChatUsageDisplay } from "./usageAggregator"; + +/** + * Create a display-friendly usage object from AI SDK usage + * + * This function transforms raw AI SDK usage data into a format suitable + * for display in the UI. It does NOT require the tokenizer. + */ +export function createDisplayUsage( + usage: LanguageModelV2Usage | undefined, + model: string, + providerMetadata?: Record +): ChatUsageDisplay | undefined { + if (!usage) return undefined; + + // Provider-specific token handling: + // - OpenAI: inputTokens is INCLUSIVE of cachedInputTokens + // - Anthropic: inputTokens EXCLUDES cachedInputTokens + const cachedTokens = usage.cachedInputTokens ?? 0; + const rawInputTokens = usage.inputTokens ?? 0; + + // Detect provider from model string + const isOpenAI = model.startsWith("openai:"); + + // For OpenAI, subtract cached tokens to get uncached input tokens + const inputTokens = isOpenAI ? 
Math.max(0, rawInputTokens - cachedTokens) : rawInputTokens; + + // Extract cache creation tokens from provider metadata (Anthropic-specific) + const cacheCreateTokens = + (providerMetadata?.anthropic as { cacheCreationInputTokens?: number } | undefined) + ?.cacheCreationInputTokens ?? 0; + + // Extract reasoning tokens with fallback to provider metadata (OpenAI-specific) + const reasoningTokens = + usage.reasoningTokens ?? + (providerMetadata?.openai as { reasoningTokens?: number } | undefined)?.reasoningTokens ?? + 0; + + // Calculate output tokens excluding reasoning + const outputWithoutReasoning = Math.max(0, (usage.outputTokens ?? 0) - reasoningTokens); + + // Get model stats for cost calculation + const modelStats = getModelStats(model); + + // Calculate costs based on model stats (undefined if model unknown) + let inputCost: number | undefined; + let cachedCost: number | undefined; + let cacheCreateCost: number | undefined; + let outputCost: number | undefined; + let reasoningCost: number | undefined; + + if (modelStats) { + inputCost = inputTokens * modelStats.input_cost_per_token; + cachedCost = cachedTokens * (modelStats.cache_read_input_token_cost ?? 0); + cacheCreateCost = cacheCreateTokens * (modelStats.cache_creation_input_token_cost ?? 0); + outputCost = outputWithoutReasoning * modelStats.output_cost_per_token; + reasoningCost = reasoningTokens * modelStats.output_cost_per_token; + } + + return { + input: { + tokens: inputTokens, + cost_usd: inputCost, + }, + cached: { + tokens: cachedTokens, + cost_usd: cachedCost, + }, + cacheCreate: { + tokens: cacheCreateTokens, + cost_usd: cacheCreateCost, + }, + output: { + tokens: outputWithoutReasoning, + cost_usd: outputCost, + }, + reasoning: { + tokens: reasoningTokens, + cost_usd: reasoningCost, + }, + model, // Include model for display purposes + }; +} diff --git a/src/utils/tokens/tokenStats.worker.ts b/src/utils/tokens/tokenStats.worker.ts index ce401e19d..4be5e0b7a 100644 --- a/src/utils/tokens/tokenStats.worker.ts +++ b/src/utils/tokens/tokenStats.worker.ts @@ -5,6 +5,7 @@ import type { CmuxMessage } from "@/types/message"; import type { ChatStats } from "@/types/chatStats"; +import { onTokenizerEncodingLoaded, onTokenizerModulesLoaded } from "@/utils/main/tokenizer"; import { calculateTokenStats } from "./tokenStatsCalculator"; export interface WorkerRequest { @@ -25,6 +26,10 @@ export interface WorkerError { error: string; } +export type WorkerNotification = + | { type: "tokenizer-ready" } + | { type: "encoding-loaded"; encodingName: string }; + // Handle incoming calculation requests self.onmessage = (e: MessageEvent) => { const { id, messages, model } = e.data; @@ -46,3 +51,19 @@ self.onmessage = (e: MessageEvent) => { self.postMessage(errorResponse); } }; + +onTokenizerModulesLoaded(() => { + const notification: WorkerNotification = { type: "tokenizer-ready" }; + self.postMessage(notification); +}); + +onTokenizerEncodingLoaded((encodingName) => { + if (typeof encodingName !== "string" || encodingName.length === 0) { + throw new Error("Worker received invalid tokenizer encoding name"); + } + const notification: WorkerNotification = { + type: "encoding-loaded", + encodingName, + }; + self.postMessage(notification); +}); diff --git a/src/utils/tokens/tokenStatsCalculator.ts b/src/utils/tokens/tokenStatsCalculator.ts index 8507be873..3f1542507 100644 --- a/src/utils/tokens/tokenStatsCalculator.ts +++ b/src/utils/tokens/tokenStatsCalculator.ts @@ -1,101 +1,23 @@ /** - * Shared token statistics calculation logic - * Used 
by both frontend (WorkspaceStore) and backend (debug commands) + * Main-process-only token statistics calculation logic + * Used by backend (debug commands) and worker threads * - * IMPORTANT: This utility is intentionally abstracted so that the debug command - * (`bun debug costs`) has exact parity with the UI display in the Costs tab. - * Any changes to token calculation logic should be made here to maintain consistency. + * IMPORTANT: This file imports tokenizer and should ONLY be used in main process. + * For renderer-safe usage utilities, use displayUsage.ts instead. */ import type { CmuxMessage } from "@/types/message"; import type { ChatStats, TokenConsumer } from "@/types/chatStats"; -import type { LanguageModelV2Usage } from "@ai-sdk/provider"; import { getTokenizerForModel, countTokensForData, getToolDefinitionTokens, } from "@/utils/main/tokenizer"; -import { getModelStats } from "./modelStats"; +import { createDisplayUsage } from "./displayUsage"; import type { ChatUsageDisplay } from "./usageAggregator"; -/** - * Create a display-friendly usage object from AI SDK usage - */ -export function createDisplayUsage( - usage: LanguageModelV2Usage | undefined, - model: string, - providerMetadata?: Record -): ChatUsageDisplay | undefined { - if (!usage) return undefined; - - // Provider-specific token handling: - // - OpenAI: inputTokens is INCLUSIVE of cachedInputTokens - // - Anthropic: inputTokens EXCLUDES cachedInputTokens - const cachedTokens = usage.cachedInputTokens ?? 0; - const rawInputTokens = usage.inputTokens ?? 0; - - // Detect provider from model string - const isOpenAI = model.startsWith("openai:"); - - // For OpenAI, subtract cached tokens to get uncached input tokens - const inputTokens = isOpenAI ? Math.max(0, rawInputTokens - cachedTokens) : rawInputTokens; - - // Extract cache creation tokens from provider metadata (Anthropic-specific) - const cacheCreateTokens = - (providerMetadata?.anthropic as { cacheCreationInputTokens?: number } | undefined) - ?.cacheCreationInputTokens ?? 0; - - // Extract reasoning tokens with fallback to provider metadata (OpenAI-specific) - const reasoningTokens = - usage.reasoningTokens ?? - (providerMetadata?.openai as { reasoningTokens?: number } | undefined)?.reasoningTokens ?? - 0; - - // Calculate output tokens excluding reasoning - const outputWithoutReasoning = Math.max(0, (usage.outputTokens ?? 0) - reasoningTokens); - - // Get model stats for cost calculation - const modelStats = getModelStats(model); - - // Calculate costs based on model stats (undefined if model unknown) - let inputCost: number | undefined; - let cachedCost: number | undefined; - let cacheCreateCost: number | undefined; - let outputCost: number | undefined; - let reasoningCost: number | undefined; - - if (modelStats) { - inputCost = inputTokens * modelStats.input_cost_per_token; - cachedCost = cachedTokens * (modelStats.cache_read_input_token_cost ?? 0); - cacheCreateCost = cacheCreateTokens * (modelStats.cache_creation_input_token_cost ?? 
0); - outputCost = outputWithoutReasoning * modelStats.output_cost_per_token; - reasoningCost = reasoningTokens * modelStats.output_cost_per_token; - } - - return { - input: { - tokens: inputTokens, - cost_usd: inputCost, - }, - cached: { - tokens: cachedTokens, - cost_usd: cachedCost, - }, - cacheCreate: { - tokens: cacheCreateTokens, - cost_usd: cacheCreateCost, - }, - output: { - tokens: outputWithoutReasoning, - cost_usd: outputCost, - }, - reasoning: { - tokens: reasoningTokens, - cost_usd: reasoningCost, - }, - model, // Include model for display purposes - }; -} +// Re-export for backward compatibility +export { createDisplayUsage }; /** * Calculate token statistics from raw CmuxMessages diff --git a/tests/e2e/utils/ui.ts b/tests/e2e/utils/ui.ts index dec476329..3c504fbb8 100644 --- a/tests/e2e/utils/ui.ts +++ b/tests/e2e/utils/ui.ts @@ -165,7 +165,7 @@ export function createWorkspaceUI(page: Page, context: DemoProjectConfig): Works action: () => Promise, options?: { timeoutMs?: number } ): Promise { - const timeoutMs = options?.timeoutMs ?? 12_000; + const timeoutMs = options?.timeoutMs ?? 20_000; const workspaceId = context.workspaceId; await page.evaluate((id: string) => { type StreamCaptureEvent = { diff --git a/vite.config.ts b/vite.config.ts index 5be854261..57d4b34b7 100644 --- a/vite.config.ts +++ b/vite.config.ts @@ -43,10 +43,7 @@ const basePlugins = [ export default defineConfig(({ mode }) => ({ // This prevents mermaid initialization errors in production while allowing dev to work - plugins: - mode === "development" - ? [...basePlugins, topLevelAwait()] - : basePlugins, + plugins: mode === "development" ? [...basePlugins, topLevelAwait()] : basePlugins, resolve: { alias, }, @@ -62,6 +59,17 @@ export default defineConfig(({ mode }) => ({ format: "es", inlineDynamicImports: false, sourcemapExcludeSources: false, + manualChunks(id) { + const normalizedId = id.split(path.sep).join("/"); + if (normalizedId.includes("node_modules/ai-tokenizer/encoding/")) { + const chunkName = path.basename(id, path.extname(id)); + return `tokenizer-encoding-${chunkName}`; + } + if (normalizedId.includes("node_modules/ai-tokenizer/")) { + return "tokenizer-base"; + } + return undefined; + }, }, }, chunkSizeWarningLimit: 2000,
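
Usage sketch (not part of the patch above): how the lazy tokenizer API introduced in src/utils/main/tokenizer.ts is meant to compose, using only the exports added in this diff. The helper name, model string, and logging are illustrative; counts stay at the length/4 approximation until the relevant encoding chunk finishes loading.

import {
  getTokenizerForModel,
  loadTokenizerForModel,
  onTokenizerEncodingLoaded,
} from "@/utils/main/tokenizer";

// Count tokens for a prompt, upgrading from the fallback approximation to an accurate count.
async function logPromptTokens(model: string, prompt: string): Promise<void> {
  const tokenizer = getTokenizerForModel(model);

  // May be a length/4 approximation if the encoding has not loaded yet.
  console.log("first count:", tokenizer.countTokens(prompt));

  // Fires for encodings that are already cached and again as new ones finish loading.
  const stop = onTokenizerEncodingLoaded((encodingName) => {
    console.log(`encoding ${encodingName} loaded:`, tokenizer.countTokens(prompt));
  });

  // Block until the encoding for this model is cached (what AgentSession.sendMessage now does).
  await loadTokenizerForModel(model);
  console.log("accurate count:", tokenizer.countTokens(prompt));
  stop();
}

void logPromptTokens("anthropic:claude-opus-4-1", "How many tokens is this prompt?");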