diff --git a/src/components/AIView.tsx b/src/components/AIView.tsx
index 1fc21b1d3..6d537db85 100644
--- a/src/components/AIView.tsx
+++ b/src/components/AIView.tsx
@@ -13,7 +13,6 @@ import {
   mergeConsecutiveStreamErrors,
 } from "@/utils/messages/messageUtils";
 import { hasInterruptedStream } from "@/utils/messages/retryEligibility";
-import { ChatProvider } from "@/contexts/ChatContext";
 import { ThinkingProvider } from "@/contexts/ThinkingContext";
 import { ModeProvider } from "@/contexts/ModeContext";
 import { formatKeybind, KEYBINDS } from "@/utils/ui/keybinds";
@@ -379,8 +378,7 @@ const AIViewInner: React.FC<AIViewProps> = ({
   }
 
   // Extract state from workspace state
-  const { messages, canInterrupt, isCompacting, loading, cmuxMessages, currentModel } =
-    workspaceState;
+  const { messages, canInterrupt, isCompacting, loading, currentModel } = workspaceState;
 
   // Get active stream message ID for token counting
   const activeStreamMessageId = aggregator.getActiveStreamMessageId();
@@ -426,147 +424,143 @@ const AIViewInner: React.FC<AIViewProps> = ({
   }
 
   return (
-    <ChatProvider messages={messages} cmuxMessages={cmuxMessages} model={currentModel ?? "unknown"}>
-      <ViewContainer className={className}>
-        <ChatArea ref={chatAreaRef}>
-          <ViewHeader>
-            <WorkspaceTitle>
-              <StatusIndicator
-                streaming={canInterrupt}
-                title={
-                  canInterrupt && currentModel ? `${getModelName(currentModel)} streaming` : "Idle"
+    <ViewContainer className={className}>
+      <ChatArea ref={chatAreaRef}>
+        <ViewHeader>
+          <WorkspaceTitle>
+            <StatusIndicator
+              streaming={canInterrupt}
+              title={
+                canInterrupt && currentModel ? `${getModelName(currentModel)} streaming` : "Idle"
+              }
+            />
+            <GitStatusIndicator
+              gitStatus={gitStatus}
+              workspaceId={workspaceId}
+              tooltipPosition="bottom"
+            />
+            {projectName} / {branch}
+            <WorkspacePath>{namedWorkspacePath}</WorkspacePath>
+            <TooltipWrapper inline>
+              <TerminalIconButton onClick={handleOpenTerminal}>
+                <svg viewBox="0 0 16 16" fill="currentColor">
+                  <path d="M0 2.75C0 1.784.784 1 1.75 1h12.5c.966 0 1.75.784 1.75 1.75v10.5A1.75 1.75 0 0114.25 15H1.75A1.75 1.75 0 010 13.25V2.75zm1.75-.25a.25.25 0 00-.25.25v10.5c0 .138.112.25.25.25h12.5a.25.25 0 00.25-.25V2.75a.25.25 0 00-.25-.25H1.75zM7.25 8a.75.75 0 01-.22.53l-2.25 2.25a.75.75 0 01-1.06-1.06L5.44 8 3.72 6.28a.75.75 0 111.06-1.06l2.25 2.25c.141.14.22.331.22.53zm1.5 1.5a.75.75 0 000 1.5h3a.75.75 0 000-1.5h-3z" />
+                </svg>
+              </TerminalIconButton>
+              <Tooltip className="tooltip" position="bottom" align="center">
+                Open in terminal ({formatKeybind(KEYBINDS.OPEN_TERMINAL)})
+              </Tooltip>
+            </TooltipWrapper>
+          </WorkspaceTitle>
+        </ViewHeader>
+
+        <OutputContainer>
+          <OutputContent
+            ref={contentRef}
+            onWheel={markUserInteraction}
+            onTouchMove={markUserInteraction}
+            onScroll={handleScroll}
+            role="log"
+            aria-live={canInterrupt ? "polite" : "off"}
+            aria-busy={canInterrupt}
+            aria-label="Conversation transcript"
+            tabIndex={0}
+          >
+            {mergedMessages.length === 0 ? (
+              <EmptyState>
+                <h3>No Messages Yet</h3>
+                <p>Send a message below to begin</p>
+              </EmptyState>
+            ) : (
+              <>
+                {mergedMessages.map((msg) => {
+                  const isAtCutoff =
+                    editCutoffHistoryId !== undefined &&
+                    msg.type !== "history-hidden" &&
+                    msg.historyId === editCutoffHistoryId;
+
+                  return (
+                    <React.Fragment key={msg.id}>
+                      <div
+                        data-message-id={msg.type !== "history-hidden" ? msg.historyId : undefined}
+                      >
+                        <MessageRenderer
+                          message={msg}
+                          onEditUserMessage={handleEditUserMessage}
+                          workspaceId={workspaceId}
+                          isCompacting={isCompacting}
+                        />
+                      </div>
+                      {isAtCutoff && (
+                        <EditBarrier>
+                          ⚠️ Messages below this line will be removed when you submit the edit
+                        </EditBarrier>
+                      )}
+                      {shouldShowInterruptedBarrier(msg) && <InterruptedBarrier />}
+                    </React.Fragment>
+                  );
+                })}
+                {/* Show RetryBarrier after the last message if needed */}
+                {showRetryBarrier && (
+                  <RetryBarrier
+                    workspaceId={workspaceId}
+                    autoRetry={autoRetry}
+                    onStopAutoRetry={() => setAutoRetry(false)}
+                    onResetAutoRetry={() => setAutoRetry(true)}
+                  />
+                )}
+              </>
+            )}
+            <PinnedTodoList workspaceId={workspaceId} />
+            {canInterrupt && (
+              <StreamingBarrier
+                statusText={
+                  isCompacting
+                    ? currentModel
+                      ? `${getModelName(currentModel)} compacting...`
+                      : "compacting..."
+                    : currentModel
+                      ? `${getModelName(currentModel)} streaming...`
+                      : "streaming..."
+                }
+                cancelText={`hit ${formatKeybind(KEYBINDS.INTERRUPT_STREAM)} to cancel`}
+                tokenCount={
+                  activeStreamMessageId
+                    ? aggregator.getStreamingTokenCount(activeStreamMessageId)
+                    : undefined
+                }
+                tps={
+                  activeStreamMessageId
+                    ? aggregator.getStreamingTPS(activeStreamMessageId)
+                    : undefined
                 }
               />
-              <GitStatusIndicator
-                gitStatus={gitStatus}
-                workspaceId={workspaceId}
-                tooltipPosition="bottom"
-              />
-              {projectName} / {branch}
-              <WorkspacePath>{namedWorkspacePath}</WorkspacePath>
-              <TooltipWrapper inline>
-                <TerminalIconButton onClick={handleOpenTerminal}>
-                  <svg viewBox="0 0 16 16" fill="currentColor">
-                    <path d="M0 2.75C0 1.784.784 1 1.75 1h12.5c.966 0 1.75.784 1.75 1.75v10.5A1.75 1.75 0 0114.25 15H1.75A1.75 1.75 0 010 13.25V2.75zm1.75-.25a.25.25 0 00-.25.25v10.5c0 .138.112.25.25.25h12.5a.25.25 0 00.25-.25V2.75a.25.25 0 00-.25-.25H1.75zM7.25 8a.75.75 0 01-.22.53l-2.25 2.25a.75.75 0 01-1.06-1.06L5.44 8 3.72 6.28a.75.75 0 111.06-1.06l2.25 2.25c.141.14.22.331.22.53zm1.5 1.5a.75.75 0 000 1.5h3a.75.75 0 000-1.5h-3z" />
-                  </svg>
-                </TerminalIconButton>
-                <Tooltip className="tooltip" position="bottom" align="center">
-                  Open in terminal ({formatKeybind(KEYBINDS.OPEN_TERMINAL)})
-                </Tooltip>
-              </TooltipWrapper>
-            </WorkspaceTitle>
-          </ViewHeader>
-
-          <OutputContainer>
-            <OutputContent
-              ref={contentRef}
-              onWheel={markUserInteraction}
-              onTouchMove={markUserInteraction}
-              onScroll={handleScroll}
-              role="log"
-              aria-live={canInterrupt ? "polite" : "off"}
-              aria-busy={canInterrupt}
-              aria-label="Conversation transcript"
-              tabIndex={0}
-            >
-              {mergedMessages.length === 0 ? (
-                <EmptyState>
-                  <h3>No Messages Yet</h3>
-                  <p>Send a message below to begin</p>
-                </EmptyState>
-              ) : (
-                <>
-                  {mergedMessages.map((msg) => {
-                    const isAtCutoff =
-                      editCutoffHistoryId !== undefined &&
-                      msg.type !== "history-hidden" &&
-                      msg.historyId === editCutoffHistoryId;
-
-                    return (
-                      <React.Fragment key={msg.id}>
-                        <div
-                          data-message-id={
-                            msg.type !== "history-hidden" ? msg.historyId : undefined
-                          }
-                        >
-                          <MessageRenderer
-                            message={msg}
-                            onEditUserMessage={handleEditUserMessage}
-                            workspaceId={workspaceId}
-                            isCompacting={isCompacting}
-                          />
-                        </div>
-                        {isAtCutoff && (
-                          <EditBarrier>
-                            ⚠️ Messages below this line will be removed when you submit the edit
-                          </EditBarrier>
-                        )}
-                        {shouldShowInterruptedBarrier(msg) && <InterruptedBarrier />}
-                      </React.Fragment>
-                    );
-                  })}
-                  {/* Show RetryBarrier after the last message if needed */}
-                  {showRetryBarrier && (
-                    <RetryBarrier
-                      workspaceId={workspaceId}
-                      autoRetry={autoRetry}
-                      onStopAutoRetry={() => setAutoRetry(false)}
-                      onResetAutoRetry={() => setAutoRetry(true)}
-                    />
-                  )}
-                </>
-              )}
-              <PinnedTodoList workspaceId={workspaceId} />
-              {canInterrupt && (
-                <StreamingBarrier
-                  statusText={
-                    isCompacting
-                      ? currentModel
-                        ? `${getModelName(currentModel)} compacting...`
-                        : "compacting..."
-                      : currentModel
-                        ? `${getModelName(currentModel)} streaming...`
-                        : "streaming..."
-                  }
-                  cancelText={`hit ${formatKeybind(KEYBINDS.INTERRUPT_STREAM)} to cancel`}
-                  tokenCount={
-                    activeStreamMessageId
-                      ? aggregator.getStreamingTokenCount(activeStreamMessageId)
-                      : undefined
-                  }
-                  tps={
-                    activeStreamMessageId
-                      ? aggregator.getStreamingTPS(activeStreamMessageId)
-                      : undefined
-                  }
-                />
-              )}
-            </OutputContent>
-            {!autoScroll && (
-              <JumpToBottomIndicator onClick={jumpToBottom} type="button">
-                Press {formatKeybind(KEYBINDS.JUMP_TO_BOTTOM)} to jump to bottom
-              </JumpToBottomIndicator>
             )}
-          </OutputContainer>
-
-          <ChatInput
-            workspaceId={workspaceId}
-            onMessageSent={handleMessageSent}
-            onTruncateHistory={handleClearHistory}
-            onProviderConfig={handleProviderConfig}
-            disabled={!projectName || !branch}
-            isCompacting={isCompacting}
-            editingMessage={editingMessage}
-            onCancelEdit={handleCancelEdit}
-            onEditLastUserMessage={handleEditLastUserMessage}
-            canInterrupt={canInterrupt}
-            onReady={handleChatInputReady}
-          />
-        </ChatArea>
-
-        <ChatMetaSidebar workspaceId={workspaceId} chatAreaRef={chatAreaRef} />
-      </ViewContainer>
-    </ChatProvider>
+          </OutputContent>
+          {!autoScroll && (
+            <JumpToBottomIndicator onClick={jumpToBottom} type="button">
+              Press {formatKeybind(KEYBINDS.JUMP_TO_BOTTOM)} to jump to bottom
+            </JumpToBottomIndicator>
+          )}
+        </OutputContainer>
+
+        <ChatInput
+          workspaceId={workspaceId}
+          onMessageSent={handleMessageSent}
+          onTruncateHistory={handleClearHistory}
+          onProviderConfig={handleProviderConfig}
+          disabled={!projectName || !branch}
+          isCompacting={isCompacting}
+          editingMessage={editingMessage}
+          onCancelEdit={handleCancelEdit}
+          onEditLastUserMessage={handleEditLastUserMessage}
+          canInterrupt={canInterrupt}
+          onReady={handleChatInputReady}
+        />
+      </ChatArea>
+
+      <ChatMetaSidebar workspaceId={workspaceId} chatAreaRef={chatAreaRef} />
+    </ViewContainer>
   );
 };
 
diff --git a/src/components/ChatMetaSidebar.tsx b/src/components/ChatMetaSidebar.tsx
index 149df7448..69558d7a0 100644
--- a/src/components/ChatMetaSidebar.tsx
+++ b/src/components/ChatMetaSidebar.tsx
@@ -1,7 +1,7 @@
 import React from "react";
 import styled from "@emotion/styled";
 import { usePersistedState } from "@/hooks/usePersistedState";
-import { useChatContext } from "@/contexts/ChatContext";
+import { useWorkspaceUsage } from "@/stores/WorkspaceStore";
 import { use1MContext } from "@/hooks/use1MContext";
 import { useResizeObserver } from "@/hooks/useResizeObserver";
 import { CostsTab } from "./ChatMetaSidebar/CostsTab";
@@ -87,13 +87,13 @@ interface ChatMetaSidebarProps {
   chatAreaRef: React.RefObject<HTMLDivElement>;
 }
 
-export const ChatMetaSidebar: React.FC<ChatMetaSidebarProps> = ({ workspaceId, chatAreaRef }) => {
+const ChatMetaSidebarComponent: React.FC<ChatMetaSidebarProps> = ({ workspaceId, chatAreaRef }) => {
   const [selectedTab, setSelectedTab] = usePersistedState<TabType>(
     `chat-meta-sidebar-tab:${workspaceId}`,
     "costs"
   );
 
-  const { stats } = useChatContext();
+  const usage = useWorkspaceUsage(workspaceId);
   const [use1M] = use1MContext();
   const chatAreaSize = useResizeObserver(chatAreaRef);
 
@@ -103,14 +103,16 @@ export const ChatMetaSidebar: React.FC<ChatMetaSidebarProps> = ({ workspaceId, c
   const costsPanelId = `${baseId}-panel-costs`;
   const toolsPanelId = `${baseId}-panel-tools`;
 
-  const lastUsage = stats?.usageHistory[stats.usageHistory.length - 1];
+  const lastUsage = usage?.usageHistory[usage.usageHistory.length - 1];
 
   // Memoize vertical meter data calculation to prevent unnecessary re-renders
   const verticalMeterData = React.useMemo(() => {
-    return lastUsage && stats
-      ? calculateTokenMeterData(lastUsage, stats.model, use1M, true)
+    // Get model from last usage
+    const model = lastUsage?.model ?? "unknown";
+    return lastUsage
+      ? calculateTokenMeterData(lastUsage, model, use1M, true)
       : { segments: [], totalTokens: 0, totalPercentage: 0 };
-  }, [lastUsage, stats, use1M]);
+  }, [lastUsage, use1M]);
 
   // Calculate if we should show collapsed view with hysteresis
   // Strategy: Observe ChatArea width directly (independent of sidebar width)
@@ -168,7 +170,7 @@ export const ChatMetaSidebar: React.FC<ChatMetaSidebarProps> = ({ workspaceId, c
         <TabContent>
           {selectedTab === "costs" && (
             <div role="tabpanel" id={costsPanelId} aria-labelledby={costsTabId}>
-              <CostsTab />
+              <CostsTab workspaceId={workspaceId} />
             </div>
           )}
           {selectedTab === "tools" && (
@@ -184,3 +186,7 @@ export const ChatMetaSidebar: React.FC<ChatMetaSidebarProps> = ({ workspaceId, c
     </SidebarContainer>
   );
 };
+
+// Memoize to prevent re-renders when parent (AIView) re-renders during streaming
+// Only re-renders when workspaceId or chatAreaRef changes, or internal state updates
+export const ChatMetaSidebar = React.memo(ChatMetaSidebarComponent);
diff --git a/src/components/ChatMetaSidebar/ConsumerBreakdown.tsx b/src/components/ChatMetaSidebar/ConsumerBreakdown.tsx
new file mode 100644
index 000000000..70916e119
--- /dev/null
+++ b/src/components/ChatMetaSidebar/ConsumerBreakdown.tsx
@@ -0,0 +1,189 @@
+import React from "react";
+import styled from "@emotion/styled";
+import type { WorkspaceConsumersState } from "@/stores/WorkspaceStore";
+import { TooltipWrapper, Tooltip, HelpIndicator } from "../Tooltip";
+
+const TokenizerInfo = styled.div`
+  color: #888888;
+  font-size: 12px;
+  margin-bottom: 8px;
+`;
+
+const ConsumerList = styled.div`
+  display: flex;
+  flex-direction: column;
+  gap: 12px;
+`;
+
+const ConsumerRow = styled.div`
+  display: flex;
+  flex-direction: column;
+  gap: 4px;
+  margin-bottom: 8px;
+`;
+
+const ConsumerHeader = styled.div`
+  display: flex;
+  justify-content: space-between;
+  align-items: center;
+  margin-bottom: 4px;
+`;
+
+const ConsumerName = styled.span`
+  color: #cccccc;
+  font-weight: 500;
+  display: flex;
+  align-items: center;
+  gap: 4px;
+`;
+
+const ConsumerTokens = styled.span`
+  color: #888888;
+  font-size: 12px;
+`;
+
+const PercentageBarWrapper = styled.div`
+  display: flex;
+  flex-direction: column;
+  gap: 4px;
+`;
+
+const PercentageBar = styled.div`
+  width: 100%;
+  height: 8px;
+  background: #2a2a2a;
+  border-radius: 4px;
+  overflow: hidden;
+  display: flex;
+`;
+
+interface SegmentProps {
+  percentage: number; // interpolated into the segment's CSS `width` as a percent value
+}
+
+const PercentageFill = styled.div<SegmentProps>`
+  height: 100%;
+  width: ${(props) => props.percentage}%;
+  background: linear-gradient(90deg, #4a9eff 0%, #6b5ce7 100%);
+  transition: width 0.3s ease;
+`;
+
+const FixedSegment = styled.div<SegmentProps>`
+  height: 100%;
+  width: ${(props) => props.percentage}%;
+  background: var(--color-token-fixed);
+  transition: width 0.3s ease;
+`;
+
+const VariableSegment = styled.div<SegmentProps>`
+  height: 100%;
+  width: ${(props) => props.percentage}%;
+  background: var(--color-token-variable);
+  transition: width 0.3s ease;
+`;
+
+const TokenDetails = styled.div`
+  color: #666666;
+  font-size: 11px;
+  text-align: left;
+`;
+
+const LoadingState = styled.div`
+  color: #888888;
+  font-style: italic;
+  padding: 12px 0;
+`;
+
+const EmptyState = styled.div`
+  color: #666666;
+  font-style: italic;
+  padding: 12px 0;
+  text-align: left;
+
+  p {
+    margin: 4px 0;
+  }
+`;
+
+// Compact token label: counts of 1000 or more are shown in thousands with one decimal and a "k"
+const formatTokens = (count: number): string =>
+  count >= 1000 ? (count / 1000).toFixed(1) + "k" : count.toLocaleString();
+
+interface ConsumerBreakdownProps {
+  consumers: WorkspaceConsumersState; // fields read: isCalculating, consumers, totalTokens, tokenizerName
+}
+
+// Per-consumer token breakdown: loading text while calculating, empty text when there are
+// no consumers, otherwise one row per consumer with its token count, share, and usage bar.
+const ConsumerBreakdownComponent: React.FC<ConsumerBreakdownProps> = ({ consumers }) => {
+  if (consumers.isCalculating) {
+    return <LoadingState>Calculating consumer breakdown...</LoadingState>;
+  }
+  if (consumers.consumers.length === 0) {
+    return <EmptyState>No consumer data available</EmptyState>;
+  }
+
+  // Percentage of consumers.totalTokens; a zero total yields 0 rather than NaN/Infinity
+  const shareOfTotal = (tokens: number) =>
+    consumers.totalTokens > 0 ? (tokens / consumers.totalTokens) * 100 : 0;
+
+  return (
+    <>
+      <TokenizerInfo>
+        Tokenizer: <span>{consumers.tokenizerName}</span>
+      </TokenizerInfo>
+      <ConsumerList>
+        {consumers.consumers.map((consumer) => {
+          const fixedTokens = consumer.fixedTokens ?? 0;
+          const variableTokens = consumer.variableTokens ?? 0;
+          // Must be a real boolean: `0 && <X />` evaluates to 0, which React renders as "0"
+          const hasSplit = fixedTokens > 0 && variableTokens > 0;
+
+          return (
+            <ConsumerRow key={consumer.name}>
+              <ConsumerHeader>
+                <ConsumerName>
+                  {consumer.name}
+                  {consumer.name === "web_search" && (
+                    <TooltipWrapper inline>
+                      <HelpIndicator>?</HelpIndicator>
+                      <Tooltip className="tooltip" align="center" width="wide">
+                        Web search results are encrypted and decrypted server-side. This estimate is
+                        approximate.
+                      </Tooltip>
+                    </TooltipWrapper>
+                  )}
+                </ConsumerName>
+                <ConsumerTokens>
+                  {formatTokens(consumer.tokens)} ({consumer.percentage.toFixed(1)}%)
+                </ConsumerTokens>
+              </ConsumerHeader>
+              <PercentageBarWrapper>
+                <PercentageBar>
+                  {hasSplit ? (
+                    <>
+                      <FixedSegment percentage={shareOfTotal(fixedTokens)} />
+                      <VariableSegment percentage={shareOfTotal(variableTokens)} />
+                    </>
+                  ) : (
+                    <PercentageFill percentage={consumer.percentage} />
+                  )}
+                </PercentageBar>
+                {hasSplit && (
+                  <TokenDetails>
+                    Tool definition: {formatTokens(fixedTokens)} • Usage:{" "}
+                    {formatTokens(variableTokens)}
+                  </TokenDetails>
+                )}
+              </PercentageBarWrapper>
+            </ConsumerRow>
+          );
+        })}
+      </ConsumerList>
+    </>
+  );
+};
+
+// React.memo skips re-rendering when the parent re-renders with the same `consumers` reference;
+// a new render happens only once that prop's object identity changes (shallow prop comparison)
+export const ConsumerBreakdown = React.memo(ConsumerBreakdownComponent);
diff --git a/src/components/ChatMetaSidebar/CostsTab.tsx b/src/components/ChatMetaSidebar/CostsTab.tsx
index 8de087c79..1800555dc 100644
--- a/src/components/ChatMetaSidebar/CostsTab.tsx
+++ b/src/components/ChatMetaSidebar/CostsTab.tsx
@@ -1,7 +1,6 @@
 import React from "react";
 import styled from "@emotion/styled";
-import { useChatContext } from "@/contexts/ChatContext";
-import { TooltipWrapper, Tooltip, HelpIndicator } from "../Tooltip";
+import { useWorkspaceUsage, useWorkspaceConsumers } from "@/stores/WorkspaceStore";
 import { getModelStats } from "@/utils/tokens/modelStats";
 import { sumUsageHistory } from "@/utils/tokens/usageAggregator";
 import { usePersistedState } from "@/hooks/usePersistedState";
@@ -9,6 +8,7 @@ import { ToggleGroup, type ToggleOption } from "../ToggleGroup";
 import { use1MContext } from "@/hooks/use1MContext";
 import { supports1MContext } from "@/utils/ai/models";
 import { TOKEN_COMPONENT_COLORS } from "@/utils/tokens/tokenMeterUtils";
+import { ConsumerBreakdown } from "./ConsumerBreakdown";
 
 const Container = styled.div`
   color: #d4d4d4;
@@ -17,8 +17,9 @@ const Container = styled.div`
   line-height: 1.6;
 `;
 
-const Section = styled.div`
-  margin-bottom: 24px;
+const Section = styled.div<{ marginTop?: string; marginBottom?: string }>`
+  margin-bottom: ${(props) => props.marginBottom ?? "24px"};
+  margin-top: ${(props) => props.marginTop ?? "0"};
 `;
 
 const SectionTitle = styled.h3<{ dimmed?: boolean }>`
@@ -30,12 +31,6 @@ const SectionTitle = styled.h3<{ dimmed?: boolean }>`
   letter-spacing: 0.5px;
 `;
 
-const TokenizerInfo = styled.div`
-  color: #888888;
-  font-size: 12px;
-  margin-bottom: 8px;
-`;
-
 const ConsumerList = styled.div`
   display: flex;
   flex-direction: column;
@@ -87,20 +82,6 @@ interface SegmentProps {
   percentage: number;
 }
 
-const FixedSegment = styled.div<SegmentProps>`
-  height: 100%;
-  width: ${(props) => props.percentage}%;
-  background: var(--color-token-fixed);
-  transition: width 0.3s ease;
-`;
-
-const VariableSegment = styled.div<SegmentProps>`
-  height: 100%;
-  width: ${(props) => props.percentage}%;
-  background: var(--color-token-variable);
-  transition: width 0.3s ease;
-`;
-
 const InputSegment = styled.div<SegmentProps>`
   height: 100%;
   width: ${(props) => props.percentage}%;
@@ -129,22 +110,6 @@ const CachedSegment = styled.div<SegmentProps>`
   transition: width 0.3s ease;
 `;
 
-interface PercentageFillProps {
-  percentage: number;
-}
-
-const PercentageFill = styled.div<PercentageFillProps>`
-  height: 100%;
-  width: ${(props) => props.percentage}%;
-  background: var(--color-token-completion);
-  transition: width 0.3s ease;
-`;
-
-const LoadingState = styled.div`
-  color: #888888;
-  font-style: italic;
-`;
-
 const EmptyState = styled.div`
   color: #888888;
   text-align: center;
@@ -158,14 +123,6 @@ const ModelWarning = styled.div`
   font-style: italic;
 `;
 
-const TokenDetails = styled.div`
-  color: #888888;
-  font-size: 11px;
-  margin-top: 6px;
-  padding-left: 4px;
-  line-height: 1.4;
-`;
-
 const DetailsTable = styled.table`
   width: 100%;
   margin-top: 4px;
@@ -222,13 +179,6 @@ const DimmedCost = styled.span`
   font-style: italic;
 `;
 
-const SectionHeader = styled.div`
-  display: flex;
-  justify-content: flex-start;
-  align-items: center;
-  margin-bottom: 12px;
-`;
-
 // Format token display - show k for thousands with 1 decimal
 const formatTokens = (tokens: number) =>
   tokens >= 1000 ? `${(tokens / 1000).toFixed(1)}k` : tokens.toLocaleString();
@@ -267,25 +217,27 @@ const calculateElevatedCost = (tokens: number, standardRate: number, isInput: bo
 type ViewMode = "last-request" | "session";
 
 const VIEW_MODE_OPTIONS: Array<ToggleOption<ViewMode>> = [
-  { value: "last-request", label: "Last Request" },
   { value: "session", label: "Session" },
+  { value: "last-request", label: "Last Request" },
 ];
 
-export const CostsTab: React.FC = () => {
-  const { stats, isCalculating } = useChatContext();
-  const [viewMode, setViewMode] = usePersistedState<ViewMode>("costsTab:viewMode", "last-request");
+interface CostsTabProps {
+  workspaceId: string;
+}
+
+const CostsTabComponent: React.FC<CostsTabProps> = ({ workspaceId }) => {
+  const usage = useWorkspaceUsage(workspaceId);
+  const consumers = useWorkspaceConsumers(workspaceId);
+  const [viewMode, setViewMode] = usePersistedState<ViewMode>("costsTab:viewMode", "session");
   const [use1M] = use1MContext();
 
-  // Only show loading if we don't have any stats yet
-  if (isCalculating && !stats) {
-    return (
-      <Container>
-        <LoadingState>Calculating token usage...</LoadingState>
-      </Container>
-    );
-  }
+  // Check if we have any data to display
+  const hasUsageData = usage && usage.usageHistory.length > 0;
+  const hasConsumerData = consumers && (consumers.totalTokens > 0 || consumers.isCalculating);
+  const hasAnyData = hasUsageData || hasConsumerData;
 
-  if (!stats || stats.totalTokens === 0) {
+  // Only show empty state if truly no data anywhere
+  if (!hasAnyData) {
     return (
       <Container>
         <EmptyState>
@@ -296,37 +248,46 @@ export const CostsTab: React.FC = () => {
     );
   }
 
-  // Compute displayUsage based on view mode
+  // Context Usage always shows Last Request data
+  const lastRequestUsage = hasUsageData
+    ? usage.usageHistory[usage.usageHistory.length - 1]
+    : undefined;
+
+  // Cost and Details table use viewMode
   const displayUsage =
     viewMode === "last-request"
-      ? stats.usageHistory[stats.usageHistory.length - 1]
-      : sumUsageHistory(stats.usageHistory);
+      ? usage.usageHistory[usage.usageHistory.length - 1]
+      : sumUsageHistory(usage.usageHistory);
 
   return (
     <Container>
-      {stats.usageHistory.length > 0 && (
-        <Section>
-          <SectionHeader>
-            <ToggleGroup options={VIEW_MODE_OPTIONS} value={viewMode} onChange={setViewMode} />
-          </SectionHeader>
-          <ConsumerList>
+      {hasUsageData && (
+        <Section data-testid="context-usage-section" marginTop="8px" marginBottom="20px">
+          <ConsumerList data-testid="context-usage-list">
             {(() => {
+              // Context Usage always uses last request
+              const contextUsage = lastRequestUsage;
+
+              // Get model from last request (for context window display)
+              const model = lastRequestUsage?.model ?? "unknown";
+
               // Get max tokens for the model from the model stats database
-              const modelStats = getModelStats(stats.model);
+              const modelStats = getModelStats(model);
               const baseMaxTokens = modelStats?.max_input_tokens;
               // Check if 1M context is active and supported
-              const is1MActive = use1M && supports1MContext(stats.model);
+              const is1MActive = use1M && supports1MContext(model);
               const maxTokens = is1MActive ? 1_000_000 : baseMaxTokens;
+
               // Total tokens includes cache creation (they're input tokens sent for caching)
-              const totalUsed = displayUsage
-                ? displayUsage.input.tokens +
-                  displayUsage.cached.tokens +
-                  displayUsage.cacheCreate.tokens +
-                  displayUsage.output.tokens +
-                  displayUsage.reasoning.tokens
+              const totalUsed = contextUsage
+                ? contextUsage.input.tokens +
+                  contextUsage.cached.tokens +
+                  contextUsage.cacheCreate.tokens +
+                  contextUsage.output.tokens +
+                  contextUsage.reasoning.tokens
                 : 0;
 
-              // Calculate percentages
+              // Calculate percentages based on max tokens (actual context window usage)
               let inputPercentage: number;
               let outputPercentage: number;
               let cachedPercentage: number;
@@ -335,34 +296,25 @@ export const CostsTab: React.FC = () => {
               let showWarning = false;
               let totalPercentage: number;
 
-              // For session mode, always show bar as full (100%) based on relative token distribution
-              if (viewMode === "session" && displayUsage && totalUsed > 0) {
-                // Scale to total tokens used (bar always full)
-                inputPercentage = (displayUsage.input.tokens / totalUsed) * 100;
-                outputPercentage = (displayUsage.output.tokens / totalUsed) * 100;
-                cachedPercentage = (displayUsage.cached.tokens / totalUsed) * 100;
-                cacheCreatePercentage = (displayUsage.cacheCreate.tokens / totalUsed) * 100;
-                reasoningPercentage = (displayUsage.reasoning.tokens / totalUsed) * 100;
-                totalPercentage = 100;
-              } else if (maxTokens && displayUsage) {
+              if (maxTokens && contextUsage) {
                 // We know the model's max tokens - show actual context window usage
-                inputPercentage = (displayUsage.input.tokens / maxTokens) * 100;
-                outputPercentage = (displayUsage.output.tokens / maxTokens) * 100;
-                cachedPercentage = (displayUsage.cached.tokens / maxTokens) * 100;
-                cacheCreatePercentage = (displayUsage.cacheCreate.tokens / maxTokens) * 100;
-                reasoningPercentage = (displayUsage.reasoning.tokens / maxTokens) * 100;
+                inputPercentage = (contextUsage.input.tokens / maxTokens) * 100;
+                outputPercentage = (contextUsage.output.tokens / maxTokens) * 100;
+                cachedPercentage = (contextUsage.cached.tokens / maxTokens) * 100;
+                cacheCreatePercentage = (contextUsage.cacheCreate.tokens / maxTokens) * 100;
+                reasoningPercentage = (contextUsage.reasoning.tokens / maxTokens) * 100;
                 totalPercentage = (totalUsed / maxTokens) * 100;
-              } else if (displayUsage) {
+              } else if (contextUsage) {
                 // Unknown model - scale to total tokens used
-                inputPercentage = totalUsed > 0 ? (displayUsage.input.tokens / totalUsed) * 100 : 0;
+                inputPercentage = totalUsed > 0 ? (contextUsage.input.tokens / totalUsed) * 100 : 0;
                 outputPercentage =
-                  totalUsed > 0 ? (displayUsage.output.tokens / totalUsed) * 100 : 0;
+                  totalUsed > 0 ? (contextUsage.output.tokens / totalUsed) * 100 : 0;
                 cachedPercentage =
-                  totalUsed > 0 ? (displayUsage.cached.tokens / totalUsed) * 100 : 0;
+                  totalUsed > 0 ? (contextUsage.cached.tokens / totalUsed) * 100 : 0;
                 cacheCreatePercentage =
-                  totalUsed > 0 ? (displayUsage.cacheCreate.tokens / totalUsed) * 100 : 0;
+                  totalUsed > 0 ? (contextUsage.cacheCreate.tokens / totalUsed) * 100 : 0;
                 reasoningPercentage =
-                  totalUsed > 0 ? (displayUsage.reasoning.tokens / totalUsed) * 100 : 0;
+                  totalUsed > 0 ? (contextUsage.reasoning.tokens / totalUsed) * 100 : 0;
                 totalPercentage = 100;
                 showWarning = true;
               } else {
@@ -375,10 +327,52 @@ export const CostsTab: React.FC = () => {
               }
 
               const totalDisplay = formatTokens(totalUsed);
-              // For session mode, don't show max tokens or percentage
-              const maxDisplay =
-                viewMode === "session" ? "" : maxTokens ? ` / ${formatTokens(maxTokens)}` : "";
-              const showPercentage = viewMode !== "session";
+              const maxDisplay = maxTokens ? ` / ${formatTokens(maxTokens)}` : "";
+
+              return (
+                <>
+                  <ConsumerRow data-testid="context-usage">
+                    <ConsumerHeader>
+                      <ConsumerName>Context Usage</ConsumerName>
+                      <ConsumerTokens>
+                        {totalDisplay}
+                        {maxDisplay}
+                        {` (${totalPercentage.toFixed(1)}%)`}
+                      </ConsumerTokens>
+                    </ConsumerHeader>
+                    <PercentageBarWrapper>
+                      <PercentageBar>
+                        {cachedPercentage > 0 && <CachedSegment percentage={cachedPercentage} />}
+                        {cacheCreatePercentage > 0 && (
+                          <CachedSegment percentage={cacheCreatePercentage} />
+                        )}
+                        <InputSegment percentage={inputPercentage} />
+                        <OutputSegment percentage={outputPercentage} />
+                        {reasoningPercentage > 0 && (
+                          <ThinkingSegment percentage={reasoningPercentage} />
+                        )}
+                      </PercentageBar>
+                    </PercentageBarWrapper>
+                  </ConsumerRow>
+                  {showWarning && (
+                    <ModelWarning>Unknown model limits - showing relative usage only</ModelWarning>
+                  )}
+                </>
+              );
+            })()}
+          </ConsumerList>
+        </Section>
+      )}
+
+      {hasUsageData && (
+        <Section data-testid="cost-section">
+          <ConsumerList>
+            {(() => {
+              // Cost and Details use viewMode-dependent data
+              // Get model from the displayUsage (which could be last request or session sum)
+              const model = displayUsage?.model ?? lastRequestUsage?.model ?? "unknown";
+              const modelStats = getModelStats(model);
+              const is1MActive = use1M && supports1MContext(model);
 
               // Helper to calculate cost percentage
               const getCostPercentage = (cost: number | undefined, total: number | undefined) =>
@@ -481,33 +475,17 @@ export const CostsTab: React.FC = () => {
 
               return (
                 <>
-                  <ConsumerRow>
-                    <ConsumerHeader>
-                      <ConsumerName>Token Usage</ConsumerName>
-                      <ConsumerTokens>
-                        {totalDisplay}
-                        {maxDisplay}
-                        {showPercentage && ` (${totalPercentage.toFixed(1)}%)`}
-                      </ConsumerTokens>
-                    </ConsumerHeader>
-                    <PercentageBarWrapper>
-                      <PercentageBar>
-                        {cachedPercentage > 0 && <CachedSegment percentage={cachedPercentage} />}
-                        {cacheCreatePercentage > 0 && (
-                          <CachedSegment percentage={cacheCreatePercentage} />
-                        )}
-                        <InputSegment percentage={inputPercentage} />
-                        <OutputSegment percentage={outputPercentage} />
-                        {reasoningPercentage > 0 && (
-                          <ThinkingSegment percentage={reasoningPercentage} />
-                        )}
-                      </PercentageBar>
-                    </PercentageBarWrapper>
-                  </ConsumerRow>
                   {totalCost !== undefined && totalCost >= 0 && (
-                    <ConsumerRow>
-                      <ConsumerHeader>
-                        <ConsumerName>Cost</ConsumerName>
+                    <ConsumerRow data-testid="cost-bar">
+                      <ConsumerHeader data-testid="cost-header" style={{ marginBottom: "8px" }}>
+                        <div style={{ display: "flex", gap: "12px", alignItems: "center" }}>
+                          <ConsumerName>Cost</ConsumerName>
+                          <ToggleGroup
+                            options={VIEW_MODE_OPTIONS}
+                            value={viewMode}
+                            onChange={setViewMode}
+                          />
+                        </div>
                         <ConsumerTokens>{formatCostWithDollar(totalCost)}</ConsumerTokens>
                       </ConsumerHeader>
                       <PercentageBarWrapper>
@@ -527,7 +505,7 @@ export const CostsTab: React.FC = () => {
                       </PercentageBarWrapper>
                     </ConsumerRow>
                   )}
-                  <DetailsTable>
+                  <DetailsTable data-testid="cost-details">
                     <thead>
                       <DetailsHeaderRow>
                         <DetailsHeader>Component</DetailsHeader>
@@ -559,9 +537,6 @@ export const CostsTab: React.FC = () => {
                       })}
                     </tbody>
                   </DetailsTable>
-                  {showWarning && (
-                    <ModelWarning>Unknown model limits - showing relative usage only</ModelWarning>
-                  )}
                 </>
               );
             })()}
@@ -571,63 +546,12 @@ export const CostsTab: React.FC = () => {
 
       <Section>
         <SectionTitle dimmed>Breakdown by Consumer</SectionTitle>
-        <TokenizerInfo>
-          Tokenizer: <span>{stats.tokenizerName}</span>
-        </TokenizerInfo>
-        <ConsumerList>
-          {stats.consumers.map((consumer) => {
-            // Calculate percentages for fixed and variable segments
-            const fixedPercentage = consumer.fixedTokens
-              ? (consumer.fixedTokens / stats.totalTokens) * 100
-              : 0;
-            const variablePercentage = consumer.variableTokens
-              ? (consumer.variableTokens / stats.totalTokens) * 100
-              : 0;
-
-            const tokenDisplay = formatTokens(consumer.tokens);
-
-            return (
-              <ConsumerRow key={consumer.name}>
-                <ConsumerHeader>
-                  <ConsumerName>
-                    {consumer.name}
-                    {consumer.name === "web_search" && (
-                      <TooltipWrapper inline>
-                        <HelpIndicator>?</HelpIndicator>
-                        <Tooltip className="tooltip" align="center" width="wide">
-                          Web search results are encrypted and decrypted server-side. This estimate
-                          is approximate.
-                        </Tooltip>
-                      </TooltipWrapper>
-                    )}
-                  </ConsumerName>
-                  <ConsumerTokens>
-                    {tokenDisplay} ({consumer.percentage.toFixed(1)}%)
-                  </ConsumerTokens>
-                </ConsumerHeader>
-                <PercentageBarWrapper>
-                  <PercentageBar>
-                    {consumer.fixedTokens && consumer.variableTokens ? (
-                      <>
-                        <FixedSegment percentage={fixedPercentage} />
-                        <VariableSegment percentage={variablePercentage} />
-                      </>
-                    ) : (
-                      <PercentageFill percentage={consumer.percentage} />
-                    )}
-                  </PercentageBar>
-                  {consumer.fixedTokens && consumer.variableTokens && (
-                    <TokenDetails>
-                      Tool definition: {formatTokens(consumer.fixedTokens)} • Usage:{" "}
-                      {formatTokens(consumer.variableTokens)}
-                    </TokenDetails>
-                  )}
-                </PercentageBarWrapper>
-              </ConsumerRow>
-            );
-          })}
-        </ConsumerList>
+        <ConsumerBreakdown consumers={consumers} />
       </Section>
     </Container>
   );
 };
+
+// Memoize to prevent re-renders when parent (AIView) re-renders during streaming
+// Only re-renders when workspaceId changes or internal hook data (usage/consumers) updates
+export const CostsTab = React.memo(CostsTabComponent);
diff --git a/src/contexts/ChatContext.tsx b/src/contexts/ChatContext.tsx
deleted file mode 100644
index 3a64187be..000000000
--- a/src/contexts/ChatContext.tsx
+++ /dev/null
@@ -1,103 +0,0 @@
-import type { ReactNode } from "react";
-import React, { createContext, useContext, useState, useEffect, useRef } from "react";
-import type { CmuxMessage, DisplayedMessage } from "@/types/message";
-import type { ChatStats } from "@/types/chatStats";
-import { TokenStatsWorker } from "@/utils/tokens/TokenStatsWorker";
-
-interface ChatContextType {
-  messages: DisplayedMessage[];
-  stats: ChatStats | null;
-  isCalculating: boolean;
-}
-
-const ChatContext = createContext<ChatContextType | undefined>(undefined);
-
-interface ChatProviderProps {
-  children: ReactNode;
-  messages: DisplayedMessage[];
-  cmuxMessages: CmuxMessage[];
-  model: string;
-}
-
-export const ChatProvider: React.FC<ChatProviderProps> = ({
-  children,
-  messages,
-  cmuxMessages,
-  model,
-}) => {
-  const [stats, setStats] = useState<ChatStats | null>(null);
-  const [isCalculating, setIsCalculating] = useState(false);
-  // Track if we've already scheduled a calculation to prevent timer spam
-  const calculationScheduledRef = useRef(false);
-  // Web Worker for off-thread token calculation
-  const workerRef = useRef<TokenStatsWorker | null>(null);
-
-  // Initialize worker once
-  useEffect(() => {
-    workerRef.current = new TokenStatsWorker();
-    return () => {
-      workerRef.current?.terminate();
-      workerRef.current = null;
-    };
-  }, []);
-
-  useEffect(() => {
-    if (cmuxMessages.length === 0) {
-      setStats({
-        consumers: [],
-        totalTokens: 0,
-        model,
-        tokenizerName: "No messages",
-        usageHistory: [],
-      });
-      return;
-    }
-
-    // IMPORTANT: Prevent duplicate timers during rapid events (reasoning deltas)
-    // During message loading, 600+ reasoning-delta events fire rapidly, each triggering
-    // this effect. Without this guard, we'd start 600 timers that all eventually run!
-    if (calculationScheduledRef.current) return;
-
-    calculationScheduledRef.current = true;
-
-    // Show calculating state immediately (safe now that aggregator cache provides stable refs)
-    setIsCalculating(true);
-
-    // Debounce calculation by 100ms to avoid blocking on rapid updates
-    const timeoutId = setTimeout(() => {
-      // Calculate stats in Web Worker (off main thread)
-      workerRef.current
-        ?.calculate(cmuxMessages, model)
-        .then((calculatedStats) => {
-          setStats(calculatedStats);
-        })
-        .catch((error) => {
-          console.error("Failed to calculate token stats:", error);
-        })
-        .finally(() => {
-          setIsCalculating(false);
-          calculationScheduledRef.current = false;
-        });
-    }, 100);
-
-    return () => {
-      clearTimeout(timeoutId);
-      calculationScheduledRef.current = false;
-      setIsCalculating(false);
-    };
-  }, [cmuxMessages, model]);
-
-  return (
-    <ChatContext.Provider value={{ messages, stats, isCalculating }}>
-      {children}
-    </ChatContext.Provider>
-  );
-};
-
-export const useChatContext = () => {
-  const context = useContext(ChatContext);
-  if (!context) {
-    throw new Error("useChatContext must be used within a ChatProvider");
-  }
-  return context;
-};
diff --git a/src/stores/WorkspaceConsumerManager.ts b/src/stores/WorkspaceConsumerManager.ts
new file mode 100644
index 000000000..628bc7f86
--- /dev/null
+++ b/src/stores/WorkspaceConsumerManager.ts
@@ -0,0 +1,255 @@
+import type { WorkspaceConsumersState } from "./WorkspaceStore";
+import { TokenStatsWorker } from "@/utils/tokens/TokenStatsWorker";
+import type { StreamingMessageAggregator } from "@/utils/messages/StreamingMessageAggregator";
+
+/**
+ * Manages consumer token calculations for workspaces.
+ *
+ * Responsibilities:
+ * - Debounces rapid calculation requests (e.g., multiple tool-call-end events)
+ * - Caches calculated results to avoid redundant work (source of truth)
+ * - Tracks calculation state per workspace
+ * - Executes Web Worker tokenization calculations
+ * - Handles cleanup and disposal
+ *
+ * Architecture:
+ * - Single responsibility: consumer tokenization calculations
+ * - Owns the source-of-truth cache (calculated consumer data)
+ * - WorkspaceStore orchestrates (decides when to calculate)
+ * - This manager executes (performs calculations, manages cache)
+ *
+ * Dual-Cache Design:
+ * - WorkspaceConsumerManager.cache: Source of truth for calculated data
+ * - WorkspaceStore.consumersStore (MapStore): Subscription management only
+ *   (components subscribe to workspace changes, delegates to manager for state)
+ *
+ * Lifecycle:
+ * - removeWorkspace()/dispose() may run while a worker calculation is in flight;
+ *   the late result is then discarded instead of being cached or announced.
+ */
+export class WorkspaceConsumerManager {
+  // Web Worker for tokenization (shared across workspaces)
+  private readonly tokenWorker: TokenStatsWorker;
+
+  // Track scheduled calculations (in debounce window, not yet executing)
+  private scheduledCalcs = new Set<string>();
+
+  // Track executing calculations (Web Worker running)
+  private pendingCalcs = new Set<string>();
+
+  // Track workspaces that need recalculation after current one completes
+  private needsRecalc = new Map<string, StreamingMessageAggregator>();
+
+  // Cache calculated consumer data (persists across bumps)
+  private cache = new Map<string, WorkspaceConsumersState>();
+
+  // Debounce timers for consumer calculations (prevents rapid-fire during tool sequences).
+  // Typed via ReturnType so the handle type follows whichever setTimeout typing is in scope.
+  private debounceTimers = new Map<string, ReturnType<typeof setTimeout>>();
+
+  // Callback to bump the store whenever calculation state changes (scheduled, started, finished)
+  private readonly onCalculationComplete: (workspaceId: string) => void;
+
+  /**
+   * @param onCalculationComplete Invoked with the workspace id each time that workspace's
+   *   calculation state changes, so the owning store can notify subscribers.
+   */
+  constructor(onCalculationComplete: (workspaceId: string) => void) {
+    this.tokenWorker = new TokenStatsWorker();
+    this.onCalculationComplete = onCalculationComplete;
+  }
+
+  /**
+   * Get cached state without side effects.
+   * Returns null if no cache exists.
+   */
+  getCachedState(workspaceId: string): WorkspaceConsumersState | null {
+    return this.cache.get(workspaceId) ?? null;
+  }
+
+  /**
+   * Check if calculation is pending or scheduled for workspace.
+   */
+  isPending(workspaceId: string): boolean {
+    return this.scheduledCalcs.has(workspaceId) || this.pendingCalcs.has(workspaceId);
+  }
+
+  /**
+   * Get current state synchronously without triggering calculations.
+   * Returns cached result if available, otherwise returns default state.
+   *
+   * Note: This is called from WorkspaceStore.getWorkspaceConsumers(),
+   * which handles the lazy trigger logic separately.
+   */
+  getStateSync(workspaceId: string): WorkspaceConsumersState {
+    const cached = this.cache.get(workspaceId);
+    if (cached) {
+      return cached;
+    }
+
+    // Default state while scheduled/calculating or before first calculation
+    return {
+      consumers: [],
+      tokenizerName: "",
+      totalTokens: 0,
+      isCalculating: this.isPending(workspaceId),
+    };
+  }
+
+  /**
+   * Schedule a consumer calculation (debounced).
+   * Batches rapid events (e.g., multiple tool-call-end) into single calculation.
+   * Marks as "calculating" immediately to prevent UI flash.
+   *
+   * If a calculation is already running, marks workspace for recalculation
+   * after the current one completes.
+   */
+  scheduleCalculation(workspaceId: string, aggregator: StreamingMessageAggregator): void {
+    // Clear existing timer for this workspace (and forget the stale handle)
+    const existingTimer = this.debounceTimers.get(workspaceId);
+    if (existingTimer !== undefined) {
+      clearTimeout(existingTimer);
+      this.debounceTimers.delete(workspaceId);
+    }
+
+    // If already executing, queue a follow-up recalculation
+    if (this.pendingCalcs.has(workspaceId)) {
+      this.needsRecalc.set(workspaceId, aggregator);
+      return;
+    }
+
+    // Mark as scheduled immediately (triggers "Calculating..." UI, prevents flash)
+    const isNewSchedule = !this.scheduledCalcs.has(workspaceId);
+    this.scheduledCalcs.add(workspaceId);
+
+    // Notify store if newly scheduled (triggers UI update)
+    if (isNewSchedule) {
+      this.onCalculationComplete(workspaceId);
+    }
+
+    // Set new timer (150ms - imperceptible to humans, batches rapid events)
+    const timer = setTimeout(() => {
+      this.debounceTimers.delete(workspaceId);
+      this.scheduledCalcs.delete(workspaceId); // Move from scheduled to pending
+      this.executeCalculation(workspaceId, aggregator);
+    }, 150);
+
+    this.debounceTimers.set(workspaceId, timer);
+  }
+
+  /**
+   * Execute background consumer calculation.
+   * Only one calculation per workspace at a time.
+   *
+   * The result is cached and announced only if the workspace is still tracked as
+   * pending when the worker settles; removeWorkspace()/dispose() clear that marker.
+   */
+  private executeCalculation(workspaceId: string, aggregator: StreamingMessageAggregator): void {
+    // Skip if already calculating
+    if (this.pendingCalcs.has(workspaceId)) {
+      return;
+    }
+
+    this.pendingCalcs.add(workspaceId);
+
+    // Mark as calculating and notify store
+    this.onCalculationComplete(workspaceId);
+
+    // Fire-and-forget: the worker call is awaited inside the IIFE, not by the caller
+    void (async () => {
+      try {
+        const messages = aggregator.getAllMessages();
+        const model = aggregator.getCurrentModel() ?? "unknown";
+
+        // Calculate in Web Worker (off main thread)
+        const fullStats = await this.tokenWorker.calculate(messages, model);
+
+        // Workspace removed or manager disposed while the worker ran: drop the late
+        // result instead of resurrecting deleted state and bumping the store.
+        if (!this.pendingCalcs.has(workspaceId)) {
+          return;
+        }
+
+        // Store result in cache
+        this.cache.set(workspaceId, {
+          consumers: fullStats.consumers,
+          tokenizerName: fullStats.tokenizerName,
+          totalTokens: fullStats.totalTokens,
+          isCalculating: false,
+        });
+
+        // Notify store to trigger re-render
+        this.onCalculationComplete(workspaceId);
+      } catch (error) {
+        // Cancellations are expected during rapid events - don't cache, don't log
+        // This allows lazy trigger to retry on next access
+        if (error instanceof Error && error.message === "Cancelled by newer request") {
+          return;
+        }
+
+        // Failure after removal/disposal (possibly because the worker was terminated): stay silent
+        if (!this.pendingCalcs.has(workspaceId)) {
+          return;
+        }
+
+        // Real errors: log and cache empty result
+        console.error(`[WorkspaceConsumerManager] Calculation failed for ${workspaceId}:`, error);
+        this.cache.set(workspaceId, {
+          consumers: [],
+          tokenizerName: "",
+          totalTokens: 0,
+          isCalculating: false,
+        });
+        this.onCalculationComplete(workspaceId);
+      } finally {
+        this.pendingCalcs.delete(workspaceId);
+
+        // If recalculation was requested while we were running, schedule it now
+        const needsRecalcAggregator = this.needsRecalc.get(workspaceId);
+        if (needsRecalcAggregator) {
+          this.needsRecalc.delete(workspaceId);
+          this.scheduleCalculation(workspaceId, needsRecalcAggregator);
+        }
+      }
+    })();
+  }
+
+  /**
+   * Remove workspace state and cleanup timers.
+   */
+  removeWorkspace(workspaceId: string): void {
+    // Clear debounce timer
+    const timer = this.debounceTimers.get(workspaceId);
+    if (timer !== undefined) {
+      clearTimeout(timer);
+      this.debounceTimers.delete(workspaceId);
+    }
+
+    // Clean up state
+    this.cache.delete(workspaceId);
+    this.scheduledCalcs.delete(workspaceId);
+    this.pendingCalcs.delete(workspaceId);
+    this.needsRecalc.delete(workspaceId);
+  }
+
+  /**
+   * Cleanup all resources.
+   */
+  dispose(): void {
+    // Clear all debounce timers
+    for (const timer of this.debounceTimers.values()) {
+      clearTimeout(timer);
+    }
+    this.debounceTimers.clear();
+
+    // Terminate worker
+    this.tokenWorker.terminate();
+
+    // Clear state. needsRecalc must go too: otherwise an in-flight calculation that
+    // settles after disposal would schedule a follow-up run on the terminated worker.
+    this.cache.clear();
+    this.scheduledCalcs.clear();
+    this.pendingCalcs.clear();
+    this.needsRecalc.clear();
+  }
+}
diff --git a/src/stores/WorkspaceStore.ts b/src/stores/WorkspaceStore.ts
index 881e106fb..5e9b97778 100644
--- a/src/stores/WorkspaceStore.ts
+++ b/src/stores/WorkspaceStore.ts
@@ -23,6 +23,11 @@ import {
   isReasoningEnd,
 } from "@/types/ipc";
 import { MapStore } from "./MapStore";
+import { createDisplayUsage } from "@/utils/tokens/tokenStatsCalculator";
+import { WorkspaceConsumerManager } from "./WorkspaceConsumerManager";
+import type { ChatUsageDisplay } from "@/utils/tokens/usageAggregator";
+import type { TokenConsumer } from "@/types/chatStats";
+import type { LanguageModelV2Usage } from "@ai-sdk/provider";
 
 export interface WorkspaceState {
   messages: DisplayedMessage[];
@@ -61,6 +66,26 @@ function extractSidebarState(aggregator: StreamingMessageAggregator): WorkspaceS
  */
 type DerivedState = Record<string, number>;
 
+/**
+ * Usage metadata extracted from API responses (no tokenization).
+ * Updates instantly when usage metadata arrives.
+ */
+export interface WorkspaceUsageState {
+  usageHistory: ChatUsageDisplay[]; // Built from assistant messages that carry usage metadata
+  totalTokens: number; // Sum of all token categories across usageHistory
+}
+
+/**
+ * Consumer breakdown requiring tokenization (lazy calculation).
+ * Updates after async Web Worker calculation completes.
+ */
+export interface WorkspaceConsumersState {
+  consumers: TokenConsumer[]; // Empty in the default and failed-calculation states
+  tokenizerName: string; // Empty string in the default and failed-calculation states
+  totalTokens: number; // Total from tokenization (may differ from usage totalTokens)
+  isCalculating: boolean; // True only while uncached and a calculation is scheduled/running
+}
+
 /**
  * External store for workspace aggregators and streaming state.
  *
@@ -76,6 +101,15 @@ export class WorkspaceStore {
   // Derived aggregate state (computed from multiple workspaces)
   private derived = new MapStore<string, DerivedState>();
 
+  // Usage and consumer stores (two-store approach for CostsTab optimization)
+  private usageStore = new MapStore<string, WorkspaceUsageState>();
+  private consumersStore = new MapStore<string, WorkspaceConsumersState>();
+
+  // Manager for consumer calculations (debouncing, caching, lazy loading)
+  // Architecture: WorkspaceStore orchestrates (decides when), manager executes (performs calculations)
+  // Dual-cache: consumersStore (MapStore) handles subscriptions, manager owns data cache
+  private readonly consumerManager: WorkspaceConsumerManager;
+
   // Supporting data structures
   private aggregators = new Map<string, StreamingMessageAggregator>();
   private ipcUnsubscribers = new Map<string, () => void>();
@@ -95,6 +129,11 @@ export class WorkspaceStore {
   constructor(onModelUsed?: (model: string) => void) {
     this.onModelUsed = onModelUsed;
 
+    // Initialize consumer calculation manager
+    this.consumerManager = new WorkspaceConsumerManager((workspaceId) => {
+      this.consumersStore.bump(workspaceId);
+    });
+
     // Note: We DON'T auto-check recency on every state bump.
     // Instead, checkAndBumpRecencyIfChanged() is called explicitly after
     // message completion events (not on deltas) to prevent App.tsx re-renders.
@@ -262,6 +301,105 @@ export class WorkspaceStore {
     return aggregator ? aggregator.getCurrentTodos() : [];
   }
 
+  /**
+   * Build the usage snapshot for a workspace straight from message metadata
+   * (no tokenization). Every entry is priced with the model recorded on its own
+   * message, falling back to the aggregator's current model, then "unknown".
+   */
+  getWorkspaceUsage(workspaceId: string): WorkspaceUsageState {
+    const computeUsage = (): WorkspaceUsageState => {
+      const aggregator = this.getOrCreateAggregator(workspaceId);
+      const usageHistory: ChatUsageDisplay[] = [];
+
+      for (const message of aggregator.getAllMessages()) {
+        if (message.role !== "assistant") {
+          continue;
+        }
+        const metadata = message.metadata;
+        if (!metadata?.usage) {
+          continue;
+        }
+
+        // Per-message model first; the workspace-wide model is only a fallback
+        const model = metadata.model ?? aggregator.getCurrentModel() ?? "unknown";
+        const entry = createDisplayUsage(metadata.usage, model, metadata.providerMetadata);
+        if (entry) {
+          usageHistory.push(entry);
+        }
+      }
+
+      // Add up every token category across the collected entries
+      let totalTokens = 0;
+      for (const entry of usageHistory) {
+        totalTokens =
+          totalTokens +
+          entry.input.tokens +
+          entry.cached.tokens +
+          entry.cacheCreate.tokens +
+          entry.output.tokens +
+          entry.reasoning.tokens;
+      }
+
+      return { usageHistory, totalTokens };
+    };
+
+    return this.usageStore.get(workspaceId, computeUsage);
+  }
+
+  /**
+   * Return the consumer breakdown for a workspace (it may still be calculating).
+   *
+   * The lazy trigger below is evaluated on every call, before consumersStore.get(),
+   * so a calculation is scheduled whenever the workspace is caught up, has messages,
+   * and the manager has neither a cached result nor a scheduled/running calculation.
+   */
+  getWorkspaceConsumers(workspaceId: string): WorkspaceConsumersState {
+    const manager = this.consumerManager;
+    const aggregator = this.aggregators.get(workspaceId);
+    const isCaughtUp = this.caughtUp.get(workspaceId) ?? false;
+    const hasCachedResult = manager.getCachedState(workspaceId) !== null;
+    const calculationInFlight = manager.isPending(workspaceId);
+
+    // Lazy trigger: only when nothing is cached, nothing is in flight, and history is loaded
+    const needsCalculation = !hasCachedResult && !calculationInFlight && isCaughtUp;
+    if (needsCalculation && aggregator !== undefined) {
+      if (aggregator.getAllMessages().length > 0) {
+        manager.scheduleCalculation(workspaceId, aggregator);
+      }
+    }
+
+    // consumersStore handles subscriptions; the state itself comes from the manager
+    const readManagerState = () => manager.getStateSync(workspaceId);
+    return this.consumersStore.get(workspaceId, readManagerState);
+  }
+
+  /** Subscribe `listener` to usage store changes for a specific workspace. */
+  subscribeUsage(workspaceId: string, listener: () => void): () => void {
+    // Hand back whatever the keyed subscription returns (typed as a teardown callback)
+    const unsubscribe = this.usageStore.subscribeKey(workspaceId, listener);
+    return unsubscribe;
+  }
+
+  /** Subscribe `listener` to consumer store changes for a specific workspace. */
+  subscribeConsumers(workspaceId: string, listener: () => void): () => void {
+    // Hand back whatever the keyed subscription returns (typed as a teardown callback)
+    const unsubscribe = this.consumersStore.subscribeKey(workspaceId, listener);
+    return unsubscribe;
+  }
+
+  /**
+   * Bump the usage store for `workspaceId`, but only when `metadata` carries a
+   * truthy `usage` field; otherwise this is a no-op.
+   */
+  private bumpUsageIfPresent(
+    workspaceId: string,
+    metadata?: { usage?: LanguageModelV2Usage; model?: string }
+  ): void {
+    const hasUsage = Boolean(metadata?.usage);
+    if (!hasUsage) return;
+    this.usageStore.bump(workspaceId);
+  }
+
   /**
    * Add a workspace and subscribe to its IPC events.
    */
@@ -301,6 +439,9 @@ export class WorkspaceStore {
    * Remove a workspace and clean up subscriptions.
    */
   removeWorkspace(workspaceId: string): void {
+    // Clean up consumer manager state
+    this.consumerManager.removeWorkspace(workspaceId);
+
     // Unsubscribe from IPC
     const unsubscribe = this.ipcUnsubscribers.get(workspaceId);
     if (unsubscribe) {
@@ -310,6 +451,8 @@ export class WorkspaceStore {
 
     // Clean up state
     this.states.delete(workspaceId);
+    this.usageStore.delete(workspaceId);
+    this.consumersStore.delete(workspaceId);
     this.aggregators.delete(workspaceId);
     this.caughtUp.delete(workspaceId);
     this.historicalMessages.delete(workspaceId);
@@ -345,12 +488,17 @@ export class WorkspaceStore {
    * Cleanup all subscriptions (call on unmount).
    */
   dispose(): void {
+    // Clean up consumer manager
+    this.consumerManager.dispose();
+
     for (const unsubscribe of this.ipcUnsubscribers.values()) {
       unsubscribe();
     }
     this.ipcUnsubscribers.clear();
     this.states.clear();
     this.derived.clear();
+    this.usageStore.clear();
+    this.consumersStore.clear();
     this.aggregators.clear();
     this.caughtUp.clear();
     this.historicalMessages.clear();
@@ -403,6 +551,13 @@ export class WorkspaceStore {
       this.caughtUp.set(workspaceId, true);
       this.states.bump(workspaceId);
       this.checkAndBumpRecencyIfChanged(); // Messages loaded, update recency
+
+      // Bump usage after loading history
+      this.usageStore.bump(workspaceId);
+
+      // Queue consumer calculation in background
+      this.consumerManager.scheduleCalculation(workspaceId, aggregator);
+
       return;
     }
 
@@ -423,6 +578,9 @@ export class WorkspaceStore {
     aggregator: StreamingMessageAggregator,
     data: WorkspaceChatMessage
   ): void {
+    // Bump usage if metadata present (forward compatible - works for any event type)
+    this.bumpUsageIfPresent(workspaceId, "metadata" in data ? data.metadata : undefined);
+
     if (isStreamError(data)) {
       aggregator.handleStreamError(data);
       this.states.bump(workspaceId);
@@ -524,6 +682,10 @@ export class WorkspaceStore {
 
       this.states.bump(workspaceId);
       this.checkAndBumpRecencyIfChanged(); // Stream ended, update recency
+
+      // Queue consumer calculation in background
+      this.consumerManager.scheduleCalculation(workspaceId, aggregator);
+
       return;
     }
 
@@ -536,6 +698,14 @@ export class WorkspaceStore {
           detail: { workspaceId },
         })
       );
+
+      this.bumpUsageIfPresent(workspaceId, data.metadata);
+
+      // Recalculate consumers if usage updated (abort may have usage if stream completed)
+      if (data.metadata?.usage) {
+        this.consumerManager.scheduleCalculation(workspaceId, aggregator);
+      }
+
       return;
     }
 
@@ -554,6 +724,11 @@ export class WorkspaceStore {
     if (isToolCallEnd(data)) {
       aggregator.handleToolCallEnd(data);
       this.states.bump(workspaceId);
+
+      // Bump consumers on tool-end for real-time updates during streaming
+      // Tools complete before stream-end, so we want breakdown to update immediately
+      this.consumerManager.scheduleCalculation(workspaceId, aggregator);
+
       return;
     }
 
@@ -657,3 +832,27 @@ export function useWorkspaceAggregator(workspaceId: string) {
   const store = useWorkspaceStoreRaw();
   return store.getAggregator(workspaceId);
 }
+
+/**
+ * Hook for usage metadata (instant, no tokenization): subscribes via
+ * store.subscribeUsage and reads snapshots from store.getWorkspaceUsage.
+ * NOTE(review): `subscribe` is a new function each render, so React re-subscribes every time.
+ */
+export function useWorkspaceUsage(workspaceId: string): WorkspaceUsageState {
+  const store = getStoreInstance();
+  const subscribe = (listener: () => void) => store.subscribeUsage(workspaceId, listener);
+  const getSnapshot = () => store.getWorkspaceUsage(workspaceId);
+  return useSyncExternalStore(subscribe, getSnapshot);
+}
+
+/**
+ * Hook for the consumer breakdown (lazy, with tokenization): subscribes via
+ * store.subscribeConsumers and reads snapshots from store.getWorkspaceConsumers.
+ * NOTE(review): `subscribe` is a new function each render, so React re-subscribes every time.
+ */
+export function useWorkspaceConsumers(workspaceId: string): WorkspaceConsumersState {
+  const store = getStoreInstance();
+  const subscribe = (listener: () => void) => store.subscribeConsumers(workspaceId, listener);
+  const getSnapshot = () => store.getWorkspaceConsumers(workspaceId);
+  return useSyncExternalStore(subscribe, getSnapshot);
+}
diff --git a/src/utils/tokens/tokenMeterUtils.ts b/src/utils/tokens/tokenMeterUtils.ts
index fae341ea1..51caf8774 100644
--- a/src/utils/tokens/tokenMeterUtils.ts
+++ b/src/utils/tokens/tokenMeterUtils.ts
@@ -25,7 +25,7 @@ export interface TokenMeterData {
 
 interface SegmentDef {
   type: TokenSegment["type"];
-  key: keyof ChatUsageDisplay;
+  key: "input" | "cached" | "cacheCreate" | "output" | "reasoning";
   color: string;
   label: string;
 }
diff --git a/src/utils/tokens/tokenStatsCalculator.test.ts b/src/utils/tokens/tokenStatsCalculator.test.ts
new file mode 100644
index 000000000..18b029ad8
--- /dev/null
+++ b/src/utils/tokens/tokenStatsCalculator.test.ts
@@ -0,0 +1,108 @@
+import { describe, test, expect } from "@jest/globals";
+import { createDisplayUsage } from "./tokenStatsCalculator";
+import type { LanguageModelV2Usage } from "@ai-sdk/provider";
+
+describe("createDisplayUsage", () => {
+  test("uses usage.reasoningTokens when available", () => {
+    const usage: LanguageModelV2Usage = {
+      inputTokens: 1000,
+      outputTokens: 500,
+      totalTokens: 1500,
+      reasoningTokens: 100,
+    };
+
+    const result = createDisplayUsage(usage, "openai:gpt-5-pro");
+
+    expect(result?.reasoning.tokens).toBe(100);
+    expect(result?.output.tokens).toBe(400); // outputTokens (500) - reasoning (100)
+  });
+
+  test("falls back to providerMetadata.openai.reasoningTokens when usage.reasoningTokens is undefined", () => {
+    const usage: LanguageModelV2Usage = {
+      inputTokens: 1000,
+      outputTokens: 500,
+      totalTokens: 1500,
+      // reasoningTokens omitted on purpose so the providerMetadata fallback is exercised
+    };
+
+    const providerMetadata = {
+      openai: {
+        reasoningTokens: 150,
+        responseId: "resp_123",
+        serviceTier: "default",
+      },
+    };
+
+    const result = createDisplayUsage(usage, "openai:gpt-5-pro", providerMetadata);
+
+    expect(result?.reasoning.tokens).toBe(150);
+    expect(result?.output.tokens).toBe(350); // outputTokens (500) - fallback reasoning (150)
+  });
+
+  test("uses 0 when both usage.reasoningTokens and providerMetadata.openai.reasoningTokens are undefined", () => {
+    const usage: LanguageModelV2Usage = {
+      inputTokens: 1000,
+      outputTokens: 500,
+      totalTokens: 1500,
+    };
+
+    const providerMetadata = {
+      openai: {
+        responseId: "resp_123",
+        serviceTier: "default",
+      },
+    };
+
+    const result = createDisplayUsage(usage, "openai:gpt-5-pro", providerMetadata);
+
+    expect(result?.reasoning.tokens).toBe(0);
+    expect(result?.output.tokens).toBe(500); // nothing to subtract: all 500 output tokens remain
+  });
+
+  test("prefers usage.reasoningTokens over providerMetadata when both exist", () => {
+    const usage: LanguageModelV2Usage = {
+      inputTokens: 1000,
+      outputTokens: 500,
+      totalTokens: 1500,
+      reasoningTokens: 100,
+    };
+
+    const providerMetadata = {
+      openai: {
+        reasoningTokens: 999, // sentinel: must be ignored when usage.reasoningTokens is set
+        responseId: "resp_123",
+        serviceTier: "default",
+      },
+    };
+
+    const result = createDisplayUsage(usage, "openai:gpt-5-pro", providerMetadata);
+
+    expect(result?.reasoning.tokens).toBe(100); // taken from usage, not providerMetadata (999)
+    expect(result?.output.tokens).toBe(400); // outputTokens (500) - reasoning (100)
+  });
+
+  test("works with non-OpenAI providers that don't have providerMetadata.openai", () => {
+    const usage: LanguageModelV2Usage = {
+      inputTokens: 1000,
+      outputTokens: 500,
+      totalTokens: 1500,
+      reasoningTokens: 200,
+    };
+
+    const providerMetadata = {
+      anthropic: {
+        cacheCreationInputTokens: 50,
+      },
+    };
+
+    const result = createDisplayUsage(
+      usage,
+      "anthropic:claude-sonnet-4-20250514",
+      providerMetadata
+    );
+
+    expect(result?.reasoning.tokens).toBe(200);
+    expect(result?.output.tokens).toBe(300); // outputTokens (500) - reasoning (200)
+    expect(result?.cacheCreate.tokens).toBe(50); // Anthropic cacheCreationInputTokens still honored
+  });
+});
diff --git a/src/utils/tokens/tokenStatsCalculator.ts b/src/utils/tokens/tokenStatsCalculator.ts
index a6e641e58..8507be873 100644
--- a/src/utils/tokens/tokenStatsCalculator.ts
+++ b/src/utils/tokens/tokenStatsCalculator.ts
@@ -1,6 +1,6 @@
 /**
  * Shared token statistics calculation logic
- * Used by both frontend (ChatContext) and backend (debug commands)
+ * Used by both frontend (WorkspaceStore) and backend (debug commands)
  *
  * IMPORTANT: This utility is intentionally abstracted so that the debug command
  * (`bun debug costs`) has exact parity with the UI display in the Costs tab.
@@ -45,11 +45,14 @@ export function createDisplayUsage(
     (providerMetadata?.anthropic as { cacheCreationInputTokens?: number } | undefined)
       ?.cacheCreationInputTokens ?? 0;
 
+  // Extract reasoning tokens with fallback to provider metadata (OpenAI-specific)
+  const reasoningTokens =
+    usage.reasoningTokens ??
+    (providerMetadata?.openai as { reasoningTokens?: number } | undefined)?.reasoningTokens ??
+    0;
+
   // Calculate output tokens excluding reasoning
-  const outputWithoutReasoning = Math.max(
-    0,
-    (usage.outputTokens ?? 0) - (usage.reasoningTokens ?? 0)
-  );
+  const outputWithoutReasoning = Math.max(0, (usage.outputTokens ?? 0) - reasoningTokens);
 
   // Get model stats for cost calculation
   const modelStats = getModelStats(model);
@@ -66,7 +69,7 @@ export function createDisplayUsage(
     cachedCost = cachedTokens * (modelStats.cache_read_input_token_cost ?? 0);
     cacheCreateCost = cacheCreateTokens * (modelStats.cache_creation_input_token_cost ?? 0);
     outputCost = outputWithoutReasoning * modelStats.output_cost_per_token;
-    reasoningCost = (usage.reasoningTokens ?? 0) * modelStats.output_cost_per_token;
+    reasoningCost = reasoningTokens * modelStats.output_cost_per_token;
   }
 
   return {
@@ -87,9 +90,10 @@ export function createDisplayUsage(
       cost_usd: outputCost,
     },
     reasoning: {
-      tokens: usage.reasoningTokens ?? 0,
+      tokens: reasoningTokens,
       cost_usd: reasoningCost,
     },
+    model, // Include model for display purposes
   };
 }
 
diff --git a/src/utils/tokens/usageAggregator.ts b/src/utils/tokens/usageAggregator.ts
index 61a439c60..afd9d1849 100644
--- a/src/utils/tokens/usageAggregator.ts
+++ b/src/utils/tokens/usageAggregator.ts
@@ -26,6 +26,9 @@ export interface ChatUsageDisplay {
   // totalOutput = output + reasoning
   output: ChatUsageComponent;
   reasoning: ChatUsageComponent;
+
+  // Optional model field for display purposes (context window calculation, etc.)
+  model?: string;
 }
 
 /**
@@ -48,7 +51,14 @@ export function sumUsageHistory(usageHistory: ChatUsageDisplay[]): ChatUsageDisp
 
   for (const usage of usageHistory) {
     // Iterate over each component and sum tokens and costs
-    for (const key of Object.keys(sum) as Array<keyof ChatUsageDisplay>) {
+    const componentKeys: Array<"input" | "cached" | "cacheCreate" | "output" | "reasoning"> = [
+      "input",
+      "cached",
+      "cacheCreate",
+      "output",
+      "reasoning",
+    ];
+    for (const key of componentKeys) {
       sum[key].tokens += usage[key].tokens;
       if (usage[key].cost_usd === undefined) {
         hasUndefinedCosts = true;