diff --git a/src/components/AIView.tsx b/src/components/AIView.tsx index 1fc21b1d3..6d537db85 100644 --- a/src/components/AIView.tsx +++ b/src/components/AIView.tsx @@ -13,7 +13,6 @@ import { mergeConsecutiveStreamErrors, } from "@/utils/messages/messageUtils"; import { hasInterruptedStream } from "@/utils/messages/retryEligibility"; -import { ChatProvider } from "@/contexts/ChatContext"; import { ThinkingProvider } from "@/contexts/ThinkingContext"; import { ModeProvider } from "@/contexts/ModeContext"; import { formatKeybind, KEYBINDS } from "@/utils/ui/keybinds"; @@ -379,8 +378,7 @@ const AIViewInner: React.FC = ({ } // Extract state from workspace state - const { messages, canInterrupt, isCompacting, loading, cmuxMessages, currentModel } = - workspaceState; + const { messages, canInterrupt, isCompacting, loading, currentModel } = workspaceState; // Get active stream message ID for token counting const activeStreamMessageId = aggregator.getActiveStreamMessageId(); @@ -426,147 +424,143 @@ const AIViewInner: React.FC = ({ } return ( - - - - - - + + + + + + {projectName} / {branch} + {namedWorkspacePath} + + + + + + + + Open in terminal ({formatKeybind(KEYBINDS.OPEN_TERMINAL)}) + + + + + + + + {mergedMessages.length === 0 ? ( + +

No Messages Yet

+

Send a message below to begin

+
+ ) : ( + <> + {mergedMessages.map((msg) => { + const isAtCutoff = + editCutoffHistoryId !== undefined && + msg.type !== "history-hidden" && + msg.historyId === editCutoffHistoryId; + + return ( + +
+ +
+ {isAtCutoff && ( + + ⚠️ Messages below this line will be removed when you submit the edit + + )} + {shouldShowInterruptedBarrier(msg) && } +
+ ); + })} + {/* Show RetryBarrier after the last message if needed */} + {showRetryBarrier && ( + setAutoRetry(false)} + onResetAutoRetry={() => setAutoRetry(true)} + /> + )} + + )} + + {canInterrupt && ( + - - {projectName} / {branch} - {namedWorkspacePath} - - - - - - - - Open in terminal ({formatKeybind(KEYBINDS.OPEN_TERMINAL)}) - - -
-
- - - - {mergedMessages.length === 0 ? ( - -

No Messages Yet

-

Send a message below to begin

-
- ) : ( - <> - {mergedMessages.map((msg) => { - const isAtCutoff = - editCutoffHistoryId !== undefined && - msg.type !== "history-hidden" && - msg.historyId === editCutoffHistoryId; - - return ( - -
- -
- {isAtCutoff && ( - - ⚠️ Messages below this line will be removed when you submit the edit - - )} - {shouldShowInterruptedBarrier(msg) && } -
- ); - })} - {/* Show RetryBarrier after the last message if needed */} - {showRetryBarrier && ( - setAutoRetry(false)} - onResetAutoRetry={() => setAutoRetry(true)} - /> - )} - - )} - - {canInterrupt && ( - - )} -
- {!autoScroll && ( - - Press {formatKeybind(KEYBINDS.JUMP_TO_BOTTOM)} to jump to bottom - )} -
- - -
- - -
-
+ + {!autoScroll && ( + + Press {formatKeybind(KEYBINDS.JUMP_TO_BOTTOM)} to jump to bottom + + )} + + + + + + + ); }; diff --git a/src/components/ChatMetaSidebar.tsx b/src/components/ChatMetaSidebar.tsx index 149df7448..69558d7a0 100644 --- a/src/components/ChatMetaSidebar.tsx +++ b/src/components/ChatMetaSidebar.tsx @@ -1,7 +1,7 @@ import React from "react"; import styled from "@emotion/styled"; import { usePersistedState } from "@/hooks/usePersistedState"; -import { useChatContext } from "@/contexts/ChatContext"; +import { useWorkspaceUsage } from "@/stores/WorkspaceStore"; import { use1MContext } from "@/hooks/use1MContext"; import { useResizeObserver } from "@/hooks/useResizeObserver"; import { CostsTab } from "./ChatMetaSidebar/CostsTab"; @@ -87,13 +87,13 @@ interface ChatMetaSidebarProps { chatAreaRef: React.RefObject; } -export const ChatMetaSidebar: React.FC = ({ workspaceId, chatAreaRef }) => { +const ChatMetaSidebarComponent: React.FC = ({ workspaceId, chatAreaRef }) => { const [selectedTab, setSelectedTab] = usePersistedState( `chat-meta-sidebar-tab:${workspaceId}`, "costs" ); - const { stats } = useChatContext(); + const usage = useWorkspaceUsage(workspaceId); const [use1M] = use1MContext(); const chatAreaSize = useResizeObserver(chatAreaRef); @@ -103,14 +103,16 @@ export const ChatMetaSidebar: React.FC = ({ workspaceId, c const costsPanelId = `${baseId}-panel-costs`; const toolsPanelId = `${baseId}-panel-tools`; - const lastUsage = stats?.usageHistory[stats.usageHistory.length - 1]; + const lastUsage = usage?.usageHistory[usage.usageHistory.length - 1]; // Memoize vertical meter data calculation to prevent unnecessary re-renders const verticalMeterData = React.useMemo(() => { - return lastUsage && stats - ? calculateTokenMeterData(lastUsage, stats.model, use1M, true) + // Get model from last usage + const model = lastUsage?.model ?? "unknown"; + return lastUsage + ? 
calculateTokenMeterData(lastUsage, model, use1M, true) : { segments: [], totalTokens: 0, totalPercentage: 0 }; - }, [lastUsage, stats, use1M]); + }, [lastUsage, use1M]); // Calculate if we should show collapsed view with hysteresis // Strategy: Observe ChatArea width directly (independent of sidebar width) @@ -168,7 +170,7 @@ export const ChatMetaSidebar: React.FC = ({ workspaceId, c {selectedTab === "costs" && (
- +
)} {selectedTab === "tools" && ( @@ -184,3 +186,7 @@ export const ChatMetaSidebar: React.FC = ({ workspaceId, c ); }; + +// Memoize to prevent re-renders when parent (AIView) re-renders during streaming +// Only re-renders when workspaceId or chatAreaRef changes, or internal state updates +export const ChatMetaSidebar = React.memo(ChatMetaSidebarComponent); diff --git a/src/components/ChatMetaSidebar/ConsumerBreakdown.tsx b/src/components/ChatMetaSidebar/ConsumerBreakdown.tsx new file mode 100644 index 000000000..70916e119 --- /dev/null +++ b/src/components/ChatMetaSidebar/ConsumerBreakdown.tsx @@ -0,0 +1,189 @@ +import React from "react"; +import styled from "@emotion/styled"; +import type { WorkspaceConsumersState } from "@/stores/WorkspaceStore"; +import { TooltipWrapper, Tooltip, HelpIndicator } from "../Tooltip"; + +const TokenizerInfo = styled.div` + color: #888888; + font-size: 12px; + margin-bottom: 8px; +`; + +const ConsumerList = styled.div` + display: flex; + flex-direction: column; + gap: 12px; +`; + +const ConsumerRow = styled.div` + display: flex; + flex-direction: column; + gap: 4px; + margin-bottom: 8px; +`; + +const ConsumerHeader = styled.div` + display: flex; + justify-content: space-between; + align-items: center; + margin-bottom: 4px; +`; + +const ConsumerName = styled.span` + color: #cccccc; + font-weight: 500; + display: flex; + align-items: center; + gap: 4px; +`; + +const ConsumerTokens = styled.span` + color: #888888; + font-size: 12px; +`; + +const PercentageBarWrapper = styled.div` + display: flex; + flex-direction: column; + gap: 4px; +`; + +const PercentageBar = styled.div` + width: 100%; + height: 8px; + background: #2a2a2a; + border-radius: 4px; + overflow: hidden; + display: flex; +`; + +interface SegmentProps { + percentage: number; +} + +const PercentageFill = styled.div` + height: 100%; + width: ${(props) => props.percentage}%; + background: linear-gradient(90deg, #4a9eff 0%, #6b5ce7 100%); + transition: width 0.3s ease; +`; + 
+const FixedSegment = styled.div` + height: 100%; + width: ${(props) => props.percentage}%; + background: var(--color-token-fixed); + transition: width 0.3s ease; +`; + +const VariableSegment = styled.div` + height: 100%; + width: ${(props) => props.percentage}%; + background: var(--color-token-variable); + transition: width 0.3s ease; +`; + +const TokenDetails = styled.div` + color: #666666; + font-size: 11px; + text-align: left; +`; + +const LoadingState = styled.div` + color: #888888; + font-style: italic; + padding: 12px 0; +`; + +const EmptyState = styled.div` + color: #666666; + font-style: italic; + padding: 12px 0; + text-align: left; + + p { + margin: 4px 0; + } +`; + +// Format token display - show k for thousands with 1 decimal +const formatTokens = (tokens: number) => + tokens >= 1000 ? `${(tokens / 1000).toFixed(1)}k` : tokens.toLocaleString(); + +interface ConsumerBreakdownProps { + consumers: WorkspaceConsumersState; +} + +const ConsumerBreakdownComponent: React.FC = ({ consumers }) => { + if (consumers.isCalculating) { + return Calculating consumer breakdown...; + } + + if (consumers.consumers.length === 0) { + return No consumer data available; + } + + return ( + <> + + Tokenizer: {consumers.tokenizerName} + + + {consumers.consumers.map((consumer) => { + // Calculate percentages for fixed and variable segments + const fixedPercentage = consumer.fixedTokens + ? (consumer.fixedTokens / consumers.totalTokens) * 100 + : 0; + const variablePercentage = consumer.variableTokens + ? (consumer.variableTokens / consumers.totalTokens) * 100 + : 0; + + const tokenDisplay = formatTokens(consumer.tokens); + + return ( + + + + {consumer.name} + {consumer.name === "web_search" && ( + + ? + + Web search results are encrypted and decrypted server-side. This estimate is + approximate. + + + )} + + + {tokenDisplay} ({consumer.percentage.toFixed(1)}%) + + + + + {consumer.fixedTokens && consumer.variableTokens ? 
( + <> + + + + ) : ( + + )} + + {consumer.fixedTokens && consumer.variableTokens && ( + + Tool definition: {formatTokens(consumer.fixedTokens)} • Usage:{" "} + {formatTokens(consumer.variableTokens)} + + )} + + + ); + })} + + + ); +}; + +// Memoize to prevent re-renders when parent re-renders but consumers data hasn't changed +// Only re-renders when consumers object reference changes (when store bumps it) +export const ConsumerBreakdown = React.memo(ConsumerBreakdownComponent); diff --git a/src/components/ChatMetaSidebar/CostsTab.tsx b/src/components/ChatMetaSidebar/CostsTab.tsx index 8de087c79..1800555dc 100644 --- a/src/components/ChatMetaSidebar/CostsTab.tsx +++ b/src/components/ChatMetaSidebar/CostsTab.tsx @@ -1,7 +1,6 @@ import React from "react"; import styled from "@emotion/styled"; -import { useChatContext } from "@/contexts/ChatContext"; -import { TooltipWrapper, Tooltip, HelpIndicator } from "../Tooltip"; +import { useWorkspaceUsage, useWorkspaceConsumers } from "@/stores/WorkspaceStore"; import { getModelStats } from "@/utils/tokens/modelStats"; import { sumUsageHistory } from "@/utils/tokens/usageAggregator"; import { usePersistedState } from "@/hooks/usePersistedState"; @@ -9,6 +8,7 @@ import { ToggleGroup, type ToggleOption } from "../ToggleGroup"; import { use1MContext } from "@/hooks/use1MContext"; import { supports1MContext } from "@/utils/ai/models"; import { TOKEN_COMPONENT_COLORS } from "@/utils/tokens/tokenMeterUtils"; +import { ConsumerBreakdown } from "./ConsumerBreakdown"; const Container = styled.div` color: #d4d4d4; @@ -17,8 +17,9 @@ const Container = styled.div` line-height: 1.6; `; -const Section = styled.div` - margin-bottom: 24px; +const Section = styled.div<{ marginTop?: string; marginBottom?: string }>` + margin-bottom: ${(props) => props.marginBottom ?? "24px"}; + margin-top: ${(props) => props.marginTop ?? 
"0"}; `; const SectionTitle = styled.h3<{ dimmed?: boolean }>` @@ -30,12 +31,6 @@ const SectionTitle = styled.h3<{ dimmed?: boolean }>` letter-spacing: 0.5px; `; -const TokenizerInfo = styled.div` - color: #888888; - font-size: 12px; - margin-bottom: 8px; -`; - const ConsumerList = styled.div` display: flex; flex-direction: column; @@ -87,20 +82,6 @@ interface SegmentProps { percentage: number; } -const FixedSegment = styled.div` - height: 100%; - width: ${(props) => props.percentage}%; - background: var(--color-token-fixed); - transition: width 0.3s ease; -`; - -const VariableSegment = styled.div` - height: 100%; - width: ${(props) => props.percentage}%; - background: var(--color-token-variable); - transition: width 0.3s ease; -`; - const InputSegment = styled.div` height: 100%; width: ${(props) => props.percentage}%; @@ -129,22 +110,6 @@ const CachedSegment = styled.div` transition: width 0.3s ease; `; -interface PercentageFillProps { - percentage: number; -} - -const PercentageFill = styled.div` - height: 100%; - width: ${(props) => props.percentage}%; - background: var(--color-token-completion); - transition: width 0.3s ease; -`; - -const LoadingState = styled.div` - color: #888888; - font-style: italic; -`; - const EmptyState = styled.div` color: #888888; text-align: center; @@ -158,14 +123,6 @@ const ModelWarning = styled.div` font-style: italic; `; -const TokenDetails = styled.div` - color: #888888; - font-size: 11px; - margin-top: 6px; - padding-left: 4px; - line-height: 1.4; -`; - const DetailsTable = styled.table` width: 100%; margin-top: 4px; @@ -222,13 +179,6 @@ const DimmedCost = styled.span` font-style: italic; `; -const SectionHeader = styled.div` - display: flex; - justify-content: flex-start; - align-items: center; - margin-bottom: 12px; -`; - // Format token display - show k for thousands with 1 decimal const formatTokens = (tokens: number) => tokens >= 1000 ? 
`${(tokens / 1000).toFixed(1)}k` : tokens.toLocaleString(); @@ -267,25 +217,27 @@ const calculateElevatedCost = (tokens: number, standardRate: number, isInput: bo type ViewMode = "last-request" | "session"; const VIEW_MODE_OPTIONS: Array> = [ - { value: "last-request", label: "Last Request" }, { value: "session", label: "Session" }, + { value: "last-request", label: "Last Request" }, ]; -export const CostsTab: React.FC = () => { - const { stats, isCalculating } = useChatContext(); - const [viewMode, setViewMode] = usePersistedState("costsTab:viewMode", "last-request"); +interface CostsTabProps { + workspaceId: string; +} + +const CostsTabComponent: React.FC = ({ workspaceId }) => { + const usage = useWorkspaceUsage(workspaceId); + const consumers = useWorkspaceConsumers(workspaceId); + const [viewMode, setViewMode] = usePersistedState("costsTab:viewMode", "session"); const [use1M] = use1MContext(); - // Only show loading if we don't have any stats yet - if (isCalculating && !stats) { - return ( - - Calculating token usage... - - ); - } + // Check if we have any data to display + const hasUsageData = usage && usage.usageHistory.length > 0; + const hasConsumerData = consumers && (consumers.totalTokens > 0 || consumers.isCalculating); + const hasAnyData = hasUsageData || hasConsumerData; - if (!stats || stats.totalTokens === 0) { + // Only show empty state if truly no data anywhere + if (!hasAnyData) { return ( @@ -296,37 +248,46 @@ export const CostsTab: React.FC = () => { ); } - // Compute displayUsage based on view mode + // Context Usage always shows Last Request data + const lastRequestUsage = hasUsageData + ? usage.usageHistory[usage.usageHistory.length - 1] + : undefined; + + // Cost and Details table use viewMode const displayUsage = viewMode === "last-request" - ? stats.usageHistory[stats.usageHistory.length - 1] - : sumUsageHistory(stats.usageHistory); + ? 
usage.usageHistory[usage.usageHistory.length - 1] + : sumUsageHistory(usage.usageHistory); return ( - {stats.usageHistory.length > 0 && ( -
- - - - + {hasUsageData && ( +
+ {(() => { + // Context Usage always uses last request + const contextUsage = lastRequestUsage; + + // Get model from last request (for context window display) + const model = lastRequestUsage?.model ?? "unknown"; + // Get max tokens for the model from the model stats database - const modelStats = getModelStats(stats.model); + const modelStats = getModelStats(model); const baseMaxTokens = modelStats?.max_input_tokens; // Check if 1M context is active and supported - const is1MActive = use1M && supports1MContext(stats.model); + const is1MActive = use1M && supports1MContext(model); const maxTokens = is1MActive ? 1_000_000 : baseMaxTokens; + // Total tokens includes cache creation (they're input tokens sent for caching) - const totalUsed = displayUsage - ? displayUsage.input.tokens + - displayUsage.cached.tokens + - displayUsage.cacheCreate.tokens + - displayUsage.output.tokens + - displayUsage.reasoning.tokens + const totalUsed = contextUsage + ? contextUsage.input.tokens + + contextUsage.cached.tokens + + contextUsage.cacheCreate.tokens + + contextUsage.output.tokens + + contextUsage.reasoning.tokens : 0; - // Calculate percentages + // Calculate percentages based on max tokens (actual context window usage) let inputPercentage: number; let outputPercentage: number; let cachedPercentage: number; @@ -335,34 +296,25 @@ export const CostsTab: React.FC = () => { let showWarning = false; let totalPercentage: number; - // For session mode, always show bar as full (100%) based on relative token distribution - if (viewMode === "session" && displayUsage && totalUsed > 0) { - // Scale to total tokens used (bar always full) - inputPercentage = (displayUsage.input.tokens / totalUsed) * 100; - outputPercentage = (displayUsage.output.tokens / totalUsed) * 100; - cachedPercentage = (displayUsage.cached.tokens / totalUsed) * 100; - cacheCreatePercentage = (displayUsage.cacheCreate.tokens / totalUsed) * 100; - reasoningPercentage = (displayUsage.reasoning.tokens / totalUsed) * 100; 
- totalPercentage = 100; - } else if (maxTokens && displayUsage) { + if (maxTokens && contextUsage) { // We know the model's max tokens - show actual context window usage - inputPercentage = (displayUsage.input.tokens / maxTokens) * 100; - outputPercentage = (displayUsage.output.tokens / maxTokens) * 100; - cachedPercentage = (displayUsage.cached.tokens / maxTokens) * 100; - cacheCreatePercentage = (displayUsage.cacheCreate.tokens / maxTokens) * 100; - reasoningPercentage = (displayUsage.reasoning.tokens / maxTokens) * 100; + inputPercentage = (contextUsage.input.tokens / maxTokens) * 100; + outputPercentage = (contextUsage.output.tokens / maxTokens) * 100; + cachedPercentage = (contextUsage.cached.tokens / maxTokens) * 100; + cacheCreatePercentage = (contextUsage.cacheCreate.tokens / maxTokens) * 100; + reasoningPercentage = (contextUsage.reasoning.tokens / maxTokens) * 100; totalPercentage = (totalUsed / maxTokens) * 100; - } else if (displayUsage) { + } else if (contextUsage) { // Unknown model - scale to total tokens used - inputPercentage = totalUsed > 0 ? (displayUsage.input.tokens / totalUsed) * 100 : 0; + inputPercentage = totalUsed > 0 ? (contextUsage.input.tokens / totalUsed) * 100 : 0; outputPercentage = - totalUsed > 0 ? (displayUsage.output.tokens / totalUsed) * 100 : 0; + totalUsed > 0 ? (contextUsage.output.tokens / totalUsed) * 100 : 0; cachedPercentage = - totalUsed > 0 ? (displayUsage.cached.tokens / totalUsed) * 100 : 0; + totalUsed > 0 ? (contextUsage.cached.tokens / totalUsed) * 100 : 0; cacheCreatePercentage = - totalUsed > 0 ? (displayUsage.cacheCreate.tokens / totalUsed) * 100 : 0; + totalUsed > 0 ? (contextUsage.cacheCreate.tokens / totalUsed) * 100 : 0; reasoningPercentage = - totalUsed > 0 ? (displayUsage.reasoning.tokens / totalUsed) * 100 : 0; + totalUsed > 0 ? 
(contextUsage.reasoning.tokens / totalUsed) * 100 : 0; totalPercentage = 100; showWarning = true; } else { @@ -375,10 +327,52 @@ export const CostsTab: React.FC = () => { } const totalDisplay = formatTokens(totalUsed); - // For session mode, don't show max tokens or percentage - const maxDisplay = - viewMode === "session" ? "" : maxTokens ? ` / ${formatTokens(maxTokens)}` : ""; - const showPercentage = viewMode !== "session"; + const maxDisplay = maxTokens ? ` / ${formatTokens(maxTokens)}` : ""; + + return ( + <> + + + Context Usage + + {totalDisplay} + {maxDisplay} + {` (${totalPercentage.toFixed(1)}%)`} + + + + + {cachedPercentage > 0 && } + {cacheCreatePercentage > 0 && ( + + )} + + + {reasoningPercentage > 0 && ( + + )} + + + + {showWarning && ( + Unknown model limits - showing relative usage only + )} + + ); + })()} + +
+ )} + + {hasUsageData && ( +
+ + {(() => { + // Cost and Details use viewMode-dependent data + // Get model from the displayUsage (which could be last request or session sum) + const model = displayUsage?.model ?? lastRequestUsage?.model ?? "unknown"; + const modelStats = getModelStats(model); + const is1MActive = use1M && supports1MContext(model); // Helper to calculate cost percentage const getCostPercentage = (cost: number | undefined, total: number | undefined) => @@ -481,33 +475,17 @@ export const CostsTab: React.FC = () => { return ( <> - - - Token Usage - - {totalDisplay} - {maxDisplay} - {showPercentage && ` (${totalPercentage.toFixed(1)}%)`} - - - - - {cachedPercentage > 0 && } - {cacheCreatePercentage > 0 && ( - - )} - - - {reasoningPercentage > 0 && ( - - )} - - - {totalCost !== undefined && totalCost >= 0 && ( - - - Cost + + +
+ Cost + +
{formatCostWithDollar(totalCost)}
@@ -527,7 +505,7 @@ export const CostsTab: React.FC = () => {
)} - + Component @@ -559,9 +537,6 @@ export const CostsTab: React.FC = () => { })} - {showWarning && ( - Unknown model limits - showing relative usage only - )} ); })()} @@ -571,63 +546,12 @@ export const CostsTab: React.FC = () => {
Breakdown by Consumer - - Tokenizer: {stats.tokenizerName} - - - {stats.consumers.map((consumer) => { - // Calculate percentages for fixed and variable segments - const fixedPercentage = consumer.fixedTokens - ? (consumer.fixedTokens / stats.totalTokens) * 100 - : 0; - const variablePercentage = consumer.variableTokens - ? (consumer.variableTokens / stats.totalTokens) * 100 - : 0; - - const tokenDisplay = formatTokens(consumer.tokens); - - return ( - - - - {consumer.name} - {consumer.name === "web_search" && ( - - ? - - Web search results are encrypted and decrypted server-side. This estimate - is approximate. - - - )} - - - {tokenDisplay} ({consumer.percentage.toFixed(1)}%) - - - - - {consumer.fixedTokens && consumer.variableTokens ? ( - <> - - - - ) : ( - - )} - - {consumer.fixedTokens && consumer.variableTokens && ( - - Tool definition: {formatTokens(consumer.fixedTokens)} • Usage:{" "} - {formatTokens(consumer.variableTokens)} - - )} - - - ); - })} - +
); }; + +// Memoize to prevent re-renders when parent (AIView) re-renders during streaming +// Only re-renders when workspaceId changes or internal hook data (usage/consumers) updates +export const CostsTab = React.memo(CostsTabComponent); diff --git a/src/contexts/ChatContext.tsx b/src/contexts/ChatContext.tsx deleted file mode 100644 index 3a64187be..000000000 --- a/src/contexts/ChatContext.tsx +++ /dev/null @@ -1,103 +0,0 @@ -import type { ReactNode } from "react"; -import React, { createContext, useContext, useState, useEffect, useRef } from "react"; -import type { CmuxMessage, DisplayedMessage } from "@/types/message"; -import type { ChatStats } from "@/types/chatStats"; -import { TokenStatsWorker } from "@/utils/tokens/TokenStatsWorker"; - -interface ChatContextType { - messages: DisplayedMessage[]; - stats: ChatStats | null; - isCalculating: boolean; -} - -const ChatContext = createContext(undefined); - -interface ChatProviderProps { - children: ReactNode; - messages: DisplayedMessage[]; - cmuxMessages: CmuxMessage[]; - model: string; -} - -export const ChatProvider: React.FC = ({ - children, - messages, - cmuxMessages, - model, -}) => { - const [stats, setStats] = useState(null); - const [isCalculating, setIsCalculating] = useState(false); - // Track if we've already scheduled a calculation to prevent timer spam - const calculationScheduledRef = useRef(false); - // Web Worker for off-thread token calculation - const workerRef = useRef(null); - - // Initialize worker once - useEffect(() => { - workerRef.current = new TokenStatsWorker(); - return () => { - workerRef.current?.terminate(); - workerRef.current = null; - }; - }, []); - - useEffect(() => { - if (cmuxMessages.length === 0) { - setStats({ - consumers: [], - totalTokens: 0, - model, - tokenizerName: "No messages", - usageHistory: [], - }); - return; - } - - // IMPORTANT: Prevent duplicate timers during rapid events (reasoning deltas) - // During message loading, 600+ reasoning-delta events fire 
rapidly, each triggering - // this effect. Without this guard, we'd start 600 timers that all eventually run! - if (calculationScheduledRef.current) return; - - calculationScheduledRef.current = true; - - // Show calculating state immediately (safe now that aggregator cache provides stable refs) - setIsCalculating(true); - - // Debounce calculation by 100ms to avoid blocking on rapid updates - const timeoutId = setTimeout(() => { - // Calculate stats in Web Worker (off main thread) - workerRef.current - ?.calculate(cmuxMessages, model) - .then((calculatedStats) => { - setStats(calculatedStats); - }) - .catch((error) => { - console.error("Failed to calculate token stats:", error); - }) - .finally(() => { - setIsCalculating(false); - calculationScheduledRef.current = false; - }); - }, 100); - - return () => { - clearTimeout(timeoutId); - calculationScheduledRef.current = false; - setIsCalculating(false); - }; - }, [cmuxMessages, model]); - - return ( - - {children} - - ); -}; - -export const useChatContext = () => { - const context = useContext(ChatContext); - if (!context) { - throw new Error("useChatContext must be used within a ChatProvider"); - } - return context; -}; diff --git a/src/stores/WorkspaceConsumerManager.ts b/src/stores/WorkspaceConsumerManager.ts new file mode 100644 index 000000000..628bc7f86 --- /dev/null +++ b/src/stores/WorkspaceConsumerManager.ts @@ -0,0 +1,229 @@ +import type { WorkspaceConsumersState } from "./WorkspaceStore"; +import { TokenStatsWorker } from "@/utils/tokens/TokenStatsWorker"; +import type { StreamingMessageAggregator } from "@/utils/messages/StreamingMessageAggregator"; + +/** + * Manages consumer token calculations for workspaces. 
+ * + * Responsibilities: + * - Debounces rapid calculation requests (e.g., multiple tool-call-end events) + * - Caches calculated results to avoid redundant work (source of truth) + * - Tracks calculation state per workspace + * - Executes Web Worker tokenization calculations + * - Handles cleanup and disposal + * + * Architecture: + * - Single responsibility: consumer tokenization calculations + * - Owns the source-of-truth cache (calculated consumer data) + * - WorkspaceStore orchestrates (decides when to calculate) + * - This manager executes (performs calculations, manages cache) + * + * Dual-Cache Design: + * - WorkspaceConsumerManager.cache: Source of truth for calculated data + * - WorkspaceStore.consumersStore (MapStore): Subscription management only + * (components subscribe to workspace changes, delegates to manager for state) + */ +export class WorkspaceConsumerManager { + // Web Worker for tokenization (shared across workspaces) + private readonly tokenWorker: TokenStatsWorker; + + // Track scheduled calculations (in debounce window, not yet executing) + private scheduledCalcs = new Set(); + + // Track executing calculations (Web Worker running) + private pendingCalcs = new Set(); + + // Track workspaces that need recalculation after current one completes + private needsRecalc = new Map(); + + // Cache calculated consumer data (persists across bumps) + private cache = new Map(); + + // Debounce timers for consumer calculations (prevents rapid-fire during tool sequences) + private debounceTimers = new Map(); + + // Callback to bump the store when calculation completes + private readonly onCalculationComplete: (workspaceId: string) => void; + + constructor(onCalculationComplete: (workspaceId: string) => void) { + this.tokenWorker = new TokenStatsWorker(); + this.onCalculationComplete = onCalculationComplete; + } + + /** + * Get cached state without side effects. + * Returns null if no cache exists. 
+ */ + getCachedState(workspaceId: string): WorkspaceConsumersState | null { + return this.cache.get(workspaceId) ?? null; + } + + /** + * Check if calculation is pending or scheduled for workspace. + */ + isPending(workspaceId: string): boolean { + return this.scheduledCalcs.has(workspaceId) || this.pendingCalcs.has(workspaceId); + } + + /** + * Get current state synchronously without triggering calculations. + * Returns cached result if available, otherwise returns default state. + * + * Note: This is called from WorkspaceStore.getWorkspaceConsumers(), + * which handles the lazy trigger logic separately. + */ + getStateSync(workspaceId: string): WorkspaceConsumersState { + const cached = this.cache.get(workspaceId); + if (cached) { + return cached; + } + + // Default state while scheduled/calculating or before first calculation + return { + consumers: [], + tokenizerName: "", + totalTokens: 0, + isCalculating: this.scheduledCalcs.has(workspaceId) || this.pendingCalcs.has(workspaceId), + }; + } + + /** + * Schedule a consumer calculation (debounced). + * Batches rapid events (e.g., multiple tool-call-end) into single calculation. + * Marks as "calculating" immediately to prevent UI flash. + * + * If a calculation is already running, marks workspace for recalculation + * after the current one completes. + */ + scheduleCalculation(workspaceId: string, aggregator: StreamingMessageAggregator): void { + // Clear existing timer for this workspace + const existingTimer = this.debounceTimers.get(workspaceId); + if (existingTimer) { + clearTimeout(existingTimer); + } + + // If already executing, queue a follow-up recalculation + if (this.pendingCalcs.has(workspaceId)) { + this.needsRecalc.set(workspaceId, aggregator); + return; + } + + // Mark as scheduled immediately (triggers "Calculating..." 
UI, prevents flash) + const isNewSchedule = !this.scheduledCalcs.has(workspaceId); + this.scheduledCalcs.add(workspaceId); + + // Notify store if newly scheduled (triggers UI update) + if (isNewSchedule) { + this.onCalculationComplete(workspaceId); + } + + // Set new timer (150ms - imperceptible to humans, batches rapid events) + const timer = setTimeout(() => { + this.debounceTimers.delete(workspaceId); + this.scheduledCalcs.delete(workspaceId); // Move from scheduled to pending + this.executeCalculation(workspaceId, aggregator); + }, 150); + + this.debounceTimers.set(workspaceId, timer); + } + + /** + * Execute background consumer calculation. + * Only one calculation per workspace at a time. + */ + private executeCalculation(workspaceId: string, aggregator: StreamingMessageAggregator): void { + // Skip if already calculating + if (this.pendingCalcs.has(workspaceId)) { + return; + } + + this.pendingCalcs.add(workspaceId); + + // Mark as calculating and notify store + this.onCalculationComplete(workspaceId); + + // Run in next tick to avoid blocking caller + void (async () => { + try { + const messages = aggregator.getAllMessages(); + const model = aggregator.getCurrentModel() ?? 
"unknown"; + + // Calculate in Web Worker (off main thread) + const fullStats = await this.tokenWorker.calculate(messages, model); + + // Store result in cache + this.cache.set(workspaceId, { + consumers: fullStats.consumers, + tokenizerName: fullStats.tokenizerName, + totalTokens: fullStats.totalTokens, + isCalculating: false, + }); + + // Notify store to trigger re-render + this.onCalculationComplete(workspaceId); + } catch (error) { + // Cancellations are expected during rapid events - don't cache, don't log + // This allows lazy trigger to retry on next access + if (error instanceof Error && error.message === "Cancelled by newer request") { + return; + } + + // Real errors: log and cache empty result + console.error(`[WorkspaceConsumerManager] Calculation failed for ${workspaceId}:`, error); + this.cache.set(workspaceId, { + consumers: [], + tokenizerName: "", + totalTokens: 0, + isCalculating: false, + }); + this.onCalculationComplete(workspaceId); + } finally { + this.pendingCalcs.delete(workspaceId); + + // If recalculation was requested while we were running, schedule it now + const needsRecalcAggregator = this.needsRecalc.get(workspaceId); + if (needsRecalcAggregator) { + this.needsRecalc.delete(workspaceId); + this.scheduleCalculation(workspaceId, needsRecalcAggregator); + } + } + })(); + } + + /** + * Remove workspace state and cleanup timers. + */ + removeWorkspace(workspaceId: string): void { + // Clear debounce timer + const timer = this.debounceTimers.get(workspaceId); + if (timer) { + clearTimeout(timer); + this.debounceTimers.delete(workspaceId); + } + + // Clean up state + this.cache.delete(workspaceId); + this.scheduledCalcs.delete(workspaceId); + this.pendingCalcs.delete(workspaceId); + this.needsRecalc.delete(workspaceId); + } + + /** + * Cleanup all resources. 
+   *
+   * Queued recalculation requests are dropped as well, so a calculation that
+   * is still in flight cannot reschedule work against the terminated worker.
+   */
+  dispose(): void {
+    // Clear all debounce timers
+    for (const timer of this.debounceTimers.values()) {
+      clearTimeout(timer);
+    }
+    this.debounceTimers.clear();
+
+    // Terminate worker
+    this.tokenWorker.terminate();
+
+    // Clear state
+    this.cache.clear();
+    this.scheduledCalcs.clear();
+    this.pendingCalcs.clear();
+    // Mirror removeWorkspace(): executeCalculation()'s finally block re-schedules
+    // whatever is left in needsRecalc, which would arm a new timer after disposal.
+    this.needsRecalc.clear();
+  }
+}
diff --git a/src/stores/WorkspaceStore.ts b/src/stores/WorkspaceStore.ts
index 881e106fb..5e9b97778 100644
--- a/src/stores/WorkspaceStore.ts
+++ b/src/stores/WorkspaceStore.ts
@@ -23,6 +23,11 @@ import {
   isReasoningEnd,
 } from "@/types/ipc";
 import { MapStore } from "./MapStore";
+import { createDisplayUsage } from "@/utils/tokens/tokenStatsCalculator";
+import { WorkspaceConsumerManager } from "./WorkspaceConsumerManager";
+import type { ChatUsageDisplay } from "@/utils/tokens/usageAggregator";
+import type { TokenConsumer } from "@/types/chatStats";
+import type { LanguageModelV2Usage } from "@ai-sdk/provider";
 
 export interface WorkspaceState {
   messages: DisplayedMessage[];
@@ -61,6 +66,26 @@ function extractSidebarState(aggregator: StreamingMessageAggregator): WorkspaceS
  */
 type DerivedState = Record;
 
+/**
+ * Usage metadata extracted from API responses (no tokenization).
+ * Updates instantly when usage metadata arrives.
+ */
+export interface WorkspaceUsageState {
+  usageHistory: ChatUsageDisplay[];
+  totalTokens: number;
+}
+
+/**
+ * Consumer breakdown requiring tokenization (lazy calculation).
+ * Updates after async Web Worker calculation completes.
+ */
+export interface WorkspaceConsumersState {
+  consumers: TokenConsumer[];
+  tokenizerName: string;
+  totalTokens: number; // Total from tokenization (may differ from usage totalTokens)
+  isCalculating: boolean;
+}
+
 /**
  * External store for workspace aggregators and streaming state.
* @@ -76,6 +101,15 @@ export class WorkspaceStore { // Derived aggregate state (computed from multiple workspaces) private derived = new MapStore(); + // Usage and consumer stores (two-store approach for CostsTab optimization) + private usageStore = new MapStore(); + private consumersStore = new MapStore(); + + // Manager for consumer calculations (debouncing, caching, lazy loading) + // Architecture: WorkspaceStore orchestrates (decides when), manager executes (performs calculations) + // Dual-cache: consumersStore (MapStore) handles subscriptions, manager owns data cache + private readonly consumerManager: WorkspaceConsumerManager; + // Supporting data structures private aggregators = new Map(); private ipcUnsubscribers = new Map void>(); @@ -95,6 +129,11 @@ export class WorkspaceStore { constructor(onModelUsed?: (model: string) => void) { this.onModelUsed = onModelUsed; + // Initialize consumer calculation manager + this.consumerManager = new WorkspaceConsumerManager((workspaceId) => { + this.consumersStore.bump(workspaceId); + }); + // Note: We DON'T auto-check recency on every state bump. // Instead, checkAndBumpRecencyIfChanged() is called explicitly after // message completion events (not on deltas) to prevent App.tsx re-renders. @@ -262,6 +301,105 @@ export class WorkspaceStore { return aggregator ? aggregator.getCurrentTodos() : []; } + /** + * Extract usage from messages (no tokenization). + * Each usage entry calculated with its own model for accurate costs. 
+ */ + getWorkspaceUsage(workspaceId: string): WorkspaceUsageState { + return this.usageStore.get(workspaceId, () => { + const aggregator = this.getOrCreateAggregator(workspaceId); + const messages = aggregator.getAllMessages(); + + // Extract usage from assistant messages + const usageHistory: ChatUsageDisplay[] = []; + + for (const msg of messages) { + if (msg.role === "assistant" && msg.metadata?.usage) { + // Use the model from this specific message (not global) + const model = msg.metadata.model ?? aggregator.getCurrentModel() ?? "unknown"; + + const usage = createDisplayUsage( + msg.metadata.usage, + model, + msg.metadata.providerMetadata + ); + + if (usage) { + usageHistory.push(usage); + } + } + } + + // Calculate total from usage history + const totalTokens = usageHistory.reduce( + (sum, u) => + sum + + u.input.tokens + + u.cached.tokens + + u.cacheCreate.tokens + + u.output.tokens + + u.reasoning.tokens, + 0 + ); + + return { usageHistory, totalTokens }; + }); + } + + /** + * Get consumer breakdown (may be calculating). + * Triggers lazy calculation if workspace is caught-up but no data exists. + * + * Architecture: Lazy trigger runs on EVERY access (outside MapStore.get()) + * so workspace switches trigger calculation even if MapStore has cached result. + */ + getWorkspaceConsumers(workspaceId: string): WorkspaceConsumersState { + const aggregator = this.aggregators.get(workspaceId); + const isCaughtUp = this.caughtUp.get(workspaceId) ?? 
false; + + // Lazy trigger check (runs on EVERY access, not just when MapStore recomputes) + const cached = this.consumerManager.getCachedState(workspaceId); + const isPending = this.consumerManager.isPending(workspaceId); + + if (!cached && !isPending && isCaughtUp) { + if (aggregator && aggregator.getAllMessages().length > 0) { + this.consumerManager.scheduleCalculation(workspaceId, aggregator); + } + } + + // Return state (MapStore handles subscriptions, delegates to manager for actual state) + return this.consumersStore.get(workspaceId, () => { + return this.consumerManager.getStateSync(workspaceId); + }); + } + + /** + * Subscribe to usage store changes for a specific workspace. + */ + subscribeUsage(workspaceId: string, listener: () => void): () => void { + return this.usageStore.subscribeKey(workspaceId, listener); + } + + /** + * Subscribe to consumer store changes for a specific workspace. + */ + subscribeConsumers(workspaceId: string, listener: () => void): () => void { + return this.consumersStore.subscribeKey(workspaceId, listener); + } + + /** + * Helper to bump usage store if metadata contains usage. + * Simplifies event handling logic and provides forward compatibility. + */ + private bumpUsageIfPresent( + workspaceId: string, + metadata?: { usage?: LanguageModelV2Usage; model?: string } + ): void { + if (metadata?.usage) { + this.usageStore.bump(workspaceId); + } + } + /** * Add a workspace and subscribe to its IPC events. */ @@ -301,6 +439,9 @@ export class WorkspaceStore { * Remove a workspace and clean up subscriptions. 
*/ removeWorkspace(workspaceId: string): void { + // Clean up consumer manager state + this.consumerManager.removeWorkspace(workspaceId); + // Unsubscribe from IPC const unsubscribe = this.ipcUnsubscribers.get(workspaceId); if (unsubscribe) { @@ -310,6 +451,8 @@ export class WorkspaceStore { // Clean up state this.states.delete(workspaceId); + this.usageStore.delete(workspaceId); + this.consumersStore.delete(workspaceId); this.aggregators.delete(workspaceId); this.caughtUp.delete(workspaceId); this.historicalMessages.delete(workspaceId); @@ -345,12 +488,17 @@ export class WorkspaceStore { * Cleanup all subscriptions (call on unmount). */ dispose(): void { + // Clean up consumer manager + this.consumerManager.dispose(); + for (const unsubscribe of this.ipcUnsubscribers.values()) { unsubscribe(); } this.ipcUnsubscribers.clear(); this.states.clear(); this.derived.clear(); + this.usageStore.clear(); + this.consumersStore.clear(); this.aggregators.clear(); this.caughtUp.clear(); this.historicalMessages.clear(); @@ -403,6 +551,13 @@ export class WorkspaceStore { this.caughtUp.set(workspaceId, true); this.states.bump(workspaceId); this.checkAndBumpRecencyIfChanged(); // Messages loaded, update recency + + // Bump usage after loading history + this.usageStore.bump(workspaceId); + + // Queue consumer calculation in background + this.consumerManager.scheduleCalculation(workspaceId, aggregator); + return; } @@ -423,6 +578,9 @@ export class WorkspaceStore { aggregator: StreamingMessageAggregator, data: WorkspaceChatMessage ): void { + // Bump usage if metadata present (forward compatible - works for any event type) + this.bumpUsageIfPresent(workspaceId, "metadata" in data ? 
data.metadata : undefined); + if (isStreamError(data)) { aggregator.handleStreamError(data); this.states.bump(workspaceId); @@ -524,6 +682,10 @@ export class WorkspaceStore { this.states.bump(workspaceId); this.checkAndBumpRecencyIfChanged(); // Stream ended, update recency + + // Queue consumer calculation in background + this.consumerManager.scheduleCalculation(workspaceId, aggregator); + return; } @@ -536,6 +698,14 @@ export class WorkspaceStore { detail: { workspaceId }, }) ); + + this.bumpUsageIfPresent(workspaceId, data.metadata); + + // Recalculate consumers if usage updated (abort may have usage if stream completed) + if (data.metadata?.usage) { + this.consumerManager.scheduleCalculation(workspaceId, aggregator); + } + return; } @@ -554,6 +724,11 @@ export class WorkspaceStore { if (isToolCallEnd(data)) { aggregator.handleToolCallEnd(data); this.states.bump(workspaceId); + + // Bump consumers on tool-end for real-time updates during streaming + // Tools complete before stream-end, so we want breakdown to update immediately + this.consumerManager.scheduleCalculation(workspaceId, aggregator); + return; } @@ -657,3 +832,27 @@ export function useWorkspaceAggregator(workspaceId: string) { const store = useWorkspaceStoreRaw(); return store.getAggregator(workspaceId); } + +/** + * Hook for usage metadata (instant, no tokenization). + * Updates immediately when usage metadata arrives from API responses. + */ +export function useWorkspaceUsage(workspaceId: string): WorkspaceUsageState { + const store = getStoreInstance(); + return useSyncExternalStore( + (listener) => store.subscribeUsage(workspaceId, listener), + () => store.getWorkspaceUsage(workspaceId) + ); +} + +/** + * Hook for consumer breakdown (lazy, with tokenization). + * Updates after async Web Worker calculation completes. 
+ */ +export function useWorkspaceConsumers(workspaceId: string): WorkspaceConsumersState { + const store = getStoreInstance(); + return useSyncExternalStore( + (listener) => store.subscribeConsumers(workspaceId, listener), + () => store.getWorkspaceConsumers(workspaceId) + ); +} diff --git a/src/utils/tokens/tokenMeterUtils.ts b/src/utils/tokens/tokenMeterUtils.ts index fae341ea1..51caf8774 100644 --- a/src/utils/tokens/tokenMeterUtils.ts +++ b/src/utils/tokens/tokenMeterUtils.ts @@ -25,7 +25,7 @@ export interface TokenMeterData { interface SegmentDef { type: TokenSegment["type"]; - key: keyof ChatUsageDisplay; + key: "input" | "cached" | "cacheCreate" | "output" | "reasoning"; color: string; label: string; } diff --git a/src/utils/tokens/tokenStatsCalculator.test.ts b/src/utils/tokens/tokenStatsCalculator.test.ts new file mode 100644 index 000000000..18b029ad8 --- /dev/null +++ b/src/utils/tokens/tokenStatsCalculator.test.ts @@ -0,0 +1,108 @@ +import { describe, test, expect } from "@jest/globals"; +import { createDisplayUsage } from "./tokenStatsCalculator"; +import type { LanguageModelV2Usage } from "@ai-sdk/provider"; + +describe("createDisplayUsage", () => { + test("uses usage.reasoningTokens when available", () => { + const usage: LanguageModelV2Usage = { + inputTokens: 1000, + outputTokens: 500, + totalTokens: 1500, + reasoningTokens: 100, + }; + + const result = createDisplayUsage(usage, "openai:gpt-5-pro"); + + expect(result?.reasoning.tokens).toBe(100); + expect(result?.output.tokens).toBe(400); // 500 - 100 + }); + + test("falls back to providerMetadata.openai.reasoningTokens when usage.reasoningTokens is undefined", () => { + const usage: LanguageModelV2Usage = { + inputTokens: 1000, + outputTokens: 500, + totalTokens: 1500, + // reasoningTokens not provided + }; + + const providerMetadata = { + openai: { + reasoningTokens: 150, + responseId: "resp_123", + serviceTier: "default", + }, + }; + + const result = createDisplayUsage(usage, 
"openai:gpt-5-pro", providerMetadata); + + expect(result?.reasoning.tokens).toBe(150); + expect(result?.output.tokens).toBe(350); // 500 - 150 + }); + + test("uses 0 when both usage.reasoningTokens and providerMetadata.openai.reasoningTokens are undefined", () => { + const usage: LanguageModelV2Usage = { + inputTokens: 1000, + outputTokens: 500, + totalTokens: 1500, + }; + + const providerMetadata = { + openai: { + responseId: "resp_123", + serviceTier: "default", + }, + }; + + const result = createDisplayUsage(usage, "openai:gpt-5-pro", providerMetadata); + + expect(result?.reasoning.tokens).toBe(0); + expect(result?.output.tokens).toBe(500); // All output tokens + }); + + test("prefers usage.reasoningTokens over providerMetadata when both exist", () => { + const usage: LanguageModelV2Usage = { + inputTokens: 1000, + outputTokens: 500, + totalTokens: 1500, + reasoningTokens: 100, + }; + + const providerMetadata = { + openai: { + reasoningTokens: 999, // Should be ignored + responseId: "resp_123", + serviceTier: "default", + }, + }; + + const result = createDisplayUsage(usage, "openai:gpt-5-pro", providerMetadata); + + expect(result?.reasoning.tokens).toBe(100); // Uses usage, not providerMetadata + expect(result?.output.tokens).toBe(400); // 500 - 100 + }); + + test("works with non-OpenAI providers that don't have providerMetadata.openai", () => { + const usage: LanguageModelV2Usage = { + inputTokens: 1000, + outputTokens: 500, + totalTokens: 1500, + reasoningTokens: 200, + }; + + const providerMetadata = { + anthropic: { + cacheCreationInputTokens: 50, + }, + }; + + const result = createDisplayUsage( + usage, + "anthropic:claude-sonnet-4-20250514", + providerMetadata + ); + + expect(result?.reasoning.tokens).toBe(200); + expect(result?.output.tokens).toBe(300); // 500 - 200 + expect(result?.cacheCreate.tokens).toBe(50); // Anthropic metadata still works + }); +}); diff --git a/src/utils/tokens/tokenStatsCalculator.ts b/src/utils/tokens/tokenStatsCalculator.ts 
index a6e641e58..8507be873 100644 --- a/src/utils/tokens/tokenStatsCalculator.ts +++ b/src/utils/tokens/tokenStatsCalculator.ts @@ -1,6 +1,6 @@ /** * Shared token statistics calculation logic - * Used by both frontend (ChatContext) and backend (debug commands) + * Used by both frontend (WorkspaceStore) and backend (debug commands) * * IMPORTANT: This utility is intentionally abstracted so that the debug command * (`bun debug costs`) has exact parity with the UI display in the Costs tab. @@ -45,11 +45,14 @@ export function createDisplayUsage( (providerMetadata?.anthropic as { cacheCreationInputTokens?: number } | undefined) ?.cacheCreationInputTokens ?? 0; + // Extract reasoning tokens with fallback to provider metadata (OpenAI-specific) + const reasoningTokens = + usage.reasoningTokens ?? + (providerMetadata?.openai as { reasoningTokens?: number } | undefined)?.reasoningTokens ?? + 0; + // Calculate output tokens excluding reasoning - const outputWithoutReasoning = Math.max( - 0, - (usage.outputTokens ?? 0) - (usage.reasoningTokens ?? 0) - ); + const outputWithoutReasoning = Math.max(0, (usage.outputTokens ?? 0) - reasoningTokens); // Get model stats for cost calculation const modelStats = getModelStats(model); @@ -66,7 +69,7 @@ export function createDisplayUsage( cachedCost = cachedTokens * (modelStats.cache_read_input_token_cost ?? 0); cacheCreateCost = cacheCreateTokens * (modelStats.cache_creation_input_token_cost ?? 0); outputCost = outputWithoutReasoning * modelStats.output_cost_per_token; - reasoningCost = (usage.reasoningTokens ?? 0) * modelStats.output_cost_per_token; + reasoningCost = reasoningTokens * modelStats.output_cost_per_token; } return { @@ -87,9 +90,10 @@ export function createDisplayUsage( cost_usd: outputCost, }, reasoning: { - tokens: usage.reasoningTokens ?? 
0, + tokens: reasoningTokens, cost_usd: reasoningCost, }, + model, // Include model for display purposes }; } diff --git a/src/utils/tokens/usageAggregator.ts b/src/utils/tokens/usageAggregator.ts index 61a439c60..afd9d1849 100644 --- a/src/utils/tokens/usageAggregator.ts +++ b/src/utils/tokens/usageAggregator.ts @@ -26,6 +26,9 @@ export interface ChatUsageDisplay { // totalOutput = output + reasoning output: ChatUsageComponent; reasoning: ChatUsageComponent; + + // Optional model field for display purposes (context window calculation, etc.) + model?: string; } /** @@ -48,7 +51,14 @@ export function sumUsageHistory(usageHistory: ChatUsageDisplay[]): ChatUsageDisp for (const usage of usageHistory) { // Iterate over each component and sum tokens and costs - for (const key of Object.keys(sum) as Array) { + const componentKeys: Array<"input" | "cached" | "cacheCreate" | "output" | "reasoning"> = [ + "input", + "cached", + "cacheCreate", + "output", + "reasoning", + ]; + for (const key of componentKeys) { sum[key].tokens += usage[key].tokens; if (usage[key].cost_usd === undefined) { hasUndefinedCosts = true;