1 change: 1 addition & 0 deletions .storybook/mocks/orpc.ts
@@ -207,6 +207,7 @@ export function createMockORPCClient(options: MockORPCClientOptions = {}): APICl
terminate: async () => ({ success: true, data: undefined }),
sendToBackground: async () => ({ success: true, data: undefined }),
},
getSessionUsage: async () => undefined,
},
window: {
setTitle: async () => undefined,
20 changes: 10 additions & 10 deletions src/browser/components/RightSidebar/CostsTab.tsx
@@ -1,7 +1,7 @@
import React from "react";
import { useWorkspaceUsage, useWorkspaceConsumers } from "@/browser/stores/WorkspaceStore";
import { getModelStats } from "@/common/utils/tokens/modelStats";
import { sumUsageHistory } from "@/common/utils/tokens/usageAggregator";
import { sumUsageHistory, type ChatUsageDisplay } from "@/common/utils/tokens/usageAggregator";
import { usePersistedState } from "@/browser/hooks/usePersistedState";
import { ToggleGroup, type ToggleOption } from "../ToggleGroup";
import { useProviderOptions } from "@/browser/hooks/useProviderOptions";
@@ -83,17 +83,17 @@ const CostsTabComponent: React.FC<CostsTabProps> = ({ workspaceId }) => {
useAutoCompactionSettings(workspaceId, currentModel);

// Session usage for cost calculation
// Uses usageHistory (total across all steps) + liveCostUsage (cumulative during streaming)
// Uses sessionTotal (pre-computed) + liveCostUsage (cumulative during streaming)
const sessionUsage = React.useMemo(() => {
const historicalSum = sumUsageHistory(usage.usageHistory);
if (!usage.liveCostUsage) return historicalSum;
if (!historicalSum) return usage.liveCostUsage;
return sumUsageHistory([historicalSum, usage.liveCostUsage]);
}, [usage.usageHistory, usage.liveCostUsage]);
const parts: ChatUsageDisplay[] = [];
if (usage.sessionTotal) parts.push(usage.sessionTotal);
if (usage.liveCostUsage) parts.push(usage.liveCostUsage);
return parts.length > 0 ? sumUsageHistory(parts) : undefined;
}, [usage.sessionTotal, usage.liveCostUsage]);

const hasUsageData =
usage &&
(usage.usageHistory.length > 0 ||
(usage.sessionTotal !== undefined ||
usage.lastContextUsage !== undefined ||
usage.liveUsage !== undefined);
const hasConsumerData = consumers && (consumers.totalTokens > 0 || consumers.isCalculating);
@@ -111,8 +111,8 @@ const CostsTabComponent: React.FC<CostsTabProps> = ({ workspaceId }) => {
);
}

// Last Request (for Cost section): always the last completed request
const lastRequestUsage = usage.usageHistory[usage.usageHistory.length - 1];
// Last Request (for Cost section): from persisted data
const lastRequestUsage = usage.lastRequest?.usage;

// Cost and Details table use viewMode
const displayUsage = viewMode === "last-request" ? lastRequestUsage : sessionUsage;
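For context on the CostsTab change above: the memo assumes `sumUsageHistory` performs a field-wise sum over `ChatUsageDisplay` entries. Below is a minimal sketch of that assumed behaviour, limited to the token buckets visible elsewhere in this diff (`input`, `cached`, `cacheCreate`, `output`, `reasoning`); the real aggregator in `@/common/utils/tokens/usageAggregator` may also track costs and other fields, so treat this as an illustration rather than the actual implementation.

```ts
// Hypothetical illustration of the assumed field-wise sum; not the project's actual aggregator.
type TokenBucket = { tokens: number };
type UsageLike = Record<"input" | "cached" | "cacheCreate" | "output" | "reasoning", TokenBucket>;

function sumUsageLike(parts: UsageLike[]): UsageLike | undefined {
  // Empty input yields undefined in this sketch; the CostsTab memo guards against
  // calling the real aggregator with an empty array, so its behaviour there is unknown.
  if (parts.length === 0) return undefined;
  const keys = ["input", "cached", "cacheCreate", "output", "reasoning"] as const;
  const total = {} as UsageLike;
  for (const key of keys) {
    total[key] = { tokens: parts.reduce((sum, part) => sum + part[key].tokens, 0) };
  }
  return total;
}

// Usage mirroring the memo: persisted session total plus live streaming usage (numbers are made up).
const sessionUsage = sumUsageLike([
  { input: { tokens: 1200 }, cached: { tokens: 300 }, cacheCreate: { tokens: 0 }, output: { tokens: 450 }, reasoning: { tokens: 0 } },
  { input: { tokens: 800 }, cached: { tokens: 0 }, cacheCreate: { tokens: 100 }, output: { tokens: 200 }, reasoning: { tokens: 50 } },
]);
```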
1 change: 1 addition & 0 deletions src/browser/contexts/WorkspaceContext.test.tsx
@@ -605,6 +605,7 @@ function createMockAPI(options: MockAPIOptions = {}) {
);
})
),
getSessionUsage: mock(options.workspace?.getSessionUsage ?? (() => Promise.resolve(undefined))),
onChat: mock(
options.workspace?.onChat ??
(async () => {
3 changes: 3 additions & 0 deletions src/browser/stores/WorkspaceStore.test.ts
@@ -11,9 +11,12 @@ const mockOnChat = mock(async function* (): AsyncGenerator<WorkspaceChatMessage,
await Promise.resolve();
});

const mockGetSessionUsage = mock(() => Promise.resolve(undefined));

const mockClient = {
workspace: {
onChat: mockOnChat,
getSessionUsage: mockGetSessionUsage,
},
};

111 changes: 76 additions & 35 deletions src/browser/stores/WorkspaceStore.ts
@@ -20,10 +20,14 @@ import {
} from "@/common/orpc/types";
import type { StreamEndEvent, StreamAbortEvent } from "@/common/types/stream";
import { MapStore } from "./MapStore";
import { collectUsageHistory, createDisplayUsage } from "@/common/utils/tokens/displayUsage";
import { createDisplayUsage } from "@/common/utils/tokens/displayUsage";
import { WorkspaceConsumerManager } from "./WorkspaceConsumerManager";
import type { ChatUsageDisplay } from "@/common/utils/tokens/usageAggregator";
import { sumUsageHistory } from "@/common/utils/tokens/usageAggregator";
import type { TokenConsumer } from "@/common/types/chatStats";
import { normalizeGatewayModel } from "@/common/utils/ai/models";
import type { z } from "zod";
import type { SessionUsageFileSchema } from "@/common/orpc/schemas/chatStats";
import type { LanguageModelV2Usage } from "@ai-sdk/provider";
import { createFreshRetryState } from "@/browser/utils/messages/retryState";
import { trackStreamCompleted } from "@/common/telemetry";
@@ -65,12 +65,19 @@ type DerivedState = Record<string, number>;
* Updates instantly when usage metadata arrives.
*
* For multi-step tool calls, cost and context usage differ:
* - usageHistory: Total usage per message (sum of all steps) for cost calculation
* - sessionTotal: Pre-computed sum of all models from session-usage.json
* - lastRequest: Last completed request (persisted for app restart)
* - lastContextUsage: Last step's usage for context window display (inputTokens = actual context size)
*/
export interface WorkspaceUsageState {
/** Usage history for cost calculation (total across all steps per message) */
usageHistory: ChatUsageDisplay[];
/** Pre-computed session total (sum of all models) */
sessionTotal?: ChatUsageDisplay;
/** Last completed request (persisted) */
lastRequest?: {
model: string;
usage: ChatUsageDisplay;
timestamp: number;
};
/** Last message's context usage (last step only, for context window display) */
lastContextUsage?: ChatUsageDisplay;
totalTokens: number;
@@ -124,6 +135,8 @@ export class WorkspaceStore {
private pendingStreamEvents = new Map<string, WorkspaceChatMessage[]>();
private workspaceMetadata = new Map<string, FrontendWorkspaceMetadata>(); // Store metadata for name lookup
private queuedMessages = new Map<string, QueuedMessage | null>(); // Cached queued messages
// Cumulative session usage (from session-usage.json)
private sessionUsage = new Map<string, z.infer<typeof SessionUsageFileSchema>>();

// Idle callback handles for high-frequency delta events to reduce re-renders during streaming.
// Data is always updated immediately in the aggregator; only UI notification is scheduled.
@@ -172,6 +185,26 @@ export class WorkspaceStore {
// Reset retry state on successful stream completion
updatePersistedState(getRetryStateKey(workspaceId), createFreshRetryState());

// Update local session usage (mirrors backend's addUsage)
const model = streamEndData.metadata?.model;
const rawUsage = streamEndData.metadata?.usage;
const providerMetadata = streamEndData.metadata?.providerMetadata;
if (model && rawUsage) {
const usage = createDisplayUsage(rawUsage, model, providerMetadata);
if (usage) {
const normalizedModel = normalizeGatewayModel(model);
const current = this.sessionUsage.get(workspaceId) ?? {
byModel: {},
version: 1 as const,
};
const existing = current.byModel[normalizedModel];
// CRITICAL: Accumulate, don't overwrite (same logic as backend)
current.byModel[normalizedModel] = existing ? sumUsageHistory([existing, usage])! : usage;
current.lastRequest = { model: normalizedModel, usage, timestamp: Date.now() };
this.sessionUsage.set(workspaceId, current);
}
}

// Flush any pending debounced bump before final bump to avoid double-bump
this.cancelPendingIdleBump(workspaceId);
this.states.bump(workspaceId);
@@ -548,48 +581,45 @@
}

/**
* Extract usage from messages (no tokenization).
* Each usage entry calculated with its own model for accurate costs.
* Extract usage from session-usage.json (no tokenization or message iteration).
*
* Returns empty state if workspace doesn't exist (e.g., creation mode).
*/
getWorkspaceUsage(workspaceId: string): WorkspaceUsageState {
return this.usageStore.get(workspaceId, () => {
const aggregator = this.aggregators.get(workspaceId);
if (!aggregator) {
return { usageHistory: [], totalTokens: 0 };
return { totalTokens: 0 };
}

const messages = aggregator.getAllMessages();
const model = aggregator.getCurrentModel();
const sessionData = this.sessionUsage.get(workspaceId);

// Collect usage history for cost calculation (total across all steps per message)
const usageHistory = collectUsageHistory(messages, model);

// Calculate total from usage history (now includes historical)
const totalTokens = usageHistory.reduce(
(sum, u) =>
sum +
u.input.tokens +
u.cached.tokens +
u.cacheCreate.tokens +
u.output.tokens +
u.reasoning.tokens,
0
);
// Session total: sum all models from persisted data
const sessionTotal =
sessionData && Object.keys(sessionData.byModel).length > 0
? sumUsageHistory(Object.values(sessionData.byModel))
: undefined;

// Last request from persisted data
const lastRequest = sessionData?.lastRequest;

// Get last message's context usage for context window display
// Uses contextUsage (last step) if available, falls back to usage for old messages
// Skips compacted messages - their usage reflects pre-compaction context, not current
// Calculate total tokens from session total
const totalTokens = sessionTotal
? sessionTotal.input.tokens +
sessionTotal.cached.tokens +
sessionTotal.cacheCreate.tokens +
sessionTotal.output.tokens +
sessionTotal.reasoning.tokens
: 0;

// Get last message's context usage (unchanged from before)
const messages = aggregator.getAllMessages();
const lastContextUsage = (() => {
for (let i = messages.length - 1; i >= 0; i--) {
const msg = messages[i];
if (msg.role === "assistant") {
// Skip compacted messages - their usage is from pre-compaction context
// and doesn't reflect current context window size
if (msg.metadata?.compacted) {
continue;
}
if (msg.metadata?.compacted) continue;
const rawUsage = msg.metadata?.contextUsage;
const providerMeta =
msg.metadata?.contextProviderMetadata ?? msg.metadata?.providerMetadata;
@@ -602,10 +632,8 @@ export class WorkspaceStore {
return undefined;
})();

// Include active stream usage if currently streaming
// Live streaming data (unchanged)
const activeStreamId = aggregator.getActiveStreamMessageId();

// Live context usage (last step's inputTokens = current context window)
const rawContextUsage = activeStreamId
? aggregator.getActiveStreamUsage(activeStreamId)
: undefined;
@@ -617,7 +645,6 @@ export class WorkspaceStore {
? createDisplayUsage(rawContextUsage, model, rawStepProviderMetadata)
: undefined;

// Live cost usage (cumulative across all steps, with accumulated cache creation tokens)
const rawCumulativeUsage = activeStreamId
? aggregator.getActiveStreamCumulativeUsage(activeStreamId)
: undefined;
@@ -629,7 +656,7 @@ export class WorkspaceStore {
? createDisplayUsage(rawCumulativeUsage, model, rawCumulativeProviderMetadata)
: undefined;

return { usageHistory, lastContextUsage, totalTokens, liveUsage, liveCostUsage };
return { sessionTotal, lastRequest, lastContextUsage, totalTokens, liveUsage, liveCostUsage };
});
}

@@ -793,6 +820,19 @@ export class WorkspaceStore {
})();

this.ipcUnsubscribers.set(workspaceId, () => controller.abort());

// Fetch persisted session usage (fire-and-forget)
this.client.workspace
.getSessionUsage({ workspaceId })
.then((data) => {
if (data) {
this.sessionUsage.set(workspaceId, data);
this.usageStore.bump(workspaceId);
}
})
.catch((error) => {
console.warn(`Failed to fetch session usage for ${workspaceId}:`, error);
});
} else {
console.warn(`[WorkspaceStore] No ORPC client available for workspace ${workspaceId}`);
}
@@ -831,6 +871,7 @@ export class WorkspaceStore {
this.previousSidebarValues.delete(workspaceId);
this.sidebarStateCache.delete(workspaceId);
this.workspaceCreatedAt.delete(workspaceId);
this.sessionUsage.delete(workspaceId);
}

/**
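The WorkspaceStore changes above mirror the backend's session-usage accumulation locally and hydrate it from `workspace.getSessionUsage`. The persisted payload's exact shape comes from `SessionUsageFileSchema`, which is not shown in this diff; the sketch below is inferred purely from how the store reads and writes the data, so treat the field list as an assumption rather than the schema itself.

```ts
// Assumed shape of the session-usage.json payload, inferred from this diff only;
// the real SessionUsageFileSchema in src/common/orpc/schemas/chatStats is the source of truth.
import type { ChatUsageDisplay } from "@/common/utils/tokens/usageAggregator";

interface SessionUsageFileLike {
  version: 1;
  // Keyed by normalizeGatewayModel(model); each entry accumulates across completed requests.
  byModel: Record<string, ChatUsageDisplay>;
  // Last completed request, used by CostsTab's "last request" view after an app restart.
  lastRequest?: {
    model: string;
    usage: ChatUsageDisplay;
    timestamp: number;
  };
}
```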
39 changes: 13 additions & 26 deletions src/browser/utils/compaction/autoCompactionCheck.test.ts
@@ -27,23 +27,14 @@ const createUsageEntry = (
// Helper to create mock WorkspaceUsageState
const createMockUsage = (
lastEntryTokens: number,
historicalTokens?: number,
_historicalTokens?: number, // Kept for backward compat but unused (session-usage.json handles historical)
model: string = KNOWN_MODELS.SONNET.id,
liveUsage?: ChatUsageDisplay
): WorkspaceUsageState => {
const usageHistory: ChatUsageDisplay[] = [];
// Create lastContextUsage representing the most recent context window state
const lastContextUsage = createUsageEntry(lastEntryTokens, model);

if (historicalTokens !== undefined) {
// Add historical usage (from compaction)
usageHistory.push(createUsageEntry(historicalTokens, "historical-model"));
}

// Add recent usage
const recentUsage = createUsageEntry(lastEntryTokens, model);
usageHistory.push(recentUsage);

// lastContextUsage is the most recent context window state
return { usageHistory, lastContextUsage: recentUsage, totalTokens: 0, liveUsage };
return { lastContextUsage, totalTokens: 0, liveUsage };
};

describe("checkAutoCompaction", () => {
Expand All @@ -60,8 +51,8 @@ describe("checkAutoCompaction", () => {
expect(result.thresholdPercentage).toBe(70);
});

test("returns false when usage history is empty", () => {
const usage: WorkspaceUsageState = { usageHistory: [], totalTokens: 0 };
test("returns false when no context usage data", () => {
const usage: WorkspaceUsageState = { totalTokens: 0 };
const result = checkAutoCompaction(usage, KNOWN_MODELS.SONNET.id, false);

expect(result.shouldShowWarning).toBe(false);
@@ -146,7 +137,6 @@ describe("checkAutoCompaction", () => {
model: KNOWN_MODELS.SONNET.id,
};
const usage: WorkspaceUsageState = {
usageHistory: [usageEntry],
lastContextUsage: usageEntry,
totalTokens: 0,
};
@@ -195,16 +185,16 @@
});

describe("Edge Cases", () => {
test("empty usageHistory array returns safe defaults", () => {
const usage: WorkspaceUsageState = { usageHistory: [], totalTokens: 0 };
test("missing context usage returns safe defaults", () => {
const usage: WorkspaceUsageState = { totalTokens: 0 };
const result = checkAutoCompaction(usage, KNOWN_MODELS.SONNET.id, false);

expect(result.shouldShowWarning).toBe(false);
expect(result.usagePercentage).toBe(0);
expect(result.thresholdPercentage).toBe(70);
});

test("single entry in usageHistory works correctly", () => {
test("single context usage entry works correctly", () => {
const usage = createMockUsage(140_000);
const result = checkAutoCompaction(usage, KNOWN_MODELS.SONNET.id, false);

@@ -242,7 +232,6 @@ describe("checkAutoCompaction", () => {
model: KNOWN_MODELS.SONNET.id,
};
const usage: WorkspaceUsageState = {
usageHistory: [zeroEntry],
lastContextUsage: zeroEntry,
totalTokens: 0,
};
@@ -356,24 +345,22 @@ describe("checkAutoCompaction", () => {
expect(result.shouldForceCompact).toBe(true);
});

test("shouldForceCompact triggers with empty history but liveUsage at force threshold", () => {
test("shouldForceCompact triggers with liveUsage at force threshold (no lastContextUsage)", () => {
const liveUsage = createUsageEntry(150_000); // 75%
const usage: WorkspaceUsageState = {
usageHistory: [],
totalTokens: 0,
liveUsage,
};
const result = checkAutoCompaction(usage, KNOWN_MODELS.SONNET.id, false);

expect(result.shouldForceCompact).toBe(true);
expect(result.usagePercentage).toBe(75); // usagePercentage reflects live even with empty history
expect(result.usagePercentage).toBe(75); // usagePercentage reflects live
});

test("shouldShowWarning uses live usage when no history exists", () => {
// No lastUsage, liveUsage at 65% - should show warning (65% >= 60%)
test("shouldShowWarning uses live usage when no lastContextUsage exists", () => {
// No lastContextUsage, liveUsage at 65% - should show warning (65% >= 60%)
const liveUsage = createUsageEntry(130_000); // 65%
const usage: WorkspaceUsageState = {
usageHistory: [],
totalTokens: 0,
liveUsage,
};
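The body of `createUsageEntry` is cut off by the diff view above. Based on its call sites (a token count plus an optional model id producing a `ChatUsageDisplay`), a plausible reconstruction looks like the sketch below; the `model` field and the zeroed buckets are assumptions inferred from the inline test literals, and the real helper may also populate cost or other fields.

```ts
// Hypothetical stand-in for the truncated helper; named differently to avoid implying it is the real one.
// The return value is typed loosely because the full ChatUsageDisplay definition is not visible in this diff.
const createUsageEntryLike = (tokens: number, model: string) => ({
  input: { tokens },
  cached: { tokens: 0 },
  cacheCreate: { tokens: 0 },
  output: { tokens: 0 },
  reasoning: { tokens: 0 },
  model, // the inline test literals above include a model field alongside the token buckets
});

// e.g. createUsageEntryLike(140_000, KNOWN_MODELS.SONNET.id) as a lastContextUsage stand-in
```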
1 change: 1 addition & 0 deletions src/cli/cli.test.ts
@@ -73,6 +73,7 @@ async function createTestServer(authToken?: string): Promise<TestServerHandle> {
menuEventService: services.menuEventService,
voiceService: services.voiceService,
telemetryService: services.telemetryService,
sessionUsageService: services.sessionUsageService,
};

// Use the actual createOrpcServer function
1 change: 1 addition & 0 deletions src/cli/server.test.ts
@@ -76,6 +76,7 @@ async function createTestServer(): Promise<TestServerHandle> {
menuEventService: services.menuEventService,
voiceService: services.voiceService,
telemetryService: services.telemetryService,
sessionUsageService: services.sessionUsageService,
};

// Use the actual createOrpcServer function