From 20299007b4787c9a047daf661ff881e84d5901d3 Mon Sep 17 00:00:00 2001
From: ethan
Date: Fri, 21 Nov 2025 12:56:35 +1100
Subject: [PATCH 1/9] feat: add auto-compaction with progressive warnings

---
 src/browser/components/AIView.tsx            |  25 +++-
 src/browser/components/ChatInput/index.tsx   | 123 +++++++++++++-----
 src/browser/components/ChatInput/types.ts    |   2 +
 src/browser/components/CompactionWarning.tsx |  36 +++++
 src/browser/hooks/useResumeManager.ts        |   5 +-
 src/browser/stores/WorkspaceStore.ts         |  33 +++--
 src/browser/utils/chatCommands.ts            |  23 +++-
 .../utils/compaction/autoCompactionCheck.ts  |  87 +++++++++++++
 .../messages/StreamingMessageAggregator.ts   |   5 +-
 src/common/types/message.ts                  |   8 +-
 src/node/services/agentSession.ts            |   3 +-
 11 files changed, 294 insertions(+), 56 deletions(-)
 create mode 100644 src/browser/components/CompactionWarning.tsx
 create mode 100644 src/browser/utils/compaction/autoCompactionCheck.ts

diff --git a/src/browser/components/AIView.tsx b/src/browser/components/AIView.tsx
index 219ad9b3a..314d69047 100644
--- a/src/browser/components/AIView.tsx
+++ b/src/browser/components/AIView.tsx
@@ -23,7 +23,11 @@ import { formatKeybind, KEYBINDS } from "@/browser/utils/ui/keybinds";
 import { useAutoScroll } from "@/browser/hooks/useAutoScroll";
 import { usePersistedState } from "@/browser/hooks/usePersistedState";
 import { useThinking } from "@/browser/contexts/ThinkingContext";
-import { useWorkspaceState, useWorkspaceAggregator } from "@/browser/stores/WorkspaceStore";
+import {
+  useWorkspaceState,
+  useWorkspaceAggregator,
+  useWorkspaceUsage,
+} from "@/browser/stores/WorkspaceStore";
 import { WorkspaceHeader } from "./WorkspaceHeader";
 import { getModelName } from "@/common/utils/ai/models";
 import type { DisplayedMessage } from "@/common/types/message";
@@ -31,6 +35,9 @@ import type { RuntimeConfig } from "@/common/types/runtime";
 import { useAIViewKeybinds } from "@/browser/hooks/useAIViewKeybinds";
 import { evictModelFromLRU } from "@/browser/hooks/useModelLRU";
 import { QueuedMessage } from "./Messages/QueuedMessage";
+import { CompactionWarning } from "./CompactionWarning";
+import { shouldAutoCompact } from "@/browser/utils/compaction/autoCompactionCheck";
+import { use1MContext } from "@/browser/hooks/use1MContext";
 
 interface AIViewProps {
   workspaceId: string;
@@ -74,6 +81,8 @@ const AIViewInner: React.FC<AIViewProps> = ({
   const workspaceState = useWorkspaceState(workspaceId);
   const aggregator = useWorkspaceAggregator(workspaceId);
+  const workspaceUsage = useWorkspaceUsage(workspaceId);
+  const [use1M] = use1MContext();
   const handledModelErrorsRef = useRef<Set<string>>(new Set());
 
   useEffect(() => {
@@ -318,6 +327,13 @@
   // Get active stream message ID for token counting
   const activeStreamMessageId = aggregator.getActiveStreamMessageId();
 
+  const autoCompactionCheck = currentModel
+    ? shouldAutoCompact(workspaceUsage, currentModel, use1M)
+    : { shouldShowWarning: false, usagePercentage: 0, thresholdPercentage: 70 };
+
+  // Show warning when: shouldShowWarning flag is true AND not currently compacting
+  const shouldShowCompactionWarning = !isCompacting && autoCompactionCheck.shouldShowWarning;
+
   // Note: We intentionally do NOT reset autoRetry when streams start.
   // If user pressed the interrupt key, autoRetry stays false until they manually retry.
   // This makes state transitions explicit and predictable.
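The two derived values above are the whole UI-side contract for the feature. A condensed sketch of the gating, using this diff's names (a sketch only, not an exact excerpt; the inline fallback object is the no-model case, and 70 is the default threshold from autoCompactionCheck.ts):

    // Sketch - assumes the names introduced in this patch.
    const check = currentModel
      ? shouldAutoCompact(workspaceUsage, currentModel, use1M)
      : { shouldShowWarning: false, usagePercentage: 0, thresholdPercentage: 70 };

    // The banner stays hidden while a compaction request is already streaming.
    const shouldShowCompactionWarning = !isCompacting && check.shouldShowWarning;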
@@ -503,6 +519,12 @@ const AIViewInner: React.FC<AIViewProps> = ({
           )}
+          {shouldShowCompactionWarning && (
+            <CompactionWarning
+              usagePercentage={autoCompactionCheck.usagePercentage}
+              thresholdPercentage={autoCompactionCheck.thresholdPercentage}
+            />
+          )}
           <ChatInput
             onEditLastUserMessage={() => void handleEditLastUserMessage()}
             canInterrupt={canInterrupt}
             onReady={handleChatInputReady}
+            autoCompactionCheck={autoCompactionCheck}
           />
diff --git a/src/browser/components/ChatInput/index.tsx b/src/browser/components/ChatInput/index.tsx
index a82205f15..507520c8a 100644
--- a/src/browser/components/ChatInput/index.tsx
+++ b/src/browser/components/ChatInput/index.tsx
@@ -30,6 +30,7 @@ import {
   handleCompactCommand,
   forkWorkspace,
   prepareCompactionMessage,
+  executeCompaction,
   type CommandHandlerContext,
 } from "@/browser/utils/chatCommands";
 import { CUSTOM_EVENTS } from "@/common/constants/events";
@@ -472,6 +473,32 @@ export const ChatInput: React.FC = (props) => {
   // Workspace variant: full command handling + message send
   if (variant !== "workspace") return; // Type guard
 
+  // Prepare image parts if any
+  const imageParts = imageAttachments.map((img, index) => {
+    // Validate before sending to help with debugging
+    if (!img.url || typeof img.url !== "string") {
+      console.error(
+        `Image attachment [${index}] has invalid url:`,
+        typeof img.url,
+        img.url?.slice(0, 50)
+      );
+    }
+    if (!img.url?.startsWith("data:")) {
+      console.error(`Image attachment [${index}] url is not a data URL:`, img.url?.slice(0, 100));
+    }
+    if (!img.mediaType || typeof img.mediaType !== "string") {
+      console.error(
+        `Image attachment [${index}] has invalid mediaType:`,
+        typeof img.mediaType,
+        img.mediaType
+      );
+    }
+    return {
+      url: img.url,
+      mediaType: img.mediaType,
+    };
+  });
+
   try {
     // Parse command
     const parsed = parseCommand(messageText);
@@ -571,8 +598,10 @@
       const context: CommandHandlerContext = {
         workspaceId: props.workspaceId,
         sendMessageOptions,
+        imageParts,
         editMessageId: editingMessage?.id,
         setInput,
+        setImageAttachments,
         setIsSending,
         setToast,
         onCancelEdit: props.onCancelEdit,
@@ -636,7 +665,9 @@
       const context: CommandHandlerContext = {
         workspaceId: props.workspaceId,
         sendMessageOptions,
+        imageParts: undefined, // /new doesn't use images
         setInput,
+        setImageAttachments,
         setIsSending,
         setToast,
       };
@@ -656,42 +687,70 @@
     }
   }
 
-  // Regular message - send directly via API
-  setIsSending(true);
-
   // Save current state for restoration on error
   const previousImageAttachments = [...imageAttachments];
 
-  try {
-    // Prepare image parts if any
-    const imageParts = imageAttachments.map((img, index) => {
-      // Validate before sending to help with debugging
-      if (!img.url || typeof img.url !== "string") {
-        console.error(
-          `Image attachment [${index}] has invalid url:`,
-          typeof img.url,
-          img.url?.slice(0, 50)
-        );
-      }
-      if (!img.url?.startsWith("data:")) {
-        console.error(
-          `Image attachment [${index}] url is not a data URL:`,
-          img.url?.slice(0, 100)
-        );
-      }
-      if (!img.mediaType || typeof img.mediaType !== "string") {
-        console.error(
-          `Image attachment [${index}] has invalid mediaType:`,
-          typeof img.mediaType,
-          img.mediaType
-        );
-      }
-      return {
-        url: img.url,
-        mediaType: img.mediaType,
-      };
-    });
+  // Auto-compaction check (workspace variant only)
+  // Check if we should auto-compact before sending this message
+  // Result is computed in parent (AIView) and passed down to avoid duplicate calculation
+  const shouldAutoCompact =
+    props.autoCompactionCheck &&
+    props.autoCompactionCheck.usagePercentage >= props.autoCompactionCheck.thresholdPercentage;
+  if (variant === "workspace" && !editingMessage && shouldAutoCompact) {
+    // Clear input immediately for responsive UX
+    setInput("");
+    setImageAttachments([]);
+    setIsSending(true);
+
+    try {
+      const result = await executeCompaction({
+        workspaceId: props.workspaceId,
+        continueMessage: {
+          text: messageText,
+          imageParts,
+        },
+        sendMessageOptions,
+      });
+
+      if (!result.success) {
+        // Restore on error
+        setInput(messageText);
+        setImageAttachments(previousImageAttachments);
+        setToast({
+          id: Date.now().toString(),
+          type: "error",
+          title: "Auto-Compaction Failed",
+          message: result.error ?? "Failed to start auto-compaction",
+        });
+      } else {
+        setToast({
+          id: Date.now().toString(),
+          type: "success",
+          message: "Context threshold reached - auto-compacting...",
+        });
+      }
+    } catch (error) {
+      // Restore on unexpected error
+      setInput(messageText);
+      setImageAttachments(previousImageAttachments);
+      setToast({
+        id: Date.now().toString(),
+        type: "error",
+        title: "Auto-Compaction Failed",
+        message:
+          error instanceof Error ? error.message : "Unexpected error during auto-compaction",
+      });
+    } finally {
+      setIsSending(false);
+    }
+    return; // Skip normal send
+  }
+
+  // Regular message - send directly via API
+  setIsSending(true);
+
+  try {
     // When editing a /compact command, regenerate the actual summarization request
     let actualMessageText = messageText;
     let muxMetadata: MuxFrontendMetadata | undefined;
@@ -707,7 +766,7 @@
       } = prepareCompactionMessage({
         workspaceId: props.workspaceId,
         maxOutputTokens: parsed.maxOutputTokens,
-        continueMessage: parsed.continueMessage,
+        continueMessage: { text: parsed.continueMessage ?? "", imageParts },
         model: parsed.model,
         sendMessageOptions,
       });
diff --git a/src/browser/components/ChatInput/types.ts b/src/browser/components/ChatInput/types.ts
index 25f7979c9..324c6e12d 100644
--- a/src/browser/components/ChatInput/types.ts
+++ b/src/browser/components/ChatInput/types.ts
@@ -1,5 +1,6 @@
 import type { ImagePart } from "@/common/types/ipc";
 import type { FrontendWorkspaceMetadata } from "@/common/types/workspace";
+import type { AutoCompactionCheckResult } from "@/browser/utils/compaction/autoCompactionCheck";
 
 export interface ChatInputAPI {
   focus: () => void;
@@ -23,6 +24,7 @@ export interface ChatInputWorkspaceVariant {
   canInterrupt?: boolean;
   disabled?: boolean;
   onReady?: (api: ChatInputAPI) => void;
+  autoCompactionCheck?: AutoCompactionCheckResult; // Computed in parent (AIView) to avoid duplicate calculation
 }
 
 // Creation variant: simplified for first message / workspace creation
diff --git a/src/browser/components/CompactionWarning.tsx b/src/browser/components/CompactionWarning.tsx
new file mode 100644
index 000000000..7688f1bad
--- /dev/null
+++ b/src/browser/components/CompactionWarning.tsx
@@ -0,0 +1,36 @@
+import React from "react";
+
+/**
+ * Warning banner shown when context usage is approaching the compaction threshold.
+ *
+ * Displays progressive warnings:
+ * - Below threshold: "Context left until Auto-Compact: X%" (where X = threshold - current)
+ * - At/above threshold: "⚠️ Context limit reached. Next message will trigger auto-compaction."
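+ * Example (illustrative numbers): with the default 70% threshold, usage at
+ * 63% renders "Context left until Auto-Compact: 7%"; from 70% upward the
+ * urgent copy is shown instead.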
+ *
+ * Displayed above ChatInput when:
+ * - Token usage >= (threshold - 10%) of model's context window
+ * - Not currently compacting (user can still send messages)
+ *
+ * @param usagePercentage - Current token usage as percentage (0-100)
+ * @param thresholdPercentage - Auto-compaction trigger threshold (0-100, default 70)
+ */
+export const CompactionWarning: React.FC<{
+  usagePercentage: number;
+  thresholdPercentage: number;
+}> = (props) => {
+  // At threshold or above, next message will trigger compaction
+  const willCompactNext = props.usagePercentage >= props.thresholdPercentage;
+
+  // Calculate remaining percentage until threshold
+  const remaining = props.thresholdPercentage - props.usagePercentage;
+
+  const message = willCompactNext
+    ? "⚠️ Context limit reached. Next message will trigger auto-compaction."
+    : `Context left until Auto-Compact: ${Math.round(remaining)}%`;
+
+  return (
+    <div>
+      {message}
+    </div>
+ ); +}; diff --git a/src/browser/hooks/useResumeManager.ts b/src/browser/hooks/useResumeManager.ts index 507ab7523..afe5a0fcb 100644 --- a/src/browser/hooks/useResumeManager.ts +++ b/src/browser/hooks/useResumeManager.ts @@ -171,7 +171,10 @@ export function useResumeManager() { if (lastUserMsg?.compactionRequest) { // Apply compaction overrides using shared function (same as ChatInput) // This ensures custom model/tokens are preserved across resume - options = applyCompactionOverrides(options, lastUserMsg.compactionRequest.parsed); + options = applyCompactionOverrides(options, { + maxOutputTokens: lastUserMsg.compactionRequest.parsed.maxOutputTokens, + continueMessage: { text: lastUserMsg.compactionRequest.parsed.continueMessage ?? "" }, + }); } } diff --git a/src/browser/stores/WorkspaceStore.ts b/src/browser/stores/WorkspaceStore.ts index 136d8f8eb..d15f4a09a 100644 --- a/src/browser/stores/WorkspaceStore.ts +++ b/src/browser/stores/WorkspaceStore.ts @@ -424,27 +424,34 @@ export class WorkspaceStore { * Extract usage from messages (no tokenization). * Each usage entry calculated with its own model for accurate costs. * - * REQUIRES: Workspace must have been added via addWorkspace() first. + * Returns empty state if workspace doesn't exist (e.g., creation mode). */ getWorkspaceUsage(workspaceId: string): WorkspaceUsageState { return this.usageStore.get(workspaceId, () => { - const aggregator = this.assertGet(workspaceId); + const aggregator = this.aggregators.get(workspaceId); + if (!aggregator) { + return { usageHistory: [], totalTokens: 0 }; + } const messages = aggregator.getAllMessages(); const model = aggregator.getCurrentModel(); const usageHistory = collectUsageHistory(messages, model); - // Calculate total from usage history (now includes historical) - const totalTokens = usageHistory.reduce( - (sum, u) => - sum + - u.input.tokens + - u.cached.tokens + - u.cacheCreate.tokens + - u.output.tokens + - u.reasoning.tokens, - 0 - ); + const messages = aggregator.getAllMessages(); + const model = aggregator.getCurrentModel(); + const usageHistory = cumUsageHistory(messages, model); + + // Use last entry's total (each entry is cumulative, not a delta) + // Each usageHistory entry contains the FULL prompt tokens for that turn, + // so we only need the most recent value, not a sum + const lastEntry = usageHistory[usageHistory.length - 1]; + const totalTokens = lastEntry + ? lastEntry.input.tokens + + lastEntry.cached.tokens + + lastEntry.cacheCreate.tokens + + lastEntry.output.tokens + + lastEntry.reasoning.tokens + : 0; return { usageHistory, totalTokens }; }); diff --git a/src/browser/utils/chatCommands.ts b/src/browser/utils/chatCommands.ts index 39f63800b..e388f7894 100644 --- a/src/browser/utils/chatCommands.ts +++ b/src/browser/utils/chatCommands.ts @@ -6,8 +6,12 @@ * to ensure consistent behavior and avoid duplication. 
*/ -import type { SendMessageOptions } from "@/common/types/ipc"; -import type { MuxFrontendMetadata, CompactionRequestData } from "@/common/types/message"; +import type { SendMessageOptions, ImagePart } from "@/common/types/ipc"; +import type { + MuxFrontendMetadata, + CompactionRequestData, + ContinueMessage, +} from "@/common/types/message"; import type { FrontendWorkspaceMetadata } from "@/common/types/workspace"; import type { RuntimeConfig } from "@/common/types/runtime"; import { RUNTIME_MODE, SSH_RUNTIME_PREFIX } from "@/common/types/runtime"; @@ -17,6 +21,7 @@ import type { ParsedCommand } from "@/browser/utils/slashCommands/types"; import { applyCompactionOverrides } from "@/browser/utils/messages/compactionOptions"; import { resolveCompactionModel } from "@/browser/utils/messages/compactionModelPreference"; import { getRuntimeKey } from "@/common/constants/storage"; +import type { ImageAttachment } from "../components/ImageAttachments"; // ============================================================================ // Workspace Creation @@ -177,7 +182,7 @@ export { forkWorkspace } from "./workspaceFork"; export interface CompactionOptions { workspaceId: string; maxOutputTokens?: number; - continueMessage?: string; + continueMessage?: ContinueMessage; model?: string; sendMessageOptions: SendMessageOptions; editMessageId?: string; @@ -203,7 +208,7 @@ export function prepareCompactionMessage(options: CompactionOptions): { let messageText = `Summarize this conversation into a compact form for a new Assistant to continue helping the user. Use approximately ${targetWords} words.`; if (options.continueMessage) { - messageText += `\n\nThe user wants to continue with: ${options.continueMessage}`; + messageText += `\n\nThe user wants to continue with: ${options.continueMessage.text}`; } // Handle model preference (sticky globally) @@ -267,7 +272,7 @@ function formatCompactionCommand(options: CompactionOptions): string { cmd += ` -m ${options.model}`; } if (options.continueMessage) { - cmd += `\n${options.continueMessage}`; + cmd += `\n${options.continueMessage.text}`; } return cmd; } @@ -279,8 +284,10 @@ function formatCompactionCommand(options: CompactionOptions): string { export interface CommandHandlerContext { workspaceId: string; sendMessageOptions: SendMessageOptions; + imageParts?: ImagePart[]; editMessageId?: string; setInput: (value: string) => void; + setImageAttachments: (images: ImageAttachment[]) => void; setIsSending: (value: boolean) => void; setToast: (toast: Toast) => void; onCancelEdit?: () => void; @@ -394,19 +401,23 @@ export async function handleCompactCommand( sendMessageOptions, editMessageId, setInput, + setImageAttachments, setIsSending, setToast, onCancelEdit, } = context; setInput(""); + setImageAttachments([]); setIsSending(true); try { const result = await executeCompaction({ workspaceId, maxOutputTokens: parsed.maxOutputTokens, - continueMessage: parsed.continueMessage, + continueMessage: parsed.continueMessage + ? 
{ text: parsed.continueMessage, imageParts: context.imageParts } + : undefined, model: parsed.model, sendMessageOptions, editMessageId, diff --git a/src/browser/utils/compaction/autoCompactionCheck.ts b/src/browser/utils/compaction/autoCompactionCheck.ts new file mode 100644 index 000000000..4369eadc4 --- /dev/null +++ b/src/browser/utils/compaction/autoCompactionCheck.ts @@ -0,0 +1,87 @@ +/** + * Auto-compaction threshold checking + * + * Determines whether auto-compaction should trigger based on current token usage + * as a percentage of the model's context window. + * + * Auto-compaction triggers when: + * - Usage data is available (has at least one API response) + * - Model has known max_input_tokens + * - Usage exceeds threshold (default 70%) + * + * Safe defaults: + * - Returns false if no usage data (first message) + * - Returns false if model stats unavailable (unknown model) + * - Never triggers in edit mode (caller's responsibility to check) + */ + +import type { WorkspaceUsageState } from "@/browser/stores/WorkspaceStore"; +import { getModelStats } from "@/common/utils/tokens/modelStats"; +import { supports1MContext } from "@/common/utils/ai/models"; + +export interface AutoCompactionCheckResult { + shouldShowWarning: boolean; + usagePercentage: number; + thresholdPercentage: number; +} + +// Auto-compaction threshold (0.7 = 70%) +// TODO: Make this configurable via settings +const AUTO_COMPACTION_THRESHOLD = 0.7; + +// Show warning this many percentage points before threshold +const WARNING_ADVANCE_PERCENT = 10; + +/** + * Check if auto-compaction should trigger based on token usage + * + * @param usage - Current workspace usage state (from useWorkspaceUsage) + * @param model - Current model string + * @param use1M - Whether 1M context is enabled + * @param threshold - Usage percentage threshold (0.0-1.0, default 0.7 = 70%) + * @param warningAdvancePercent - Show warning this many percentage points before threshold (default 10) + * @returns Check result with warning flag and usage percentage + */ +export function shouldAutoCompact( + usage: WorkspaceUsageState | undefined, + model: string, + use1M: boolean, + threshold: number = AUTO_COMPACTION_THRESHOLD, + warningAdvancePercent: number = WARNING_ADVANCE_PERCENT +): AutoCompactionCheckResult { + const thresholdPercentage = threshold * 100; + + // No usage data yet - safe default (don't trigger on first message) + if (!usage || usage.usageHistory.length === 0) { + return { + shouldShowWarning: false, + usagePercentage: 0, + thresholdPercentage, + }; + } + + // Determine max tokens for this model + const modelStats = getModelStats(model); + const maxTokens = use1M && supports1MContext(model) ? 
1_000_000 : modelStats?.max_input_tokens; + + // No max tokens known - safe default (can't calculate percentage) + if (!maxTokens) { + return { + shouldShowWarning: false, + usagePercentage: 0, + thresholdPercentage, + }; + } + + // Calculate usage percentage from cumulative conversation total + const usagePercentage = (usage.totalTokens / maxTokens) * 100; + + // Show warning if within advance window (e.g., 60% for 70% threshold with 10% advance) + const shouldShowWarning = usagePercentage >= thresholdPercentage - warningAdvancePercent; + + return { + shouldShowWarning, + usagePercentage, + thresholdPercentage, + }; +} diff --git a/src/browser/utils/messages/StreamingMessageAggregator.ts b/src/browser/utils/messages/StreamingMessageAggregator.ts index e0d1193e1..269155da1 100644 --- a/src/browser/utils/messages/StreamingMessageAggregator.ts +++ b/src/browser/utils/messages/StreamingMessageAggregator.ts @@ -762,7 +762,10 @@ export class StreamingMessageAggregator { muxMeta?.type === "compaction-request" ? { rawCommand: muxMeta.rawCommand, - parsed: muxMeta.parsed, + parsed: { + maxOutputTokens: muxMeta.parsed.maxOutputTokens, + continueMessage: muxMeta.parsed.continueMessage?.text, // Extract text for display + }, } : undefined; diff --git a/src/common/types/message.ts b/src/common/types/message.ts index 0d88b52d4..6e79594ea 100644 --- a/src/common/types/message.ts +++ b/src/common/types/message.ts @@ -5,11 +5,17 @@ import type { ToolPolicy } from "@/common/utils/tools/toolPolicy"; import type { ChatUsageDisplay } from "@/common/utils/tokens/usageAggregator"; import type { ImagePart } from "./ipc"; +// Message to continue with after compaction +export interface ContinueMessage { + text: string; + imageParts?: ImagePart[]; +} + // Parsed compaction request data (shared type for consistency) export interface CompactionRequestData { model?: string; // Custom model override for compaction maxOutputTokens?: number; - continueMessage?: string; + continueMessage?: ContinueMessage; } // Frontend-specific metadata stored in muxMetadata field diff --git a/src/node/services/agentSession.ts b/src/node/services/agentSession.ts index adbe96ee3..4b61503ac 100644 --- a/src/node/services/agentSession.ts +++ b/src/node/services/agentSession.ts @@ -326,9 +326,10 @@ export class AgentSession { // If this is a compaction request with a continue message, queue it for auto-send after compaction const muxMeta = options?.muxMetadata; if (muxMeta?.type === "compaction-request" && muxMeta.parsed.continueMessage && options) { + const { text, imageParts } = muxMeta.parsed.continueMessage; // Strip out edit-specific and compaction-specific fields so the queued message is a fresh user message const { muxMetadata, mode, editMessageId, ...continueOptions } = options; - this.messageQueue.add(muxMeta.parsed.continueMessage, continueOptions); + this.messageQueue.add(text, { ...continueOptions, imageParts }); this.emitQueuedMessageChanged(); } From 3a272c70ec7e7d5a3f2534693789ba9ede86eaa7 Mon Sep 17 00:00:00 2001 From: ethan Date: Fri, 21 Nov 2025 12:59:54 +1100 Subject: [PATCH 2/9] fix merge conflict --- src/browser/stores/WorkspaceStore.ts | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/browser/stores/WorkspaceStore.ts b/src/browser/stores/WorkspaceStore.ts index d15f4a09a..82c1f1f58 100644 --- a/src/browser/stores/WorkspaceStore.ts +++ b/src/browser/stores/WorkspaceStore.ts @@ -437,10 +437,6 @@ export class WorkspaceStore { const model = aggregator.getCurrentModel(); const usageHistory = 
collectUsageHistory(messages, model); - const messages = aggregator.getAllMessages(); - const model = aggregator.getCurrentModel(); - const usageHistory = cumUsageHistory(messages, model); - // Use last entry's total (each entry is cumulative, not a delta) // Each usageHistory entry contains the FULL prompt tokens for that turn, // so we only need the most recent value, not a sum From 7e67829ac50902eb6628f9f0222b9a59c1ea2d69 Mon Sep 17 00:00:00 2001 From: ethan Date: Fri, 21 Nov 2025 13:11:56 +1100 Subject: [PATCH 3/9] use model options for 1m context --- src/browser/components/AIView.tsx | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/browser/components/AIView.tsx b/src/browser/components/AIView.tsx index 314d69047..b8a5d3ab3 100644 --- a/src/browser/components/AIView.tsx +++ b/src/browser/components/AIView.tsx @@ -37,7 +37,7 @@ import { evictModelFromLRU } from "@/browser/hooks/useModelLRU"; import { QueuedMessage } from "./Messages/QueuedMessage"; import { CompactionWarning } from "./CompactionWarning"; import { shouldAutoCompact } from "@/browser/utils/compaction/autoCompactionCheck"; -import { use1MContext } from "@/browser/hooks/use1MContext"; +import { useProviderOptions } from "@/browser/hooks/useProviderOptions"; interface AIViewProps { workspaceId: string; @@ -82,7 +82,8 @@ const AIViewInner: React.FC = ({ const workspaceState = useWorkspaceState(workspaceId); const aggregator = useWorkspaceAggregator(workspaceId); const workspaceUsage = useWorkspaceUsage(workspaceId); - const [use1M] = use1MContext(); + const { options } = useProviderOptions(); + const use1M = options.anthropic?.use1MContext ?? false; const handledModelErrorsRef = useRef>(new Set()); useEffect(() => { From 683d9dfc0a0886ea52a59784946afe9035c12f41 Mon Sep 17 00:00:00 2001 From: ethan Date: Fri, 21 Nov 2025 13:58:36 +1100 Subject: [PATCH 4/9] pass model and images when resuming --- src/browser/hooks/useResumeManager.ts | 6 +++++- src/browser/utils/messages/StreamingMessageAggregator.ts | 6 ++++-- src/common/types/message.ts | 5 +---- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/src/browser/hooks/useResumeManager.ts b/src/browser/hooks/useResumeManager.ts index afe5a0fcb..5cbc2a502 100644 --- a/src/browser/hooks/useResumeManager.ts +++ b/src/browser/hooks/useResumeManager.ts @@ -172,8 +172,12 @@ export function useResumeManager() { // Apply compaction overrides using shared function (same as ChatInput) // This ensures custom model/tokens are preserved across resume options = applyCompactionOverrides(options, { + model: lastUserMsg.compactionRequest.parsed.model, maxOutputTokens: lastUserMsg.compactionRequest.parsed.maxOutputTokens, - continueMessage: { text: lastUserMsg.compactionRequest.parsed.continueMessage ?? "" }, + continueMessage: { + text: lastUserMsg.compactionRequest.parsed.continueMessage?.text ?? "", + imageParts: lastUserMsg.compactionRequest.parsed.continueMessage?.imageParts, + }, }); } } diff --git a/src/browser/utils/messages/StreamingMessageAggregator.ts b/src/browser/utils/messages/StreamingMessageAggregator.ts index 269155da1..10bccd05d 100644 --- a/src/browser/utils/messages/StreamingMessageAggregator.ts +++ b/src/browser/utils/messages/StreamingMessageAggregator.ts @@ -3,6 +3,7 @@ import type { MuxMetadata, MuxImagePart, DisplayedMessage, + CompactionRequestData, } from "@/common/types/message"; import { createMuxMessage } from "@/common/types/message"; import type { @@ -763,9 +764,10 @@ export class StreamingMessageAggregator { ? 
{ rawCommand: muxMeta.rawCommand, parsed: { + model: muxMeta.parsed.model, maxOutputTokens: muxMeta.parsed.maxOutputTokens, - continueMessage: muxMeta.parsed.continueMessage?.text, // Extract text for display - }, + continueMessage: muxMeta.parsed.continueMessage, + } satisfies CompactionRequestData, } : undefined; diff --git a/src/common/types/message.ts b/src/common/types/message.ts index 6e79594ea..bb47236bf 100644 --- a/src/common/types/message.ts +++ b/src/common/types/message.ts @@ -106,10 +106,7 @@ export type DisplayedMessage = compactionRequest?: { // Present if this is a /compact command rawCommand: string; - parsed: { - maxOutputTokens?: number; - continueMessage?: string; - }; + parsed: CompactionRequestData; }; } | { From 686e43977f91c8697cdd2616e87a60c9b89e4a45 Mon Sep 17 00:00:00 2001 From: ethan Date: Fri, 21 Nov 2025 20:06:55 +1100 Subject: [PATCH 5/9] fix usage calc --- src/browser/stores/WorkspaceStore.ts | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/browser/stores/WorkspaceStore.ts b/src/browser/stores/WorkspaceStore.ts index 82c1f1f58..4576fb3fd 100644 --- a/src/browser/stores/WorkspaceStore.ts +++ b/src/browser/stores/WorkspaceStore.ts @@ -437,17 +437,17 @@ export class WorkspaceStore { const model = aggregator.getCurrentModel(); const usageHistory = collectUsageHistory(messages, model); - // Use last entry's total (each entry is cumulative, not a delta) - // Each usageHistory entry contains the FULL prompt tokens for that turn, - // so we only need the most recent value, not a sum - const lastEntry = usageHistory[usageHistory.length - 1]; - const totalTokens = lastEntry - ? lastEntry.input.tokens + - lastEntry.cached.tokens + - lastEntry.cacheCreate.tokens + - lastEntry.output.tokens + - lastEntry.reasoning.tokens - : 0; + // Calculate total from usage history (now includes historical) + const totalTokens = usageHistory.reduce( + (sum, u) => + sum + + u.input.tokens + + u.cached.tokens + + u.cacheCreate.tokens + + u.output.tokens + + u.reasoning.tokens, + 0 + ); return { usageHistory, totalTokens }; }); From 8b68d1601c9b4e83245639178c29f7587e41434b Mon Sep 17 00:00:00 2001 From: ethan Date: Fri, 21 Nov 2025 20:41:16 +1100 Subject: [PATCH 6/9] fix usage calc --- src/browser/utils/chatCommands.ts | 3 --- .../utils/compaction/autoCompactionCheck.ts | 24 +++++++++++++++++-- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/src/browser/utils/chatCommands.ts b/src/browser/utils/chatCommands.ts index e388f7894..2f81bc5a4 100644 --- a/src/browser/utils/chatCommands.ts +++ b/src/browser/utils/chatCommands.ts @@ -271,9 +271,6 @@ function formatCompactionCommand(options: CompactionOptions): string { if (options.model) { cmd += ` -m ${options.model}`; } - if (options.continueMessage) { - cmd += `\n${options.continueMessage.text}`; - } return cmd; } diff --git a/src/browser/utils/compaction/autoCompactionCheck.ts b/src/browser/utils/compaction/autoCompactionCheck.ts index 4369eadc4..c532395c7 100644 --- a/src/browser/utils/compaction/autoCompactionCheck.ts +++ b/src/browser/utils/compaction/autoCompactionCheck.ts @@ -35,6 +35,10 @@ const WARNING_ADVANCE_PERCENT = 10; /** * Check if auto-compaction should trigger based on token usage * + * Uses the last usage entry (most recent API call) to calculate current context size. + * This matches the UI token meter display and excludes historical usage from compaction, + * preventing infinite compaction loops after the first compaction completes. 
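+ * Worked example (illustrative numbers, not from this change): with a
+ * 200k-token context window, the default 0.7 threshold and 10-point warning
+ * advance, the warning appears from 120k tokens (60%) and a send at or above
+ * 140k tokens (70%) is converted into a compaction request.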
+ *
  * @param usage - Current workspace usage state (from useWorkspaceUsage)
  * @param model - Current model string
  * @param use1M - Whether 1M context is enabled
  * @param threshold - Usage percentage threshold (0.0-1.0, default 0.7 = 70%)
  * @param warningAdvancePercent - Show warning this many percentage points before threshold (default 10)
  * @returns Check result with warning flag and usage percentage
  */
 export function shouldAutoCompact(
   usage: WorkspaceUsageState | undefined,
   model: string,
   use1M: boolean,
   threshold: number = AUTO_COMPACTION_THRESHOLD,
   warningAdvancePercent: number = WARNING_ADVANCE_PERCENT
 ): AutoCompactionCheckResult {
   const thresholdPercentage = threshold * 100;
@@ -73,8 +77,24 @@
     };
   }
 
-  // Calculate usage percentage from cumulative conversation total
-  const usagePercentage = (usage.totalTokens / maxTokens) * 100;
+  // Use last usage entry to calculate current context size (matches UI display)
+  const lastUsage = usage.usageHistory[usage.usageHistory.length - 1];
+  if (!lastUsage) {
+    return {
+      shouldShowWarning: false,
+      usagePercentage: 0,
+      thresholdPercentage,
+    };
+  }
+
+  const currentContextTokens =
+    lastUsage.input.tokens +
+    lastUsage.cached.tokens +
+    lastUsage.cacheCreate.tokens +
+    lastUsage.output.tokens +
+    lastUsage.reasoning.tokens;
+
+  const usagePercentage = (currentContextTokens / maxTokens) * 100;
 
   // Show warning if within advance window (e.g., 60% for 70% threshold with 10% advance)
   const shouldShowWarning = usagePercentage >= thresholdPercentage - warningAdvancePercent;

From 444c3baae2227de80875f114204178d2951a51df Mon Sep 17 00:00:00 2001
From: ethan
Date: Mon, 24 Nov 2025 12:49:09 +1100
Subject: [PATCH 7/9] 🤖 refactor: make countdown warning smaller and less
 intrusive
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Countdown warning (60-69%): Small grey text, right-aligned
- Urgent warning (70%+): Keep prominent blue box styling
- Makes countdown unobtrusive while keeping urgent warning visible

_Generated with `mux`_
---
 src/browser/components/CompactionWarning.tsx | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/src/browser/components/CompactionWarning.tsx b/src/browser/components/CompactionWarning.tsx
index 7688f1bad..a48711200 100644
--- a/src/browser/components/CompactionWarning.tsx
+++ b/src/browser/components/CompactionWarning.tsx
@@ -21,16 +21,20 @@
   // At threshold or above, next message will trigger compaction
   const willCompactNext = props.usagePercentage >= props.thresholdPercentage;
 
-  // Calculate remaining percentage until threshold
-  const remaining = props.thresholdPercentage - props.usagePercentage;
-
-  const message = willCompactNext
-    ? "⚠️ Context limit reached. Next message will trigger auto-compaction."
-    : `Context left until Auto-Compact: ${Math.round(remaining)}%`;
+  // Urgent warning at/above threshold - prominent blue box
+  if (willCompactNext) {
+    return (
+      <div>
+        ⚠️ Context limit reached. Next message will trigger auto-compaction.
+      </div>
+    );
+  }
+
+  // Countdown warning below threshold - subtle grey text, right-aligned
+  const remaining = props.thresholdPercentage - props.usagePercentage;
   return (
-    <div>
-      {message}
+    <div>
+      Context left until Auto-Compact: {Math.round(remaining)}%
     </div>
); }; From 2f2d3eb124735eba591c805317a88c22737e06b3 Mon Sep 17 00:00:00 2001 From: ethan Date: Mon, 24 Nov 2025 13:07:54 +1100 Subject: [PATCH 8/9] =?UTF-8?q?=F0=9F=A4=96=20fix:=20prevent=20double-comp?= =?UTF-8?q?action=20when=20sending=20during=20active=20compaction?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds !isCompacting check to shouldAutoCompact calculation to prevent queueing a second compaction request when user sends a message while the first compaction is still running. Without this check, messages sent during compaction would trigger another compaction, resulting in back-to-back compactions and delayed user messages. _Generated with `mux`_ --- src/browser/components/ChatInput/index.tsx | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/browser/components/ChatInput/index.tsx b/src/browser/components/ChatInput/index.tsx index 507520c8a..a92ba98b9 100644 --- a/src/browser/components/ChatInput/index.tsx +++ b/src/browser/components/ChatInput/index.tsx @@ -695,7 +695,9 @@ export const ChatInput: React.FC = (props) => { // Result is computed in parent (AIView) and passed down to avoid duplicate calculation const shouldAutoCompact = props.autoCompactionCheck && - props.autoCompactionCheck.usagePercentage >= props.autoCompactionCheck.thresholdPercentage; + props.autoCompactionCheck.usagePercentage >= + props.autoCompactionCheck.thresholdPercentage && + !isCompacting; // Skip if already compacting to prevent double-compaction queue if (variant === "workspace" && !editingMessage && shouldAutoCompact) { // Clear input immediately for responsive UX setInput(""); From b184dcf84cf16fccbcf8e96c26fe1031c65b4bab Mon Sep 17 00:00:00 2001 From: ethan Date: Mon, 24 Nov 2025 13:44:31 +1100 Subject: [PATCH 9/9] =?UTF-8?q?=F0=9F=A4=96=20test:=20add=20comprehensive?= =?UTF-8?q?=20unit=20tests=20for=20shouldAutoCompact?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds 23 unit tests covering: - Basic functionality (safe defaults, threshold detection) - Usage calculation (last entry vs cumulative, historical usage handling) - 1M context mode (model support, fallback behavior) - Edge cases (zero tokens, custom thresholds, boundary conditions) - Percentage calculation accuracy All tests verify the infinite loop fix - that historical usage from compaction is correctly excluded from threshold calculations. 
Tests run: 23 pass, 0 fail, 62 expect() calls _Generated with `mux`_ --- .../compaction/autoCompactionCheck.test.ts | 300 ++++++++++++++++++ 1 file changed, 300 insertions(+) create mode 100644 src/browser/utils/compaction/autoCompactionCheck.test.ts diff --git a/src/browser/utils/compaction/autoCompactionCheck.test.ts b/src/browser/utils/compaction/autoCompactionCheck.test.ts new file mode 100644 index 000000000..8e1d26f3d --- /dev/null +++ b/src/browser/utils/compaction/autoCompactionCheck.test.ts @@ -0,0 +1,300 @@ +import { describe, test, expect } from "bun:test"; +import { shouldAutoCompact } from "./autoCompactionCheck"; +import type { WorkspaceUsageState } from "@/browser/stores/WorkspaceStore"; +import type { ChatUsageDisplay } from "@/common/utils/tokens/usageAggregator"; +import { KNOWN_MODELS } from "@/common/constants/knownModels"; + +// Helper to create a mock usage entry +const createUsageEntry = ( + tokens: number, + model: string = KNOWN_MODELS.SONNET.id +): ChatUsageDisplay => { + // Distribute tokens across different types (realistic pattern) + const inputTokens = Math.floor(tokens * 0.6); // 60% input + const outputTokens = Math.floor(tokens * 0.3); // 30% output + const cachedTokens = Math.floor(tokens * 0.1); // 10% cached + + return { + input: { tokens: inputTokens }, + cached: { tokens: cachedTokens }, + cacheCreate: { tokens: 0 }, + output: { tokens: outputTokens }, + reasoning: { tokens: 0 }, + model, + }; +}; + +// Helper to create mock WorkspaceUsageState +const createMockUsage = ( + lastEntryTokens: number, + historicalTokens?: number, + model: string = KNOWN_MODELS.SONNET.id +): WorkspaceUsageState => { + const usageHistory: ChatUsageDisplay[] = []; + + if (historicalTokens !== undefined) { + // Add historical usage (from compaction) + usageHistory.push(createUsageEntry(historicalTokens, "historical-model")); + } + + // Add recent usage + usageHistory.push(createUsageEntry(lastEntryTokens, model)); + + return { usageHistory, totalTokens: 0 }; +}; + +describe("shouldAutoCompact", () => { + const SONNET_MAX_TOKENS = 200_000; + const SONNET_70_PERCENT = SONNET_MAX_TOKENS * 0.7; // 140,000 + const SONNET_60_PERCENT = SONNET_MAX_TOKENS * 0.6; // 120,000 + + describe("Basic Functionality", () => { + test("returns false when no usage data (first message)", () => { + const result = shouldAutoCompact(undefined, KNOWN_MODELS.SONNET.id, false); + + expect(result.shouldShowWarning).toBe(false); + expect(result.usagePercentage).toBe(0); + expect(result.thresholdPercentage).toBe(70); + }); + + test("returns false when usage history is empty", () => { + const usage: WorkspaceUsageState = { usageHistory: [], totalTokens: 0 }; + const result = shouldAutoCompact(usage, KNOWN_MODELS.SONNET.id, false); + + expect(result.shouldShowWarning).toBe(false); + expect(result.usagePercentage).toBe(0); + expect(result.thresholdPercentage).toBe(70); + }); + + test("returns false when model has no max_input_tokens (unknown model)", () => { + const usage = createMockUsage(50_000); + const result = shouldAutoCompact(usage, "unknown-model", false); + + expect(result.shouldShowWarning).toBe(false); + expect(result.usagePercentage).toBe(0); + expect(result.thresholdPercentage).toBe(70); + }); + + test("returns false when usage is low (10%)", () => { + const usage = createMockUsage(20_000); // 10% of 200k + const result = shouldAutoCompact(usage, KNOWN_MODELS.SONNET.id, false); + + expect(result.shouldShowWarning).toBe(false); + expect(result.usagePercentage).toBe(10); + 
      expect(result.thresholdPercentage).toBe(70);
    });

    test("returns true at warning threshold (60% with default 10% advance)", () => {
      const usage = createMockUsage(SONNET_60_PERCENT);
      const result = shouldAutoCompact(usage, KNOWN_MODELS.SONNET.id, false);

      expect(result.shouldShowWarning).toBe(true);
      expect(result.usagePercentage).toBe(60);
      expect(result.thresholdPercentage).toBe(70);
    });

    test("returns true at compaction threshold (70%)", () => {
      const usage = createMockUsage(SONNET_70_PERCENT);
      const result = shouldAutoCompact(usage, KNOWN_MODELS.SONNET.id, false);

      expect(result.shouldShowWarning).toBe(true);
      expect(result.usagePercentage).toBe(70);
      expect(result.thresholdPercentage).toBe(70);
    });

    test("returns true above threshold (80%)", () => {
      const usage = createMockUsage(160_000); // 80% of 200k
      const result = shouldAutoCompact(usage, KNOWN_MODELS.SONNET.id, false);

      expect(result.shouldShowWarning).toBe(true);
      expect(result.usagePercentage).toBe(80);
      expect(result.thresholdPercentage).toBe(70);
    });
  });

  describe("Usage Calculation (Critical for infinite loop fix)", () => {
    test("uses last usage entry tokens, not cumulative sum", () => {
      const usage = createMockUsage(10_000); // Only 5% of context
      const result = shouldAutoCompact(usage, KNOWN_MODELS.SONNET.id, false);

      // Should be 5%, not counting historical
      expect(result.usagePercentage).toBe(5);
      expect(result.shouldShowWarning).toBe(false);
    });

    test("handles historical usage correctly - ignores it in calculation", () => {
      // Scenario: After compaction, historical = 70K, recent = 5K
      // Should calculate based on 5K (2.5%), not 75K (37.5%)
      const usage = createMockUsage(5_000, 70_000);
      const result = shouldAutoCompact(usage, KNOWN_MODELS.SONNET.id, false);

      expect(result.usagePercentage).toBe(2.5);
      expect(result.shouldShowWarning).toBe(false);
    });

    test("includes all token types in calculation", () => {
      // Create usage with all token types specified
      const usage: WorkspaceUsageState = {
        usageHistory: [
          {
            input: { tokens: 10_000 },
            cached: { tokens: 5_000 },
            cacheCreate: { tokens: 2_000 },
            output: { tokens: 3_000 },
            reasoning: { tokens: 1_000 },
            model: KNOWN_MODELS.SONNET.id,
          },
        ],
        totalTokens: 0,
      };

      const result = shouldAutoCompact(usage, KNOWN_MODELS.SONNET.id, false);

      // Total: 10k + 5k + 2k + 3k + 1k = 21k tokens = 10.5%
      expect(result.usagePercentage).toBe(10.5);
    });
  });

  describe("1M Context Mode", () => {
    test("uses 1M tokens when use1M=true and model supports it (Sonnet 4)", () => {
      const usage = createMockUsage(600_000); // 60% of 1M
      const result = shouldAutoCompact(usage, KNOWN_MODELS.SONNET.id, true);

      expect(result.usagePercentage).toBe(60);
      expect(result.shouldShowWarning).toBe(true);
    });

    test("uses 1M tokens for Sonnet with use1M=true (model is claude-sonnet-4-5)", () => {
      const usage = createMockUsage(700_000); // 70% of 1M
      const result = shouldAutoCompact(usage, KNOWN_MODELS.SONNET.id, true);

      expect(result.usagePercentage).toBe(70);
      expect(result.shouldShowWarning).toBe(true);
    });

    test("uses standard max_input_tokens when use1M=false", () => {
      const usage = createMockUsage(140_000); // 70% of 200k
      const result = shouldAutoCompact(usage, KNOWN_MODELS.SONNET.id, false);

      expect(result.usagePercentage).toBe(70);
      expect(result.shouldShowWarning).toBe(true);
    });

    test("ignores use1M for models that don't support it (GPT)", () => {
      const usage
= createMockUsage(100_000, undefined, KNOWN_MODELS.GPT_MINI.id);
      // GPT Mini has 272k context, so 100k = 36.76%
      const result = shouldAutoCompact(usage, KNOWN_MODELS.GPT_MINI.id, true);

      // Should use standard 272k, not 1M (use1M ignored for GPT)
      expect(result.usagePercentage).toBeCloseTo(36.76, 1);
      expect(result.shouldShowWarning).toBe(false);
    });
  });

  describe("Edge Cases", () => {
    test("empty usageHistory array returns safe defaults", () => {
      const usage: WorkspaceUsageState = { usageHistory: [], totalTokens: 0 };
      const result = shouldAutoCompact(usage, KNOWN_MODELS.SONNET.id, false);

      expect(result.shouldShowWarning).toBe(false);
      expect(result.usagePercentage).toBe(0);
      expect(result.thresholdPercentage).toBe(70);
    });

    test("single entry in usageHistory works correctly", () => {
      const usage = createMockUsage(140_000);
      const result = shouldAutoCompact(usage, KNOWN_MODELS.SONNET.id, false);

      expect(result.shouldShowWarning).toBe(true);
      expect(result.usagePercentage).toBe(70);
    });

    test("custom threshold parameter (80%)", () => {
      const usage = createMockUsage(140_000); // 70% of context
      const result = shouldAutoCompact(usage, KNOWN_MODELS.SONNET.id, false, 0.8); // 80% threshold

      // At 70%, the warning window for an 80% threshold has just opened (80% - 10% advance = 70%)
      expect(result.shouldShowWarning).toBe(true); // 70% >= (80% - 10% = 70%)
      expect(result.usagePercentage).toBe(70);
      expect(result.thresholdPercentage).toBe(80);
    });

    test("custom warning advance (5% instead of 10%)", () => {
      const usage = createMockUsage(130_000); // 65% of context
      const result = shouldAutoCompact(usage, KNOWN_MODELS.SONNET.id, false, 0.7, 5);

      // At 65%, should show warning with 5% advance (70% - 5% = 65%)
      expect(result.shouldShowWarning).toBe(true);
      expect(result.usagePercentage).toBe(65);
      expect(result.thresholdPercentage).toBe(70);
    });

    test("handles zero tokens gracefully", () => {
      const usage: WorkspaceUsageState = {
        usageHistory: [
          {
            input: { tokens: 0 },
            cached: { tokens: 0 },
            cacheCreate: { tokens: 0 },
            output: { tokens: 0 },
            reasoning: { tokens: 0 },
            model: KNOWN_MODELS.SONNET.id,
          },
        ],
        totalTokens: 0,
      };

      const result = shouldAutoCompact(usage, KNOWN_MODELS.SONNET.id, false);

      expect(result.shouldShowWarning).toBe(false);
      expect(result.usagePercentage).toBe(0);
    });

    test("handles usage at exactly 100% of context", () => {
      const usage = createMockUsage(SONNET_MAX_TOKENS);
      const result = shouldAutoCompact(usage, KNOWN_MODELS.SONNET.id, false);

      expect(result.shouldShowWarning).toBe(true);
      expect(result.usagePercentage).toBe(100);
      expect(result.thresholdPercentage).toBe(70);
    });

    test("handles usage beyond 100% of context", () => {
      const usage = createMockUsage(SONNET_MAX_TOKENS + 50_000);
      const result = shouldAutoCompact(usage, KNOWN_MODELS.SONNET.id, false);

      expect(result.shouldShowWarning).toBe(true);
      expect(result.usagePercentage).toBe(125);
      expect(result.thresholdPercentage).toBe(70);
    });
  });

  describe("Percentage Calculation Accuracy", () => {
    test("calculates percentage correctly for various token counts", () => {
      // Test specific percentages
      const testCases = [
        { tokens: 20_000, expectedPercent: 10 },
        { tokens: 40_000, expectedPercent: 20 },
        { tokens: 100_000, expectedPercent: 50 },
        { tokens: 120_000, expectedPercent: 60 },
        { tokens: 140_000, expectedPercent: 70 },
        { tokens: 160_000, expectedPercent: 80 },
        { tokens: 180_000,
expectedPercent: 90 }, + ]; + + for (const { tokens, expectedPercent } of testCases) { + const usage = createMockUsage(tokens); + const result = shouldAutoCompact(usage, KNOWN_MODELS.SONNET.id, false); + expect(result.usagePercentage).toBe(expectedPercent); + } + }); + + test("handles fractional percentages correctly", () => { + const usage = createMockUsage(123_456); // 61.728% + const result = shouldAutoCompact(usage, KNOWN_MODELS.SONNET.id, false); + + expect(result.usagePercentage).toBeCloseTo(61.728, 2); + expect(result.shouldShowWarning).toBe(true); // Above 60% + }); + }); +});
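Taken together, the tests pin down a small contract for callers: pass the usage state, model id, and 1M flag, then compare the returned percentages. A minimal consumption sketch (hypothetical helper function; `shouldAutoCompact`, `WorkspaceUsageState`, and the import paths are the real ones exercised above):

    import { shouldAutoCompact } from "@/browser/utils/compaction/autoCompactionCheck";
    import type { WorkspaceUsageState } from "@/browser/stores/WorkspaceStore";

    // Hypothetical caller mirroring ChatInput's decision after patch 8:
    // compact on the next send once usage crosses the threshold, unless a
    // compaction is already in flight.
    function nextSendShouldCompact(
      usage: WorkspaceUsageState | undefined,
      model: string,
      use1M: boolean,
      isCompacting: boolean
    ): boolean {
      const check = shouldAutoCompact(usage, model, use1M);
      return !isCompacting && check.usagePercentage >= check.thresholdPercentage;
    }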