Commit 22443ea

🤖 feat: force compaction when approaching context limit

Add force compaction that triggers automatically when live token usage approaches the context window limit during streaming. This prevents the AI from hitting context-limit errors mid-response.

Key changes:

1. Auto-compaction check extended with a shouldForceCompact flag
   - Uses liveUsage (real-time streaming) or falls back to lastUsage
   - Triggers when remaining tokens <= FORCE_COMPACTION_TOKEN_BUFFER
   - Works even with an empty usageHistory (first message streaming)
2. AIView triggers force compaction during active streams
   - Tracks the triggered stream to prevent duplicate compactions
   - Sends the compaction request with a 'Continue with current task' message
3. Queue restore moved from stream-abort to the IPC interrupt handler
   - Prevents continueMessage from bouncing back to the input on internal aborts
   - User-initiated interrupts still restore queued messages to the input
4. Shared constants for compaction parameters
   - DEFAULT_COMPACTION_WORD_TARGET = 2000
   - WORDS_TO_TOKENS_RATIO = 1.3
   - FORCE_COMPACTION_TOKEN_BUFFER = 5200 (2x expected output)
1 parent ebb8e1b commit 22443ea
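
In essence, the check derives a token buffer from the shared compaction constants and forces a compaction once the live total comes within that buffer of the context window. A minimal TypeScript sketch using the names added in this commit (the 200k window and the 195,500-token live total are illustrative numbers, not taken from the diff):

const FORCE_COMPACTION_TOKEN_BUFFER = Math.round(2 * 2000 * 1.3); // 2x word target * tokens-per-word = 5200
const maxTokens = 200_000;       // e.g. a 200k context window (illustrative)
const liveTotalTokens = 195_500; // illustrative mid-stream total (input + cached + cacheCreate + output + reasoning)
const remainingTokens = maxTokens - liveTotalTokens;                         // 4_500
const shouldForceCompact = remainingTokens <= FORCE_COMPACTION_TOKEN_BUFFER; // true -> compact now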

File tree

8 files changed: 243 additions & 193 deletions

bun.lock

Lines changed: 8 additions & 133 deletions
Large diffs are not rendered by default.

src/browser/components/AIView.tsx

Lines changed: 62 additions & 25 deletions
@@ -37,6 +37,7 @@ import { evictModelFromLRU } from "@/browser/hooks/useModelLRU";
 import { QueuedMessage } from "./Messages/QueuedMessage";
 import { CompactionWarning } from "./CompactionWarning";
 import { checkAutoCompaction } from "@/browser/utils/compaction/autoCompactionCheck";
+import { executeCompaction } from "@/browser/utils/chatCommands";
 import { useProviderOptions } from "@/browser/hooks/useProviderOptions";
 import { useAutoCompactionSettings } from "../hooks/useAutoCompactionSettings";
 import { useSendMessageOptions } from "@/browser/hooks/useSendMessageOptions";
@@ -120,6 +121,67 @@ const AIViewInner: React.FC<AIViewProps> = ({
     undefined
   );

+  // Use send options for auto-compaction check
+  const pendingSendOptions = useSendMessageOptions(workspaceId);
+
+  // Track if we've already triggered force compaction for this stream
+  const forceCompactionTriggeredRef = useRef<string | null>(null);
+
+  // Extract state from workspace state
+  const { messages, canInterrupt, isCompacting, loading, currentModel } = workspaceState;
+
+  // Get active stream message ID for token counting
+  const activeStreamMessageId = aggregator.getActiveStreamMessageId();
+
+  // Use pending send model for auto-compaction check, not the last stream's model.
+  // This ensures the threshold is based on the model the user will actually send with,
+  // preventing context-length errors when switching from a large-context to smaller model.
+  const pendingModel = pendingSendOptions.model;
+
+  const autoCompactionResult = checkAutoCompaction(
+    workspaceUsage,
+    pendingModel,
+    use1M,
+    autoCompactionEnabled,
+    autoCompactionThreshold / 100
+  );
+
+  // Show warning when: shouldShowWarning flag is true AND not currently compacting
+  const shouldShowCompactionWarning = !isCompacting && autoCompactionResult.shouldShowWarning;
+
+  // Force compaction when live usage shows we're about to hit context limit
+  useEffect(() => {
+    if (
+      !autoCompactionResult.shouldForceCompact ||
+      !canInterrupt ||
+      isCompacting ||
+      forceCompactionTriggeredRef.current === activeStreamMessageId
+    ) {
+      return;
+    }
+
+    forceCompactionTriggeredRef.current = activeStreamMessageId ?? null;
+    void executeCompaction({
+      workspaceId,
+      sendMessageOptions: pendingSendOptions,
+      continueMessage: { text: "Continue with the current task" },
+    });
+  }, [
+    autoCompactionResult.shouldForceCompact,
+    canInterrupt,
+    isCompacting,
+    activeStreamMessageId,
+    workspaceId,
+    pendingSendOptions,
+  ]);
+
+  // Reset force compaction trigger when stream ends
+  useEffect(() => {
+    if (!canInterrupt) {
+      forceCompactionTriggeredRef.current = null;
+    }
+  }, [canInterrupt]);
+
   // Auto-retry state - minimal setter for keybinds and message sent handler
   // RetryBarrier manages its own state, but we need this for interrupt keybind
   const [, setAutoRetry] = usePersistedState<boolean>(
@@ -144,9 +206,6 @@ const AIViewInner: React.FC<AIViewProps> = ({
     markUserInteraction,
   } = useAutoScroll();

-  // Use send options for auto-compaction check
-  const pendingSendOptions = useSendMessageOptions(workspaceId);
-
   // ChatInput API for focus management
   const chatInputAPI = useRef<ChatInputAPI | null>(null);
   const handleChatInputReady = useCallback((api: ChatInputAPI) => {
@@ -329,28 +388,6 @@ const AIViewInner: React.FC<AIViewProps> = ({
     );
   }

-  // Extract state from workspace state
-  const { messages, canInterrupt, isCompacting, loading, currentModel } = workspaceState;
-
-  // Get active stream message ID for token counting
-  const activeStreamMessageId = aggregator.getActiveStreamMessageId();
-
-  // Use pending send model for auto-compaction check, not the last stream's model.
-  // This ensures the threshold is based on the model the user will actually send with,
-  // preventing context-length errors when switching from a large-context to smaller model.
-  const pendingModel = pendingSendOptions.model;
-
-  const autoCompactionResult = checkAutoCompaction(
-    workspaceUsage,
-    pendingModel,
-    use1M,
-    autoCompactionEnabled,
-    autoCompactionThreshold / 100
-  );
-
-  // Show warning when: shouldShowWarning flag is true AND not currently compacting
-  const shouldShowCompactionWarning = !isCompacting && autoCompactionResult.shouldShowWarning;
-
   // Note: We intentionally do NOT reset autoRetry when streams start.
   // If user pressed the interrupt key, autoRetry stays false until they manually retry.
   // This makes state transitions explicit and predictable.

src/browser/utils/chatCommands.ts

Lines changed: 7 additions & 1 deletion
@@ -23,6 +23,10 @@ import { resolveCompactionModel } from "@/browser/utils/messages/compactionModel
 import type { ImageAttachment } from "../components/ImageAttachments";
 import { dispatchWorkspaceSwitch } from "./workspaceEvents";
 import { getRuntimeKey, copyWorkspaceStorage } from "@/common/constants/storage";
+import {
+  DEFAULT_COMPACTION_WORD_TARGET,
+  WORDS_TO_TOKENS_RATIO,
+} from "@/common/constants/ui";

 // ============================================================================
 // Workspace Creation
@@ -572,7 +576,9 @@ export function prepareCompactionMessage(options: CompactionOptions): {
   metadata: MuxFrontendMetadata;
   sendOptions: SendMessageOptions;
 } {
-  const targetWords = options.maxOutputTokens ? Math.round(options.maxOutputTokens / 1.3) : 2000;
+  const targetWords = options.maxOutputTokens
+    ? Math.round(options.maxOutputTokens / WORDS_TO_TOKENS_RATIO)
+    : DEFAULT_COMPACTION_WORD_TARGET;

   // Build compaction message with optional continue context
   let messageText = `Summarize this conversation into a compact form for a new Assistant to continue helping the user. Focus entirely on the summary of what has happened. Do not suggest next steps or future actions. Use approximately ${targetWords} words.`;
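
For a quick sense of the conversion these constants replace the magic numbers with (a sketch; the 2_600 token budget is an illustrative value, not from the diff):

const targetWords = Math.round(2_600 / WORDS_TO_TOKENS_RATIO); // 2600 / 1.3 = 2000 words
const fallbackWords = DEFAULT_COMPACTION_WORD_TARGET;          // 2000 words when maxOutputTokens is undefined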

src/browser/utils/compaction/autoCompactionCheck.test.ts

Lines changed: 77 additions & 2 deletions
@@ -3,6 +3,7 @@ import { checkAutoCompaction } from "./autoCompactionCheck";
 import type { WorkspaceUsageState } from "@/browser/stores/WorkspaceStore";
 import type { ChatUsageDisplay } from "@/common/utils/tokens/usageAggregator";
 import { KNOWN_MODELS } from "@/common/constants/knownModels";
+import { FORCE_COMPACTION_TOKEN_BUFFER } from "@/common/constants/ui";

 // Helper to create a mock usage entry
 const createUsageEntry = (
@@ -28,7 +29,8 @@
 const createMockUsage = (
   lastEntryTokens: number,
   historicalTokens?: number,
-  model: string = KNOWN_MODELS.SONNET.id
+  model: string = KNOWN_MODELS.SONNET.id,
+  liveUsage?: ChatUsageDisplay
 ): WorkspaceUsageState => {
   const usageHistory: ChatUsageDisplay[] = [];
@@ -40,7 +42,7 @@
   // Add recent usage
   usageHistory.push(createUsageEntry(lastEntryTokens, model));

-  return { usageHistory, totalTokens: 0 };
+  return { usageHistory, totalTokens: 0, liveUsage };
 };

 describe("checkAutoCompaction", () => {
@@ -297,4 +299,77 @@ describe("checkAutoCompaction", () => {
       expect(result.shouldShowWarning).toBe(true); // Above 60%
     });
   });
+
+  describe("Force Compaction (Live Usage)", () => {
+    const SONNET_MAX_TOKENS = 200_000;
+    const BUFFER = FORCE_COMPACTION_TOKEN_BUFFER;
+
+    test("shouldForceCompact is false when no liveUsage (falls back to lastUsage with room)", () => {
+      const usage = createMockUsage(100_000); // 100k remaining - plenty of room
+      const result = checkAutoCompaction(usage, KNOWN_MODELS.SONNET.id, false, true);
+
+      expect(result.shouldForceCompact).toBe(false);
+    });
+
+    test("shouldForceCompact is false when currentUsage has plenty of room", () => {
+      const liveUsage = createUsageEntry(100_000); // 100k remaining
+      const usage = createMockUsage(50_000, undefined, KNOWN_MODELS.SONNET.id, liveUsage);
+      const result = checkAutoCompaction(usage, KNOWN_MODELS.SONNET.id, false, true);
+
+      expect(result.shouldForceCompact).toBe(false);
+    });
+
+    test("shouldForceCompact is true when remaining <= buffer", () => {
+      // Exactly at buffer threshold
+      const liveUsage = createUsageEntry(SONNET_MAX_TOKENS - BUFFER);
+      const usage = createMockUsage(50_000, undefined, KNOWN_MODELS.SONNET.id, liveUsage);
+      const result = checkAutoCompaction(usage, KNOWN_MODELS.SONNET.id, false, true);
+
+      expect(result.shouldForceCompact).toBe(true);
+    });
+
+    test("shouldForceCompact is true when over context limit", () => {
+      const liveUsage = createUsageEntry(SONNET_MAX_TOKENS + 5000);
+      const usage = createMockUsage(50_000, undefined, KNOWN_MODELS.SONNET.id, liveUsage);
+      const result = checkAutoCompaction(usage, KNOWN_MODELS.SONNET.id, false, true);
+
+      expect(result.shouldForceCompact).toBe(true);
+    });
+
+    test("shouldForceCompact is false when just above buffer", () => {
+      // 1 token above buffer threshold
+      const liveUsage = createUsageEntry(SONNET_MAX_TOKENS - BUFFER - 1);
+      const usage = createMockUsage(50_000, undefined, KNOWN_MODELS.SONNET.id, liveUsage);
+      const result = checkAutoCompaction(usage, KNOWN_MODELS.SONNET.id, false, true);
+
+      expect(result.shouldForceCompact).toBe(false);
+    });
+
+    test("shouldForceCompact respects 1M context mode", () => {
+      // With 1M context, exactly at buffer threshold
+      const liveUsage = createUsageEntry(1_000_000 - BUFFER);
+      const usage = createMockUsage(50_000, undefined, KNOWN_MODELS.SONNET.id, liveUsage);
+      const result = checkAutoCompaction(usage, KNOWN_MODELS.SONNET.id, true, true);
+
+      expect(result.shouldForceCompact).toBe(true);
+    });
+
+    test("shouldForceCompact triggers with empty history but liveUsage near limit", () => {
+      // Bug fix: empty history but liveUsage should still trigger
+      const liveUsage = createUsageEntry(SONNET_MAX_TOKENS - BUFFER);
+      const usage: WorkspaceUsageState = { usageHistory: [], totalTokens: 0, liveUsage };
+      const result = checkAutoCompaction(usage, KNOWN_MODELS.SONNET.id, false, true);
+
+      expect(result.shouldForceCompact).toBe(true);
+      expect(result.usagePercentage).toBe(0); // No lastUsage for percentage
+    });
+
+    test("shouldForceCompact is false when auto-compaction disabled", () => {
+      const liveUsage = createUsageEntry(199_000); // Very close to limit
+      const usage = createMockUsage(50_000, undefined, KNOWN_MODELS.SONNET.id, liveUsage);
+      const result = checkAutoCompaction(usage, KNOWN_MODELS.SONNET.id, false, false); // disabled
+
+      expect(result.shouldForceCompact).toBe(false);
+    });
+  });
 });

src/browser/utils/compaction/autoCompactionCheck.ts

Lines changed: 43 additions & 13 deletions
@@ -16,12 +16,29 @@
  */

 import type { WorkspaceUsageState } from "@/browser/stores/WorkspaceStore";
+import type { ChatUsageDisplay } from "@/common/utils/tokens/usageAggregator";
 import { getModelStats } from "@/common/utils/tokens/modelStats";
 import { supports1MContext } from "@/common/utils/ai/models";
-import { DEFAULT_AUTO_COMPACTION_THRESHOLD } from "@/common/constants/ui";
+import {
+  DEFAULT_AUTO_COMPACTION_THRESHOLD,
+  FORCE_COMPACTION_TOKEN_BUFFER,
+} from "@/common/constants/ui";
+
+/** Sum all token components from a ChatUsageDisplay */
+function getTotalTokens(usage: ChatUsageDisplay): number {
+  return (
+    usage.input.tokens +
+    usage.cached.tokens +
+    usage.cacheCreate.tokens +
+    usage.output.tokens +
+    usage.reasoning.tokens
+  );
+}

 export interface AutoCompactionCheckResult {
   shouldShowWarning: boolean;
+  /** True when live usage shows ≤FORCE_COMPACTION_TOKEN_BUFFER remaining in context */
+  shouldForceCompact: boolean;
   usagePercentage: number;
   thresholdPercentage: number;
 }
@@ -54,11 +71,11 @@ export function checkAutoCompaction(
 ): AutoCompactionCheckResult {
   const thresholdPercentage = threshold * 100;

-  // Short-circuit if auto-compaction is disabled
-  // Or if no usage data yet
-  if (!enabled || !model || !usage || usage.usageHistory.length === 0) {
+  // Short-circuit if auto-compaction is disabled or missing required data
+  if (!enabled || !model || !usage) {
     return {
       shouldShowWarning: false,
+      shouldForceCompact: false,
       usagePercentage: 0,
       thresholdPercentage,
     };
@@ -67,31 +84,44 @@
   // Determine max tokens for this model
   const modelStats = getModelStats(model);
   const maxTokens = use1M && supports1MContext(model) ? 1_000_000 : modelStats?.max_input_tokens;
-  const lastUsage = usage.usageHistory[usage.usageHistory.length - 1];

   // No max tokens known - safe default (can't calculate percentage)
   if (!maxTokens) {
     return {
       shouldShowWarning: false,
+      shouldForceCompact: false,
       usagePercentage: 0,
       thresholdPercentage,
     };
   }

-  const currentContextTokens =
-    lastUsage.input.tokens +
-    lastUsage.cached.tokens +
-    lastUsage.cacheCreate.tokens +
-    lastUsage.output.tokens +
-    lastUsage.reasoning.tokens;
+  // Current usage: live when streaming, else last historical (pattern from CostsTab)
+  const lastUsage = usage.usageHistory[usage.usageHistory.length - 1];
+  const currentUsage = usage.liveUsage ?? lastUsage;
+
+  // Force-compact when approaching context limit (can trigger even with empty history if streaming)
+  let shouldForceCompact = false;
+  if (currentUsage) {
+    const remainingTokens = maxTokens - getTotalTokens(currentUsage);
+    shouldForceCompact = remainingTokens <= FORCE_COMPACTION_TOKEN_BUFFER;
+  }

-  const usagePercentage = (currentContextTokens / maxTokens) * 100;
+  // Warning/percentage based on lastUsage (completed requests only)
+  if (!lastUsage) {
+    return {
+      shouldShowWarning: false,
+      shouldForceCompact,
+      usagePercentage: 0,
+      thresholdPercentage,
+    };
+  }

-  // Show warning if within advance window (e.g., 60% for 70% threshold with 10% advance)
+  const usagePercentage = (getTotalTokens(lastUsage) / maxTokens) * 100;
   const shouldShowWarning = usagePercentage >= thresholdPercentage - warningAdvancePercent;

   return {
     shouldShowWarning,
+    shouldForceCompact,
     usagePercentage,
     thresholdPercentage,
   };
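
Note that the two result flags can now diverge: shouldForceCompact follows liveUsage (falling back to lastUsage), while shouldShowWarning and usagePercentage are still computed from completed requests only. A minimal sketch of the empty-history case exercised by the tests above (the usage literal is illustrative; createUsageEntry is the test helper, not part of this file):

// First message still streaming: no completed requests yet, but live usage is near the limit.
const usage: WorkspaceUsageState = {
  usageHistory: [],
  totalTokens: 0,
  liveUsage: createUsageEntry(200_000 - FORCE_COMPACTION_TOKEN_BUFFER),
};
const result = checkAutoCompaction(usage, KNOWN_MODELS.SONNET.id, false, true);
// Expect result.shouldForceCompact === true and result.usagePercentage === 0 (no lastUsage to measure against).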

src/common/constants/ui.ts

Lines changed: 22 additions & 0 deletions
@@ -27,6 +27,28 @@ export const DEFAULT_AUTO_COMPACTION_THRESHOLD_PERCENT = 70;
  * Default threshold as decimal for calculations (0.7 = 70%)
  */
 export const DEFAULT_AUTO_COMPACTION_THRESHOLD = DEFAULT_AUTO_COMPACTION_THRESHOLD_PERCENT / 100;
+
+/**
+ * Default word target for compaction summaries
+ */
+export const DEFAULT_COMPACTION_WORD_TARGET = 2000;
+
+/**
+ * Approximate ratio of tokens to words (tokens per word)
+ * Used for converting between word counts and token counts
+ */
+export const WORDS_TO_TOKENS_RATIO = 1.3;
+
+/**
+ * Force-compaction token buffer.
+ * When auto-compaction is enabled and live usage shows this many tokens or fewer
+ * remaining in the context window, force a compaction immediately.
+ * Set to 2x the expected compaction output size to ensure room for the summary.
+ */
+export const FORCE_COMPACTION_TOKEN_BUFFER = Math.round(
+  2 * DEFAULT_COMPACTION_WORD_TARGET * WORDS_TO_TOKENS_RATIO
+); // = 5200 tokens
+
 /**
  * Duration (ms) to show "copied" feedback after copying to clipboard
