diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 7da9968411..1366eb6f84 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -32,7 +32,8 @@ jobs:
       - name: Install and setup shfmt
         run: |
-          if [[ ! -f "$HOME/.local/bin/shfmt" ]]; then
+          # Install shfmt if not cached or if cached binary is broken
+          if [[ ! -f "$HOME/.local/bin/shfmt" ]] || ! "$HOME/.local/bin/shfmt" --version >/dev/null 2>&1; then
             curl -sS https://webinstall.dev/shfmt | bash
           fi
           echo "$HOME/.local/bin" >> $GITHUB_PATH
diff --git a/docs/context-management.md b/docs/context-management.md
index dfa08bef41..f8a72afde3 100644
--- a/docs/context-management.md
+++ b/docs/context-management.md
@@ -47,13 +47,14 @@ Compress conversation history using AI summarization. Replaces the conversation
 ### Syntax
 
 ```
-/compact [-t <tokens>]
+/compact [-t <tokens>] [-m <model>]
 [continue message on subsequent lines]
 ```
 
 ### Options
 
 - `-t <tokens>` - Maximum output tokens for the summary (default: ~2000 words)
+- `-m <model>` - Model to use for compaction (default: workspace model). Supports abbreviations like `haiku`, `sonnet`, or full model strings
 
 ### Examples
@@ -69,6 +70,14 @@ Compress conversation history using AI summarization. Replaces the conversation
 /compact -t 5000
 ```
 
+**Choose compaction model:**
+
+```
+/compact -m haiku
+```
+
+Use Haiku for faster, lower-cost compaction.
+
 **Auto-continue with custom message:**
 
 ```
@@ -88,16 +97,18 @@ Make sure to add tests for the error cases.
 
 Continue messages can span multiple lines for more detailed instructions.
 
-**Combine token limit and auto-continue:**
+**Combine all options:**
 
 ```
-/compact -t 3000
+/compact -m haiku -t 8000
 Keep working on the feature
 ```
 
+Combine custom model, token limit, and auto-continue message.
+
 ### Notes
 
-- Uses the selected LLM to summarize conversation history
+- Uses the specified model (or workspace model by default) to summarize conversation history
 - Preserves actionable context and specific details
 - **Irreversible** - original messages are replaced
 - Continue message is sent once after compaction completes (not persisted)
diff --git a/src/components/AIView.tsx b/src/components/AIView.tsx
index 01b116ce77..5e1e9208a1 100644
--- a/src/components/AIView.tsx
+++ b/src/components/AIView.tsx
@@ -495,7 +495,9 @@ const AIViewInner: React.FC<AIViewProps> = ({
-      const options: Partial<SendMessageOptions> = {
-        thinkingLevel: isAnthropic ? "off" : sendMessageOptions.thinkingLevel,
-        toolPolicy: [{ regex_match: "compact_summary", action: "require" }],
-        maxOutputTokens: parsed.maxOutputTokens,
-        mode: "compact" as const,
-      };
+      // Apply compaction overrides using shared transformation function
+      // This same function is used by useResumeManager to ensure consistency
+      const options = applyCompactionOverrides(sendMessageOptions, compactData);
 
       return { messageText, metadata, options };
     }
diff --git a/src/hooks/useResumeManager.ts b/src/hooks/useResumeManager.ts
index 16d932acfe..dbf4f38d00 100644
--- a/src/hooks/useResumeManager.ts
+++ b/src/hooks/useResumeManager.ts
@@ -5,6 +5,7 @@ import { getAutoRetryKey, getRetryStateKey } from "@/constants/storage";
 import { getSendOptionsFromStorage } from "@/utils/messages/sendOptions";
 import { readPersistedState } from "./usePersistedState";
 import { hasInterruptedStream } from "@/utils/messages/retryEligibility";
+import { applyCompactionOverrides } from "@/utils/messages/compactionOptions";
 
 interface RetryState {
   attempt: number;
@@ -139,7 +140,20 @@ export function useResumeManager() {
       const { attempt } = retryState;
 
       try {
-        const options = getSendOptionsFromStorage(workspaceId);
+        // Start with workspace defaults
+        let options = getSendOptionsFromStorage(workspaceId);
+
+        // Check if last user message was a compaction request
+        const state = workspaceStatesRef.current.get(workspaceId);
+        if (state) {
+          const lastUserMsg = [...state.messages].reverse().find((msg) => msg.type === "user");
+          if (lastUserMsg?.compactionRequest) {
+            // Apply compaction overrides using shared function (same as ChatInput)
+            // This ensures custom model/tokens are preserved across resume
+            options = applyCompactionOverrides(options, lastUserMsg.compactionRequest.parsed);
+          }
+        }
+
         const result = await window.api.workspace.resumeStream(workspaceId, options);
 
         if (!result.success) {
diff --git a/src/services/agentSession.ts b/src/services/agentSession.ts
index 670eaf01b9..77fd0ea107 100644
--- a/src/services/agentSession.ts
+++ b/src/services/agentSession.ts
@@ -12,6 +12,7 @@ import type { SendMessageError } from "@/types/errors";
 import { createUnknownSendMessageError } from "@/services/utils/sendMessageError";
 import type { Result } from "@/types/result";
 import { Ok, Err } from "@/types/result";
+import { enforceThinkingPolicy } from "@/utils/thinking/policy";
 
 interface ImagePart {
   image: string;
@@ -297,11 +298,17 @@ export class AgentSession {
       return Err(createUnknownSendMessageError(historyResult.error));
     }
 
+    // Enforce thinking policy for the specified model (single source of truth)
+    // This ensures model-specific requirements are met regardless of where the request originates
+    const effectiveThinkingLevel = options?.thinkingLevel
+      ? enforceThinkingPolicy(modelString, options.thinkingLevel)
+      : undefined;
+
     const streamResult = await this.aiService.streamMessage(
       historyResult.data,
       this.workspaceId,
       modelString,
-      options?.thinkingLevel,
+      effectiveThinkingLevel,
       options?.toolPolicy,
       undefined,
       options?.additionalSystemInstructions,
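Note on `enforceThinkingPolicy`: the function is imported above but its implementation is not part of this diff. A minimal sketch of the kind of normalization such a policy might perform, assuming a `ThinkingLevel` string union; the example rule is an assumption for illustration, not the project's actual policy:

```ts
// Hypothetical sketch only; the real implementation lives in src/utils/thinking/policy.ts.
type ThinkingLevel = "off" | "low" | "medium" | "high";

function enforceThinkingPolicy(modelString: string, requested: ThinkingLevel): ThinkingLevel {
  // Assumed example rule: a reasoning-oriented model family cannot run with thinking off.
  if (modelString.startsWith("openai:gpt-5-pro") && requested === "off") {
    return "low";
  }
  // Otherwise the requested level passes through unchanged.
  return requested;
}
```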
diff --git a/src/types/message.ts b/src/types/message.ts
index 24cff7a1f8..04de8ee517 100644
--- a/src/types/message.ts
+++ b/src/types/message.ts
@@ -3,16 +3,20 @@ import type { LanguageModelV2Usage } from "@ai-sdk/provider";
 import type { StreamErrorType } from "./errors";
 import type { ToolPolicy } from "@/utils/tools/toolPolicy";
 
+// Parsed compaction request data (shared type for consistency)
+export interface CompactionRequestData {
+  model?: string; // Custom model override for compaction
+  maxOutputTokens?: number;
+  continueMessage?: string;
+}
+
 // Frontend-specific metadata stored in cmuxMetadata field
 // Backend stores this as-is without interpretation (black-box)
 export type CmuxFrontendMetadata =
   | {
       type: "compaction-request";
       rawCommand: string; // The original /compact command as typed by user (for display)
-      parsed: {
-        maxOutputTokens?: number;
-        continueMessage?: string;
-      };
+      parsed: CompactionRequestData;
     }
   | {
       type: "compaction-result";
diff --git a/src/utils/messages/compactionOptions.test.ts b/src/utils/messages/compactionOptions.test.ts
new file mode 100644
index 0000000000..3b78b9f410
--- /dev/null
+++ b/src/utils/messages/compactionOptions.test.ts
@@ -0,0 +1,81 @@
+/**
+ * Tests for compaction options transformation
+ */
+
+import { applyCompactionOverrides } from "./compactionOptions";
+import type { SendMessageOptions } from "@/types/ipc";
+import type { CompactionRequestData } from "@/types/message";
+
+describe("applyCompactionOverrides", () => {
+  const baseOptions: SendMessageOptions = {
+    model: "anthropic:claude-sonnet-4-5",
+    thinkingLevel: "medium",
+    toolPolicy: [],
+    mode: "exec",
+  };
+
+  it("uses workspace model when no override specified", () => {
+    const compactData: CompactionRequestData = {};
+    const result = applyCompactionOverrides(baseOptions, compactData);
+
+    expect(result.model).toBe("anthropic:claude-sonnet-4-5");
+    expect(result.mode).toBe("compact");
+  });
+
+  it("applies custom model override", () => {
+    const compactData: CompactionRequestData = {
+      model: "anthropic:claude-haiku-4-5",
+    };
+    const result = applyCompactionOverrides(baseOptions, compactData);
+
+    expect(result.model).toBe("anthropic:claude-haiku-4-5");
+  });
+
+  it("sets thinking to off for Anthropic models", () => {
+    const compactData: CompactionRequestData = {
+      model: "anthropic:claude-haiku-4-5",
+    };
+    const result = applyCompactionOverrides(baseOptions, compactData);
+
+    expect(result.thinkingLevel).toBe("off");
+  });
+
+  it("preserves workspace thinking level for non-Anthropic models", () => {
+    const compactData: CompactionRequestData = {
+      model: "openai:gpt-5-pro",
+    };
+    const result = applyCompactionOverrides(baseOptions, compactData);
+
+    expect(result.thinkingLevel).toBe("medium");
+  });
+
+  it("applies maxOutputTokens override", () => {
+    const compactData: CompactionRequestData = {
+      maxOutputTokens: 8000,
+    };
+    const result = applyCompactionOverrides(baseOptions, compactData);
+
+    expect(result.maxOutputTokens).toBe(8000);
+  });
+
+  it("sets compact mode and tool policy", () => {
+    const compactData: CompactionRequestData = {};
+    const result = applyCompactionOverrides(baseOptions, compactData);
+
+    expect(result.mode).toBe("compact");
+    expect(result.toolPolicy).toEqual([{ regex_match: "compact_summary", action: "require" }]);
+  });
+
+  it("applies all overrides together", () => {
+    const compactData: CompactionRequestData = {
+      model: "openai:gpt-5",
+      maxOutputTokens: 5000,
+    };
+    const result = applyCompactionOverrides(baseOptions, compactData);
+
+    expect(result.model).toBe("openai:gpt-5");
+    expect(result.maxOutputTokens).toBe(5000);
+    expect(result.mode).toBe("compact");
+    expect(result.thinkingLevel).toBe("medium"); // Non-Anthropic preserves original
+  });
+});
diff --git a/src/utils/messages/compactionOptions.ts b/src/utils/messages/compactionOptions.ts
new file mode 100644
index 0000000000..97809b20ad
--- /dev/null
+++ b/src/utils/messages/compactionOptions.ts
@@ -0,0 +1,41 @@
+/**
+ * Compaction options transformation
+ *
+ * Single source of truth for converting compaction metadata into SendMessageOptions.
+ * Used by both ChatInput (initial send) and useResumeManager (resume after interruption).
+ */
+
+import type { SendMessageOptions } from "@/types/ipc";
+import type { CompactionRequestData } from "@/types/message";
+
+/**
+ * Apply compaction-specific option overrides to base options.
+ *
+ * This function is the single source of truth for how compaction metadata
+ * transforms workspace defaults. Both initial sends and stream resumption
+ * use this function to ensure consistent behavior.
+ *
+ * @param baseOptions - Workspace default options (from localStorage or useSendMessageOptions)
+ * @param compactData - Compaction request metadata from /compact command
+ * @returns Final SendMessageOptions with compaction overrides applied
+ */
+export function applyCompactionOverrides(
+  baseOptions: SendMessageOptions,
+  compactData: CompactionRequestData
+): SendMessageOptions {
+  // Use custom model if specified, otherwise use workspace default
+  const compactionModel = compactData.model ?? baseOptions.model;
+
+  // Anthropic models don't support thinking, always use "off"
+  // Non-Anthropic models keep workspace default (backend will enforce policy)
+  const isAnthropic = compactionModel.startsWith("anthropic:");
+
+  return {
+    ...baseOptions,
+    model: compactionModel,
+    thinkingLevel: isAnthropic ? "off" : baseOptions.thinkingLevel,
+    toolPolicy: [{ regex_match: "compact_summary", action: "require" }],
+    maxOutputTokens: compactData.maxOutputTokens,
+    mode: "compact" as const,
+  };
+}
diff --git a/src/utils/slashCommands/compact.test.ts b/src/utils/slashCommands/compact.test.ts
index 76a69c35bd..e83e236eff 100644
--- a/src/utils/slashCommands/compact.test.ts
+++ b/src/utils/slashCommands/compact.test.ts
@@ -10,6 +10,7 @@ describe("compact command parser", () => {
       type: "compact",
       maxOutputTokens: undefined,
       continueMessage: undefined,
+      model: undefined,
     });
   });
 
@@ -19,6 +20,7 @@
       type: "compact",
       maxOutputTokens: 5000,
       continueMessage: undefined,
+      model: undefined,
     });
   });
 
@@ -28,6 +30,7 @@
       type: "compact",
       maxOutputTokens: undefined,
       continueMessage: "Continue where we left off",
+      model: undefined,
     });
   });
 
@@ -37,6 +40,7 @@
       type: "compact",
       maxOutputTokens: 3000,
       continueMessage: "Keep going",
+      model: undefined,
     });
   });
 
@@ -46,6 +50,7 @@
       type: "compact",
       maxOutputTokens: 3000,
       continueMessage: "Keep going",
+      model: undefined,
     });
   });
 
@@ -55,6 +60,7 @@
       type: "compact",
       maxOutputTokens: undefined,
       continueMessage: undefined,
+      model: undefined,
     });
   });
 
@@ -64,6 +70,7 @@
       type: "compact",
       maxOutputTokens: undefined,
       continueMessage: "Keep",
+      model: undefined,
     });
   });
 
@@ -113,6 +120,76 @@
       subcommand: "-t requires a positive number, got 0",
     });
   });
+
+  it("parses -m flag with model abbreviation", () => {
+    const result = parseCommand("/compact -m sonnet");
+    expect(result).toEqual({
+      type: "compact",
+      maxOutputTokens: undefined,
+      continueMessage: undefined,
+      model: "anthropic:claude-sonnet-4-5",
+    });
+  });
+
+  it("parses -m flag with full model string", () => {
+    const result = parseCommand("/compact -m anthropic:claude-opus-4-1");
+    expect(result).toEqual({
+      type: "compact",
+      maxOutputTokens: undefined,
+      continueMessage: undefined,
+      model: "anthropic:claude-opus-4-1",
+    });
+  });
+
+  it("parses -m flag with other flags", () => {
+    const result = parseCommand('/compact -t 5000 -m haiku -c "Keep going"');
+    expect(result).toEqual({
+      type: "compact",
+      maxOutputTokens: 5000,
+      continueMessage: "Keep going",
+      model: "anthropic:claude-haiku-4-5",
+    });
+  });
+
+  it("parses -m flag in any position", () => {
+    const result = parseCommand('/compact -m opus -t 3000 -c "Continue"');
+    expect(result).toEqual({
+      type: "compact",
+      maxOutputTokens: 3000,
+      continueMessage: "Continue",
+      model: "anthropic:claude-opus-4-1",
+    });
+  });
+
+  it("handles -m without model (undefined)", () => {
+    const result = parseCommand("/compact -m");
+    expect(result).toEqual({
+      type: "compact",
+      maxOutputTokens: undefined,
+      continueMessage: undefined,
+      model: undefined,
+    });
+  });
+
+  it("resolves model abbreviations case-sensitively", () => {
+    const result = parseCommand("/compact -m codex");
+    expect(result).toEqual({
+      type: "compact",
+      maxOutputTokens: undefined,
+      continueMessage: undefined,
+      model: "openai:gpt-5-codex",
+    });
+  });
+
+  it("treats unknown abbreviations as full model strings", () => {
+    const result = parseCommand("/compact -m custom:model");
+    expect(result).toEqual({
+      type: "compact",
+      maxOutputTokens: undefined,
+      continueMessage: undefined,
+      model: "custom:model",
+    });
+  });
 
   it("rejects extra positional arguments", () => {
@@ -140,6 +217,7 @@ describe("multiline continue messages", () => {
       type: "compact",
       maxOutputTokens: undefined,
       continueMessage: "Continue implementing the auth system",
+      model: undefined,
     });
   });
 
@@ -149,6 +227,7 @@
       type: "compact",
       maxOutputTokens: 5000,
       continueMessage: "Keep working on the feature",
+      model: undefined,
     });
   });
 
@@ -158,6 +237,7 @@
       type: "compact",
       maxOutputTokens: undefined,
       continueMessage: "Line 1\nLine 2\nLine 3",
+      model: undefined,
     });
   });
 
@@ -167,6 +247,7 @@
       type: "compact",
       maxOutputTokens: undefined,
       continueMessage: "Continue after empty line",
+      model: undefined,
     });
   });
 
@@ -176,6 +257,7 @@
       type: "compact",
       maxOutputTokens: undefined,
       continueMessage: "Indented message\n More indented",
+      model: undefined,
     });
   });
 
@@ -185,6 +267,7 @@
       type: "compact",
       maxOutputTokens: undefined,
       continueMessage: "Flag message",
+      model: undefined,
     });
   });
 
@@ -194,6 +277,7 @@
       type: "compact",
       maxOutputTokens: 3000,
       continueMessage: "Keep going",
+      model: undefined,
     });
   });
 
@@ -203,6 +287,7 @@
       type: "compact",
       maxOutputTokens: undefined,
       continueMessage: "Continue here",
+      model: undefined,
     });
   });
 
@@ -212,6 +297,7 @@
       type: "compact",
       maxOutputTokens: undefined,
       continueMessage: undefined,
+      model: undefined,
     });
   });
 
@@ -222,6 +308,7 @@
       type: "compact",
       maxOutputTokens: undefined,
       continueMessage: "-t should be treated as message content",
+      model: undefined,
     });
   });
 
@@ -231,6 +318,27 @@
       type: "compact",
       maxOutputTokens: 5000,
       continueMessage: "-c this is not a flag",
+      model: undefined,
     });
   });
+
+  it("parses -m flag with multiline continue message", () => {
+    const result = parseCommand("/compact -m haiku\nContinue with the implementation");
+    expect(result).toEqual({
+      type: "compact",
+      maxOutputTokens: undefined,
+      continueMessage: "Continue with the implementation",
+      model: "anthropic:claude-haiku-4-5",
+    });
+  });
+
+  it("parses all flags with multiline continue message", () => {
+    const result = parseCommand("/compact -t 5000 -m sonnet\nFinish the refactoring");
+    expect(result).toEqual({
+      type: "compact",
+      maxOutputTokens: 5000,
+      continueMessage: "Finish the refactoring",
+      model: "anthropic:claude-sonnet-4-5",
+    });
+  });
 });
diff --git a/src/utils/slashCommands/registry.ts b/src/utils/slashCommands/registry.ts
index b4657ab12a..0715ef24dd 100644
--- a/src/utils/slashCommands/registry.ts
+++ b/src/utils/slashCommands/registry.ts
@@ -172,7 +172,7 @@ const truncateCommandDefinition: SlashCommandDefinition = {
 const compactCommandDefinition: SlashCommandDefinition = {
   key: "compact",
   description:
-    "Compact conversation history using AI summarization. Use -t to set max output tokens. Add continue message on lines after the command.",
+    "Compact conversation history using AI summarization. Use -t to set max output tokens, -m to set compaction model. Add continue message on lines after the command.",
   handler: ({ rawInput }): ParsedCommand => {
     // Split rawInput into first line (for flags) and remaining lines (for multiline continue)
     // rawInput format: "-t 5000\nContinue here" or "\nContinue here" (starts with newline if no flags)
@@ -189,7 +189,7 @@ const compactCommandDefinition: SlashCommandDefinition = {
 
     // Parse flags from first line using minimist
     const parsed = minimist(firstLineTokens, {
-      string: ["t", "c"],
+      string: ["t", "c", "m"],
       unknown: (arg: string) => {
         // Unknown flags starting with - are errors
         if (arg.startsWith("-")) {
@@ -201,7 +201,7 @@ const compactCommandDefinition: SlashCommandDefinition = {
 
     // Check for unknown flags (only from first line)
     const unknownFlags = firstLineTokens.filter(
-      (token) => token.startsWith("-") && token !== "-t" && token !== "-c"
+      (token) => token.startsWith("-") && token !== "-t" && token !== "-c" && token !== "-m"
     );
     if (unknownFlags.length > 0) {
       return {
@@ -225,6 +225,14 @@ const compactCommandDefinition: SlashCommandDefinition = {
       maxOutputTokens = tokens;
     }
 
+    // Handle -m (model) flag: resolve abbreviation if present, otherwise use as-is
+    let model: string | undefined;
+    if (parsed.m !== undefined && typeof parsed.m === "string" && parsed.m.trim().length > 0) {
+      const modelInput = parsed.m.trim();
+      // Check if it's an abbreviation
+      model = MODEL_ABBREVIATIONS[modelInput] ?? modelInput;
+    }
+
     // Reject extra positional arguments UNLESS they're from multiline content
     // (multiline content gets parsed as positional args by minimist since newlines become spaces)
     if (parsed._.length > 0 && !hasMultilineContent) {
@@ -248,7 +256,7 @@ const compactCommandDefinition: SlashCommandDefinition = {
       continueMessage = remainingLines;
     }
 
-    return { type: "compact", maxOutputTokens, continueMessage };
+    return { type: "compact", maxOutputTokens, continueMessage, model };
   },
 };
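Note on `MODEL_ABBREVIATIONS`: the table is referenced by the handler but not defined in this diff. Judging from the parser tests, it is presumably a plain lookup table along these lines; entries are inferred from test expectations, and the real table may contain more:

```ts
// Inferred from compact.test.ts; the actual table lives elsewhere in the codebase.
const MODEL_ABBREVIATIONS: Record<string, string> = {
  sonnet: "anthropic:claude-sonnet-4-5",
  haiku: "anthropic:claude-haiku-4-5",
  opus: "anthropic:claude-opus-4-1",
  codex: "openai:gpt-5-codex",
};
```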
diff --git a/src/utils/slashCommands/types.ts b/src/utils/slashCommands/types.ts
index 1f0b586701..1d0b7a9c25 100644
--- a/src/utils/slashCommands/types.ts
+++ b/src/utils/slashCommands/types.ts
@@ -11,7 +11,7 @@ export type ParsedCommand =
   | { type: "model-help" }
   | { type: "clear" }
   | { type: "truncate"; percentage: number }
-  | { type: "compact"; maxOutputTokens?: number; continueMessage?: string }
+  | { type: "compact"; maxOutputTokens?: number; continueMessage?: string; model?: string }
   | { type: "unknown-command"; command: string; subcommand?: string }
   | null;
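End to end, the widened `ParsedCommand` is persisted as compaction-request metadata and later rebuilt into send options. A sketch of the round trip under the types in this diff; the command text and values are illustrative but consistent with the tests above:

```ts
import type { CmuxFrontendMetadata } from "@/types/message";

// "/compact -m haiku -t 8000" → ParsedCommand → persisted cmuxMetadata
const metadata: CmuxFrontendMetadata = {
  type: "compaction-request",
  rawCommand: "/compact -m haiku -t 8000",
  parsed: {
    model: "anthropic:claude-haiku-4-5", // abbreviation already resolved by the parser
    maxOutputTokens: 8000,
  },
};
// On initial send (ChatInput) and on resume (useResumeManager), metadata.parsed
// feeds applyCompactionOverrides to rebuild the same SendMessageOptions.
```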