3 changes: 2 additions & 1 deletion .github/workflows/ci.yml
@@ -32,7 +32,8 @@ jobs:
 
       - name: Install and setup shfmt
        run: |
-          if [[ ! -f "$HOME/.local/bin/shfmt" ]]; then
+          # Install shfmt if not cached or if cached binary is broken
+          if [[ ! -f "$HOME/.local/bin/shfmt" ]] || ! "$HOME/.local/bin/shfmt" --version >/dev/null 2>&1; then
            curl -sS https://webinstall.dev/shfmt | bash
          fi
          echo "$HOME/.local/bin" >> $GITHUB_PATH
19 changes: 15 additions & 4 deletions docs/context-management.md
@@ -47,13 +47,14 @@ Compress conversation history using AI summarization. Replaces the conversation
 ### Syntax
 
 ```
-/compact [-t <tokens>]
+/compact [-t <tokens>] [-m <model>]
 [continue message on subsequent lines]
 ```
 
 ### Options
 
 - `-t <tokens>` - Maximum output tokens for the summary (default: ~2000 words)
+- `-m <model>` - Model to use for compaction (default: workspace model). Supports abbreviations like `haiku`, `sonnet`, or full model strings
 
 ### Examples
 
@@ -69,6 +70,14 @@ Compress conversation history using AI summarization. Replaces the conversation
 /compact -t 5000
 ```
 
+**Choose compaction model:**
+
+```
+/compact -m haiku
+```
+
+Use Haiku for faster, lower-cost compaction.
+
 **Auto-continue with custom message:**
 
 ```
@@ -88,16 +97,18 @@ Make sure to add tests for the error cases.
 
 Continue messages can span multiple lines for more detailed instructions.
 
-**Combine token limit and auto-continue:**
+**Combine all options:**
 
 ```
-/compact -t 3000
+/compact -m haiku -t 8000
 Keep working on the feature
 ```
 
+Combine custom model, token limit, and auto-continue message.
+
 ### Notes
 
-- Uses the selected LLM to summarize conversation history
+- Uses the specified model (or workspace model by default) to summarize conversation history
 - Preserves actionable context and specific details
 - **Irreversible** - original messages are replaced
 - Continue message is sent once after compaction completes (not persisted)
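
Note: the abbreviation expansion the docs mention for `-m` (e.g. `haiku` to a full model string) is not part of this diff. A minimal sketch of the idea, where the alias table and the function name `resolveModelAbbreviation` are assumptions (the full model strings are borrowed from the tests in this PR):

```typescript
// Hypothetical sketch -- the real resolver is implemented elsewhere.
// Unrecognized input passes through unchanged, so full model strings
// like "openai:gpt-5" keep working.
const MODEL_ALIASES: Record<string, string> = {
  haiku: "anthropic:claude-haiku-4-5",
  sonnet: "anthropic:claude-sonnet-4-5",
};

function resolveModelAbbreviation(input: string): string {
  return MODEL_ALIASES[input] ?? input;
}
```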
4 changes: 3 additions & 1 deletion src/components/AIView.tsx
@@ -495,7 +495,9 @@ const AIViewInner: React.FC<AIViewProps> = ({
           <StreamingBarrier
             statusText={
               isCompacting
-                ? "compacting..."
+                ? currentModel
+                  ? `${getModelName(currentModel)} compacting...`
+                  : "compacting..."
                 : currentModel
                   ? `${getModelName(currentModel)} streaming...`
                   : "streaming..."
25 changes: 13 additions & 12 deletions src/components/ChatInput.tsx
@@ -26,8 +26,9 @@ import { VimTextArea } from "./VimTextArea";
 import { ImageAttachments, type ImageAttachment } from "./ImageAttachments";
 
 import type { ThinkingLevel } from "@/types/thinking";
-import type { CmuxFrontendMetadata } from "@/types/message";
+import type { CmuxFrontendMetadata, CompactionRequestData } from "@/types/message";
 import type { SendMessageOptions } from "@/types/ipc";
+import { applyCompactionOverrides } from "@/utils/messages/compactionOptions";
 
 const InputSection = styled.div`
   position: relative;
@@ -304,22 +305,22 @@ function prepareCompactionMessage(
 
   const messageText = `Summarize this conversation into a compact form for a new Assistant to continue helping the user. Use approximately ${targetWords} words.`;
 
+  // Create compaction metadata (will be stored in user message)
+  const compactData: CompactionRequestData = {
+    model: parsed.model,
+    maxOutputTokens: parsed.maxOutputTokens,
+    continueMessage: parsed.continueMessage,
+  };
+
   const metadata: CmuxFrontendMetadata = {
     type: "compaction-request",
     rawCommand: command,
-    parsed: {
-      maxOutputTokens: parsed.maxOutputTokens,
-      continueMessage: parsed.continueMessage,
-    },
+    parsed: compactData,
   };
 
-  const isAnthropic = sendMessageOptions.model.startsWith("anthropic:");
-  const options: Partial<SendMessageOptions> = {
-    thinkingLevel: isAnthropic ? "off" : sendMessageOptions.thinkingLevel,
-    toolPolicy: [{ regex_match: "compact_summary", action: "require" }],
-    maxOutputTokens: parsed.maxOutputTokens,
-    mode: "compact" as const,
-  };
+  // Apply compaction overrides using shared transformation function
+  // This same function is used by useResumeManager to ensure consistency
+  const options = applyCompactionOverrides(sendMessageOptions, compactData);
 
   return { messageText, metadata, options };
 }
16 changes: 15 additions & 1 deletion src/hooks/useResumeManager.ts
@@ -5,6 +5,7 @@ import { getAutoRetryKey, getRetryStateKey } from "@/constants/storage";
 import { getSendOptionsFromStorage } from "@/utils/messages/sendOptions";
 import { readPersistedState } from "./usePersistedState";
 import { hasInterruptedStream } from "@/utils/messages/retryEligibility";
+import { applyCompactionOverrides } from "@/utils/messages/compactionOptions";
 
 interface RetryState {
   attempt: number;
@@ -139,7 +140,20 @@ export function useResumeManager() {
       const { attempt } = retryState;
 
       try {
-        const options = getSendOptionsFromStorage(workspaceId);
+        // Start with workspace defaults
+        let options = getSendOptionsFromStorage(workspaceId);
+
+        // Check if last user message was a compaction request
+        const state = workspaceStatesRef.current.get(workspaceId);
+        if (state) {
+          const lastUserMsg = [...state.messages].reverse().find((msg) => msg.type === "user");
+          if (lastUserMsg?.compactionRequest) {
+            // Apply compaction overrides using shared function (same as ChatInput)
+            // This ensures custom model/tokens are preserved across resume
+            options = applyCompactionOverrides(options, lastUserMsg.compactionRequest.parsed);
+          }
+        }
+
         const result = await window.api.workspace.resumeStream(workspaceId, options);
 
         if (!result.success) {
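
The resume path reads `lastUserMsg.compactionRequest.parsed`, while `ChatInput.tsx` stores the same data as `CmuxFrontendMetadata` of type `"compaction-request"`. The mapping between the two is not shown in this diff; an assumed shape, for orientation only:

```typescript
import type { CompactionRequestData } from "@/types/message";

// Assumption -- not part of this diff. Workspace state presumably surfaces
// the "compaction-request" metadata on user messages under this field when
// history is loaded, which is what useResumeManager reads above.
interface DisplayedUserMessage {
  type: "user";
  compactionRequest?: {
    rawCommand: string;
    parsed: CompactionRequestData;
  };
}
```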
9 changes: 8 additions & 1 deletion src/services/agentSession.ts
@@ -12,6 +12,7 @@ import type { SendMessageError } from "@/types/errors";
 import { createUnknownSendMessageError } from "@/services/utils/sendMessageError";
 import type { Result } from "@/types/result";
 import { Ok, Err } from "@/types/result";
+import { enforceThinkingPolicy } from "@/utils/thinking/policy";
 
 interface ImagePart {
   image: string;
@@ -297,11 +298,17 @@ export class AgentSession {
       return Err(createUnknownSendMessageError(historyResult.error));
     }
 
+    // Enforce thinking policy for the specified model (single source of truth)
+    // This ensures model-specific requirements are met regardless of where the request originates
+    const effectiveThinkingLevel = options?.thinkingLevel
+      ? enforceThinkingPolicy(modelString, options.thinkingLevel)
+      : undefined;
+
     const streamResult = await this.aiService.streamMessage(
       historyResult.data,
       this.workspaceId,
       modelString,
-      options?.thinkingLevel,
+      effectiveThinkingLevel,
       options?.toolPolicy,
       undefined,
       options?.additionalSystemInstructions,
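
The `enforceThinkingPolicy` implementation in `@/utils/thinking/policy` is not included in this diff. Going only by the comments in `compactionOptions.ts` below ("Anthropic models don't support thinking, always use 'off'"; "backend will enforce policy"), a minimal sketch might be:

```typescript
import type { ThinkingLevel } from "@/types/thinking";

// Assumed behavior -- the real policy module may check more than the
// provider prefix (e.g. per-model minimum thinking levels).
export function enforceThinkingPolicy(
  modelString: string,
  requested: ThinkingLevel
): ThinkingLevel {
  return modelString.startsWith("anthropic:") ? "off" : requested;
}
```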
12 changes: 8 additions & 4 deletions src/types/message.ts
@@ -3,16 +3,20 @@ import type { LanguageModelV2Usage } from "@ai-sdk/provider";
 import type { StreamErrorType } from "./errors";
 import type { ToolPolicy } from "@/utils/tools/toolPolicy";
 
+// Parsed compaction request data (shared type for consistency)
+export interface CompactionRequestData {
+  model?: string; // Custom model override for compaction
+  maxOutputTokens?: number;
+  continueMessage?: string;
+}
+
 // Frontend-specific metadata stored in cmuxMetadata field
 // Backend stores this as-is without interpretation (black-box)
 export type CmuxFrontendMetadata =
   | {
       type: "compaction-request";
       rawCommand: string; // The original /compact command as typed by user (for display)
-      parsed: {
-        maxOutputTokens?: number;
-        continueMessage?: string;
-      };
+      parsed: CompactionRequestData;
     }
   | {
       type: "compaction-result";
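
For reference, this is the metadata `prepareCompactionMessage` in `ChatInput.tsx` above would produce for `/compact -m haiku -t 8000`, assuming `haiku` expands to the full model string used in the tests:

```typescript
const metadata: CmuxFrontendMetadata = {
  type: "compaction-request",
  rawCommand: "/compact -m haiku -t 8000",
  parsed: {
    model: "anthropic:claude-haiku-4-5",
    maxOutputTokens: 8000,
  },
};
```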
81 changes: 81 additions & 0 deletions src/utils/messages/compactionOptions.test.ts
@@ -0,0 +1,81 @@
/**
 * Tests for compaction options transformation
 */

import { applyCompactionOverrides } from "./compactionOptions";
import type { SendMessageOptions } from "@/types/ipc";
import type { CompactionRequestData } from "@/types/message";

describe("applyCompactionOverrides", () => {
  const baseOptions: SendMessageOptions = {
    model: "anthropic:claude-sonnet-4-5",
    thinkingLevel: "medium",
    toolPolicy: [],
    mode: "exec",
  };

  it("uses workspace model when no override specified", () => {
    const compactData: CompactionRequestData = {};
    const result = applyCompactionOverrides(baseOptions, compactData);

    expect(result.model).toBe("anthropic:claude-sonnet-4-5");
    expect(result.mode).toBe("compact");
  });

  it("applies custom model override", () => {
    const compactData: CompactionRequestData = {
      model: "anthropic:claude-haiku-4-5",
    };
    const result = applyCompactionOverrides(baseOptions, compactData);

    expect(result.model).toBe("anthropic:claude-haiku-4-5");
  });

  it("sets thinking to off for Anthropic models", () => {
    const compactData: CompactionRequestData = {
      model: "anthropic:claude-haiku-4-5",
    };
    const result = applyCompactionOverrides(baseOptions, compactData);

    expect(result.thinkingLevel).toBe("off");
  });

  it("preserves workspace thinking level for non-Anthropic models", () => {
    const compactData: CompactionRequestData = {
      model: "openai:gpt-5-pro",
    };
    const result = applyCompactionOverrides(baseOptions, compactData);

    expect(result.thinkingLevel).toBe("medium");
  });

  it("applies maxOutputTokens override", () => {
    const compactData: CompactionRequestData = {
      maxOutputTokens: 8000,
    };
    const result = applyCompactionOverrides(baseOptions, compactData);

    expect(result.maxOutputTokens).toBe(8000);
  });

  it("sets compact mode and tool policy", () => {
    const compactData: CompactionRequestData = {};
    const result = applyCompactionOverrides(baseOptions, compactData);

    expect(result.mode).toBe("compact");
    expect(result.toolPolicy).toEqual([{ regex_match: "compact_summary", action: "require" }]);
  });

  it("applies all overrides together", () => {
    const compactData: CompactionRequestData = {
      model: "openai:gpt-5",
      maxOutputTokens: 5000,
    };
    const result = applyCompactionOverrides(baseOptions, compactData);

    expect(result.model).toBe("openai:gpt-5");
    expect(result.maxOutputTokens).toBe(5000);
    expect(result.mode).toBe("compact");
    expect(result.thinkingLevel).toBe("medium"); // Non-Anthropic preserves original
  });
});
41 changes: 41 additions & 0 deletions src/utils/messages/compactionOptions.ts
@@ -0,0 +1,41 @@
/**
 * Compaction options transformation
 *
 * Single source of truth for converting compaction metadata into SendMessageOptions.
 * Used by both ChatInput (initial send) and useResumeManager (resume after interruption).
 */

import type { SendMessageOptions } from "@/types/ipc";
import type { CompactionRequestData } from "@/types/message";

/**
 * Apply compaction-specific option overrides to base options.
 *
 * This function is the single source of truth for how compaction metadata
 * transforms workspace defaults. Both initial sends and stream resumption
 * use this function to ensure consistent behavior.
 *
 * @param baseOptions - Workspace default options (from localStorage or useSendMessageOptions)
 * @param compactData - Compaction request metadata from /compact command
 * @returns Final SendMessageOptions with compaction overrides applied
 */
export function applyCompactionOverrides(
  baseOptions: SendMessageOptions,
  compactData: CompactionRequestData
): SendMessageOptions {
  // Use custom model if specified, otherwise use workspace default
  const compactionModel = compactData.model ?? baseOptions.model;

  // Anthropic models don't support thinking, always use "off"
  // Non-Anthropic models keep workspace default (backend will enforce policy)
  const isAnthropic = compactionModel.startsWith("anthropic:");

  return {
    ...baseOptions,
    model: compactionModel,
    thinkingLevel: isAnthropic ? "off" : baseOptions.thinkingLevel,
    toolPolicy: [{ regex_match: "compact_summary", action: "require" }],
    maxOutputTokens: compactData.maxOutputTokens,
    mode: "compact" as const,
  };
}
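
A worked example of the override pipeline, using the same values as the tests above (a Sonnet workspace compacting with a Haiku override):

```typescript
const result = applyCompactionOverrides(
  {
    model: "anthropic:claude-sonnet-4-5",
    thinkingLevel: "medium",
    toolPolicy: [],
    mode: "exec",
  },
  { model: "anthropic:claude-haiku-4-5", maxOutputTokens: 8000 }
);
// result.model           === "anthropic:claude-haiku-4-5"
// result.thinkingLevel   === "off" (Anthropic override)
// result.toolPolicy      === [{ regex_match: "compact_summary", action: "require" }]
// result.maxOutputTokens === 8000
// result.mode            === "compact"
```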