Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 25 additions & 9 deletions src/common/types/thinking.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,33 @@ export type ThinkingLevel = "off" | "low" | "medium" | "high";
export type ThinkingLevelOn = Exclude<ThinkingLevel, "off">;

/**
 * Anthropic thinking token budgets, keyed by our unified thinking level.
 *
 * Used for Anthropic models that support extended thinking with budgetTokens
 * (e.g., Sonnet 4.5, Haiku 4.5, Opus 4.1, etc.). The values are heuristics
 * that balance thinking depth with response time and cost:
 *
 * - off:    no extended thinking (0 tokens)
 * - low:    quick thinking for straightforward tasks (4K tokens)
 * - medium: standard thinking for moderate complexity (10K tokens)
 * - high:   deep thinking for complex problems (20K tokens)
 *
 * This differs from Anthropic's effort parameter, which controls all token
 * spend (thinking, text responses, and tool calls) and does not require
 * thinking to be explicitly enabled.
 */
export const ANTHROPIC_THINKING_BUDGETS: Record<ThinkingLevel, number> = {
  off: 0,
  low: 4_000,
  medium: 10_000,
  high: 20_000,
};

/**
* Anthropic Opus 4.5 effort parameter mapping
*
* The effort parameter is a new feature ONLY available for Claude Opus 4.5.
* It controls how much computational work the model applies to each task.
*
* Other Anthropic models must use the thinking.budgetTokens approach instead.
*
* @see https://www.anthropic.com/news/claude-opus-4-5
*/
export const ANTHROPIC_EFFORT: Record<ThinkingLevel, "low" | "medium" | "high" | undefined> = {
off: undefined,
Expand Down
119 changes: 119 additions & 0 deletions src/common/utils/ai/providerOptions.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
/**
* Tests for provider options builder
*/

import { describe, test, expect, mock } from "bun:test";
import { buildProviderOptions } from "./providerOptions";
import type { ThinkingLevel } from "@/common/types/thinking";

// Replace the log module with silent stand-ins so test output stays free of console noise
void mock.module("@/node/services/log", () => {
  // Every log level maps to the same do-nothing function.
  const silent = (): void => undefined;
  return {
    log: { debug: silent, info: silent, warn: silent, error: silent },
  };
});

// Stub enforceThinkingPolicy so the requested level is returned unchanged
void mock.module("@/browser/utils/thinking/policy", () => {
  function enforceThinkingPolicy(_model: string, level: ThinkingLevel): ThinkingLevel {
    return level;
  }
  return { enforceThinkingPolicy };
});

describe("buildProviderOptions - Anthropic", () => {
  type BuiltOptions = ReturnType<typeof buildProviderOptions>;

  // Flags every expected Anthropic options object in this suite carries.
  const baseFlags = { disableParallelToolUse: false, sendReasoning: true };

  // Builds options for (model, level) and deep-compares them with `expected`.
  const expectOptions = (model: string, level: ThinkingLevel, expected: BuiltOptions): void => {
    expect(buildProviderOptions(model, level)).toEqual(expected);
  };

  describe("Opus 4.5 (effort parameter)", () => {
    test("should use effort parameter for claude-opus-4-5", () => {
      expectOptions("anthropic:claude-opus-4-5", "medium", {
        anthropic: { ...baseFlags, effort: "medium" },
      });
    });

    test("should use effort parameter for claude-opus-4-5-20251101", () => {
      expectOptions("anthropic:claude-opus-4-5-20251101", "high", {
        anthropic: { ...baseFlags, effort: "high" },
      });
    });

    test("should omit effort when thinking is off for Opus 4.5", () => {
      // Only the base flags are expected: no effort key at all.
      expectOptions("anthropic:claude-opus-4-5", "off", {
        anthropic: { ...baseFlags },
      });
    });
  });

  describe("Other Anthropic models (thinking/budgetTokens)", () => {
    test("should use thinking.budgetTokens for claude-sonnet-4-5", () => {
      expectOptions("anthropic:claude-sonnet-4-5", "medium", {
        anthropic: {
          ...baseFlags,
          thinking: { type: "enabled", budgetTokens: 10000 },
        },
      });
    });

    test("should use thinking.budgetTokens for claude-opus-4-1", () => {
      expectOptions("anthropic:claude-opus-4-1", "high", {
        anthropic: {
          ...baseFlags,
          thinking: { type: "enabled", budgetTokens: 20000 },
        },
      });
    });

    test("should use thinking.budgetTokens for claude-haiku-4-5", () => {
      expectOptions("anthropic:claude-haiku-4-5", "low", {
        anthropic: {
          ...baseFlags,
          thinking: { type: "enabled", budgetTokens: 4000 },
        },
      });
    });

    test("should omit thinking when thinking is off for non-Opus 4.5", () => {
      // Only the base flags are expected: no thinking key at all.
      expectOptions("anthropic:claude-sonnet-4-5", "off", {
        anthropic: { ...baseFlags },
      });
    });
  });
});
45 changes: 40 additions & 5 deletions src/common/utils/ai/providerOptions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import type { MuxProviderOptions } from "@/common/types/providerOptions";
import type { ThinkingLevel } from "@/common/types/thinking";
import {
ANTHROPIC_EFFORT,
ANTHROPIC_THINKING_BUDGETS,
GEMINI_THINKING_BUDGETS,
OPENAI_REASONING_EFFORT,
OPENROUTER_REASONING_EFFORT,
Expand Down Expand Up @@ -83,19 +84,53 @@ export function buildProviderOptions(

// Build Anthropic-specific options
if (provider === "anthropic") {
const effort = ANTHROPIC_EFFORT[effectiveThinking];
// Extract model name from model string (e.g., "anthropic:claude-opus-4-5" -> "claude-opus-4-5")
const [, modelName] = modelString.split(":");

// Check if this is Opus 4.5 (supports effort parameter)
// Opus 4.5 uses the new "effort" parameter for reasoning control
// All other Anthropic models use the "thinking" parameter with budgetTokens
const isOpus45 = modelName?.includes("opus-4-5") ?? false;

if (isOpus45) {
// Opus 4.5: Use effort parameter for reasoning control
const effort = ANTHROPIC_EFFORT[effectiveThinking];
log.debug("buildProviderOptions: Anthropic Opus 4.5 config", {
effort,
thinkingLevel: effectiveThinking,
});

const options: ProviderOptions = {
anthropic: {
disableParallelToolUse: false, // Always enable concurrent tool execution
sendReasoning: true, // Include reasoning traces in requests sent to the model
// Use effort parameter (Opus 4.5 only) to control token spend
// SDK auto-adds beta header "effort-2025-11-24" when effort is set
...(effort && { effort }),
},
};
log.debug("buildProviderOptions: Returning Anthropic Opus 4.5 options", options);
return options;
}

// Other Anthropic models: Use thinking parameter with budgetTokens
const budgetTokens = ANTHROPIC_THINKING_BUDGETS[effectiveThinking];
log.debug("buildProviderOptions: Anthropic config", {
effort,
budgetTokens,
thinkingLevel: effectiveThinking,
});

const options: ProviderOptions = {
anthropic: {
disableParallelToolUse: false, // Always enable concurrent tool execution
sendReasoning: true, // Include reasoning traces in requests sent to the model
// Use effort parameter to control token spend (thinking, text, and tool calls)
// SDK auto-adds beta header "effort-2025-11-24" when effort is set
...(effort && { effort }),
// Conditionally add thinking configuration (non-Opus 4.5 models)
...(budgetTokens > 0 && {
thinking: {
type: "enabled",
budgetTokens,
},
}),
},
};
log.debug("buildProviderOptions: Returning Anthropic options", options);
Expand Down
60 changes: 60 additions & 0 deletions tests/ipcMain/sendMessage.reasoning.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
/**
* Integration tests for reasoning/thinking functionality across Anthropic models.
* Verifies Opus 4.5 uses `effort` and Sonnet 4.5 uses `thinking.budgetTokens`.
*/

import { shouldRunIntegrationTests, validateApiKeys } from "./setup";
import { sendMessage, assertStreamSuccess, waitForStreamSuccess } from "./helpers";
import { createSharedRepo, cleanupSharedRepo, withSharedWorkspace } from "./sendMessageTestHelpers";
import { KNOWN_MODELS } from "@/common/constants/knownModels";

// Suite wrapper: the real `describe` when shouldRunIntegrationTests() is truthy,
// otherwise `describe.skip`, so the suite is registered but its tests are skipped.
const describeIntegration = shouldRunIntegrationTests() ? describe : describe.skip;

// Key validation happens at module load, and only when integration tests are enabled.
// NOTE(review): presumably validateApiKeys fails fast when ANTHROPIC_API_KEY is missing — confirm in ./setup.
if (shouldRunIntegrationTests()) {
validateApiKeys(["ANTHROPIC_API_KEY"]);
}

// File-level hooks: createSharedRepo is registered to run before the tests in this
// file and cleanupSharedRepo after them. Both are registered unconditionally.
beforeAll(createSharedRepo);
afterAll(cleanupSharedRepo);

describeIntegration("Anthropic reasoning parameter tests", () => {
  /**
   * Sends `prompt` to `modelId` with thinkingLevel "low" inside a shared
   * "anthropic" workspace, asserts the send succeeded, then waits up to
   * `streamTimeoutMs` for the stream and asserts it succeeded with at least
   * one delta.
   */
  const exerciseLowThinking = (modelId: string, prompt: string, streamTimeoutMs: number) =>
    withSharedWorkspace("anthropic", async ({ env, workspaceId }) => {
      const sendResult = await sendMessage(env.mockIpcRenderer, workspaceId, prompt, {
        model: modelId,
        thinkingLevel: "low",
      });
      expect(sendResult.success).toBe(true);

      const stream = await waitForStreamSuccess(env.sentEvents, workspaceId, streamTimeoutMs);
      assertStreamSuccess(stream);
      expect(stream.getDeltas().length).toBeGreaterThan(0);
    });

  test.concurrent(
    "Sonnet 4.5 with thinking (budgetTokens)",
    async () => {
      await exerciseLowThinking(KNOWN_MODELS.SONNET.id, "What is 2+2? Answer in one word.", 30000);
    },
    60000
  );

  test.concurrent(
    "Opus 4.5 with thinking (effort)",
    async () => {
      await exerciseLowThinking(KNOWN_MODELS.OPUS.id, "What is 4+4? Answer in one word.", 60000);
    },
    90000
  );
});