Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 7 additions & 8 deletions src/browser/components/ThinkingSlider.stories.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,7 @@ export const DifferentModels: Story = {
</div>

<div>
<div className="text-muted-light font-primary mb-2 text-xs">
Claude Opus 4.5 (3 levels: low/medium/high)
</div>
<div className="text-muted-light font-primary mb-2 text-xs">Claude Opus 4.5 (4 levels)</div>
<ThinkingSliderComponent modelString="anthropic:claude-opus-4-5" />
</div>

Expand Down Expand Up @@ -116,18 +114,19 @@ export const InteractiveDemo: Story = {
},
};

export const Opus45ThreeLevels: Story = {
export const Opus45AllLevels: Story = {
args: { modelString: "anthropic:claude-opus-4-5" },
render: (args) => (
<div className="bg-dark flex min-w-80 flex-col gap-[30px] p-10">
<div className="text-bright font-primary mb-2.5 text-[13px]">
Claude Opus 4.5 uses the effort parameter (low/medium/high only, no &ldquo;off&rdquo;):
Claude Opus 4.5 uses the effort parameter with optional extended thinking:
</div>
<ThinkingSliderComponent modelString={args.modelString} />
<div className="text-muted-light font-primary mt-2.5 text-[11px]">
• <strong>Low</strong>: Conservative token usage
<br />• <strong>Medium</strong>: Balanced usage (default)
<br />• <strong>High</strong>: Best results, more tokens
• <strong>Off</strong>: effort=&ldquo;low&rdquo;, no visible reasoning
<br />• <strong>Low</strong>: effort=&ldquo;low&rdquo;, visible reasoning
<br />• <strong>Medium</strong>: effort=&ldquo;medium&rdquo;, visible reasoning
<br />• <strong>High</strong>: effort=&ldquo;high&rdquo;, visible reasoning
</div>
</div>
),
Expand Down
19 changes: 10 additions & 9 deletions src/browser/utils/thinking/policy.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,17 @@ describe("getThinkingPolicyForModel", () => {
]);
});

test("returns low/medium/high for Opus 4.5", () => {
test("returns all levels for Opus 4.5 (uses default policy)", () => {
// Opus 4.5 uses the default policy - no special case needed
// The effort parameter handles the "off" case by setting effort="low"
expect(getThinkingPolicyForModel("anthropic:claude-opus-4-5")).toEqual([
"off",
"low",
"medium",
"high",
]);
expect(getThinkingPolicyForModel("anthropic:claude-opus-4-5-20251101")).toEqual([
"off",
"low",
"medium",
"high",
Expand Down Expand Up @@ -95,19 +99,16 @@ describe("enforceThinkingPolicy", () => {
});
});

describe("Opus 4.5 (no off option)", () => {
test("allows low/medium/high levels", () => {
describe("Opus 4.5 (all levels supported)", () => {
test("allows all levels including off", () => {
expect(enforceThinkingPolicy("anthropic:claude-opus-4-5", "off")).toBe("off");
expect(enforceThinkingPolicy("anthropic:claude-opus-4-5", "low")).toBe("low");
expect(enforceThinkingPolicy("anthropic:claude-opus-4-5", "medium")).toBe("medium");
expect(enforceThinkingPolicy("anthropic:claude-opus-4-5", "high")).toBe("high");
});

test("falls back to high when off is requested", () => {
expect(enforceThinkingPolicy("anthropic:claude-opus-4-5", "off")).toBe("high");
});

test("falls back to high when off is requested (versioned model)", () => {
expect(enforceThinkingPolicy("anthropic:claude-opus-4-5-20251101", "off")).toBe("high");
test("allows off for versioned model", () => {
expect(enforceThinkingPolicy("anthropic:claude-opus-4-5-20251101", "off")).toBe("off");
});
});
});
Expand Down
15 changes: 1 addition & 14 deletions src/browser/utils/thinking/policy.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ export type ThinkingPolicy = readonly ThinkingLevel[];
*
* Rules:
* - openai:gpt-5-pro → ["high"] (only supported level)
* - anthropic:claude-opus-4-5 → ["low", "medium", "high"] (effort parameter only)
* - gemini-3 → ["low", "high"] (thinking level only)
* - default → ["off", "low", "medium", "high"] (all levels selectable)
*
Expand All @@ -39,12 +38,6 @@ export function getThinkingPolicyForModel(modelString: string): ThinkingPolicy {
return ["high"];
}

// Claude Opus 4.5 only supports effort parameter: low, medium, high (no "off")
// Match "anthropic:" followed by "claude-opus-4-5" with optional version suffix
if (modelString.includes("opus-4-5")) {
return ["low", "medium", "high"];
}

// Gemini 3 Pro only supports "low" and "high" reasoning levels
if (modelString.includes("gemini-3")) {
return ["low", "high"];
Expand All @@ -59,8 +52,7 @@ export function getThinkingPolicyForModel(modelString: string): ThinkingPolicy {
*
* Fallback strategy:
* 1. If requested level is allowed, use it
* 2. For Opus 4.5: prefer "high" (best experience for reasoning model)
* 3. Otherwise: prefer "medium" if allowed, else use first allowed level
* 2. Otherwise: prefer "medium" if allowed, else use first allowed level
*/
export function enforceThinkingPolicy(
modelString: string,
Expand All @@ -72,11 +64,6 @@ export function enforceThinkingPolicy(
return requested;
}

// Special case: Opus 4.5 defaults to "high" for best experience
if (modelString.includes("opus-4-5") && allowed.includes("high")) {
return "high";
}

// Fallback: prefer "medium" if allowed, else use first allowed level
return allowed.includes("medium") ? "medium" : allowed[0];
}
4 changes: 2 additions & 2 deletions src/common/types/thinking.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ export const ANTHROPIC_THINKING_BUDGETS: Record<ThinkingLevel, number> = {
*
* @see https://www.anthropic.com/news/claude-opus-4-5
*/
export const ANTHROPIC_EFFORT: Record<ThinkingLevel, "low" | "medium" | "high" | undefined> = {
off: undefined,
export const ANTHROPIC_EFFORT: Record<ThinkingLevel, "low" | "medium" | "high"> = {
off: "low",
low: "low",
medium: "medium",
high: "high",
Expand Down
15 changes: 12 additions & 3 deletions src/common/utils/ai/providerOptions.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,37 +23,46 @@ void mock.module("@/browser/utils/thinking/policy", () => ({

describe("buildProviderOptions - Anthropic", () => {
describe("Opus 4.5 (effort parameter)", () => {
test("should use effort parameter for claude-opus-4-5", () => {
test("should use effort and thinking parameters for claude-opus-4-5", () => {
const result = buildProviderOptions("anthropic:claude-opus-4-5", "medium");

expect(result).toEqual({
anthropic: {
disableParallelToolUse: false,
sendReasoning: true,
thinking: {
type: "enabled",
budgetTokens: 10000, // ANTHROPIC_THINKING_BUDGETS.medium
},
effort: "medium",
},
});
});

test("should use effort parameter for claude-opus-4-5-20251101", () => {
test("should use effort and thinking parameters for claude-opus-4-5-20251101", () => {
const result = buildProviderOptions("anthropic:claude-opus-4-5-20251101", "high");

expect(result).toEqual({
anthropic: {
disableParallelToolUse: false,
sendReasoning: true,
thinking: {
type: "enabled",
budgetTokens: 20000, // ANTHROPIC_THINKING_BUDGETS.high
},
effort: "high",
},
});
});

test("should omit effort when thinking is off for Opus 4.5", () => {
test("should use effort 'low' with no thinking when off for Opus 4.5", () => {
const result = buildProviderOptions("anthropic:claude-opus-4-5", "off");

expect(result).toEqual({
anthropic: {
disableParallelToolUse: false,
sendReasoning: true,
effort: "low", // "off" maps to effort: "low" for efficiency
},
});
});
Expand Down
22 changes: 18 additions & 4 deletions src/common/utils/ai/providerOptions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -93,20 +93,34 @@ export function buildProviderOptions(
const isOpus45 = modelName?.includes("opus-4-5") ?? false;

if (isOpus45) {
// Opus 4.5: Use effort parameter for reasoning control
const effort = ANTHROPIC_EFFORT[effectiveThinking];
// Opus 4.5: Use effort parameter AND optionally thinking for visible reasoning
// - "off" → effort: "low", no thinking (fast, no visible reasoning)
// - "low" → effort: "low", thinking enabled (visible reasoning)
// - "medium" → effort: "medium", thinking enabled
// - "high" → effort: "high", thinking enabled
const effortLevel = ANTHROPIC_EFFORT[effectiveThinking];
const budgetTokens = ANTHROPIC_THINKING_BUDGETS[effectiveThinking];
log.debug("buildProviderOptions: Anthropic Opus 4.5 config", {
effort,
effort: effortLevel,
budgetTokens,
thinkingLevel: effectiveThinking,
});

const options: ProviderOptions = {
anthropic: {
disableParallelToolUse: false, // Always enable concurrent tool execution
sendReasoning: true, // Include reasoning traces in requests sent to the model
// Enable thinking to get visible reasoning traces (only when not "off")
// budgetTokens sets the ceiling; effort controls how eagerly tokens are spent
...(budgetTokens > 0 && {
thinking: {
type: "enabled",
budgetTokens,
},
}),
// Use effort parameter (Opus 4.5 only) to control token spend
// SDK auto-adds beta header "effort-2025-11-24" when effort is set
...(effort && { effort }),
effort: effortLevel,
},
};
log.debug("buildProviderOptions: Returning Anthropic Opus 4.5 options", options);
Expand Down