coder · ammario · Nov 25, 2025 · Nov 25, 2025 · Nov 25, 2025 · Nov 25, 2025
diff --git a/src/common/types/thinking.ts b/src/common/types/thinking.ts
@@ -14,17 +14,33 @@ export type ThinkingLevel = "off" | "low" | "medium" | "high";
 export type ThinkingLevelOn = Exclude<ThinkingLevel, "off">;
 
 /**
- * Anthropic effort level mapping
+ * Anthropic thinking token budget mapping
  *
- * Maps our unified thinking levels to Anthropic's effort parameter:
- * - off: No effort specified (undefined)
- * - low: Most efficient - significant token savings
- * - medium: Balanced approach with moderate token savings
- * - high: Maximum capability (default behavior)
+ * These heuristics balance thinking depth with response time and cost.
+ * Used for models that support extended thinking with budgetTokens
+ * (e.g., Sonnet 4.5, Haiku 4.5, Opus 4.1).
  *
- * The effort parameter controls all token spend including thinking,
- * text responses, and tool calls. Unlike budget_tokens, it doesn't require
- * thinking to be explicitly enabled.
+ * - off: No extended thinking
+ * - low: Quick thinking for straightforward tasks (4K tokens)
+ * - medium: Standard thinking for moderate complexity (10K tokens)
+ * - high: Deep thinking for complex problems (20K tokens)
+ */
+export const ANTHROPIC_THINKING_BUDGETS: Record<ThinkingLevel, number> = {
+  off: 0,
+  low: 4000,
+  medium: 10000,
+  high: 20000,
+};
+
+/**
+ * Anthropic Opus 4.5 effort parameter mapping
+ *
+ * The effort parameter is a new feature ONLY available for Claude Opus 4.5.
+ * It controls how much computational work the model applies to each task.
+ *
+ * Other Anthropic models must use the thinking.budgetTokens approach instead.
+ *
+ * @see https://www.anthropic.com/news/claude-opus-4-5
  */
 export const ANTHROPIC_EFFORT: Record<ThinkingLevel, "low" | "medium" | "high" | undefined> = {
   off: undefined,

diff --git a/src/common/utils/ai/providerOptions.test.ts b/src/common/utils/ai/providerOptions.test.ts
@@ -0,0 +1,119 @@
+/**
+ * Tests for provider options builder
+ */
+
+import { describe, test, expect, mock } from "bun:test";
+import { buildProviderOptions } from "./providerOptions";
+import type { ThinkingLevel } from "@/common/types/thinking";
+
+// Mock the log module to avoid console noise
+void mock.module("@/node/services/log", () => ({
+  log: {
+    debug: (): void => undefined,
+    info: (): void => undefined,
+    warn: (): void => undefined,
+    error: (): void => undefined,
+  },
+}));
+
+// Mock enforceThinkingPolicy to pass through
+void mock.module("@/browser/utils/thinking/policy", () => ({
+  enforceThinkingPolicy: (_model: string, level: ThinkingLevel) => level,
+}));
+
+describe("buildProviderOptions - Anthropic", () => {
+  describe("Opus 4.5 (effort parameter)", () => {
+    test("should use effort parameter for claude-opus-4-5", () => {
+      const result = buildProviderOptions("anthropic:claude-opus-4-5", "medium");
+
+      expect(result).toEqual({
+        anthropic: {
+          disableParallelToolUse: false,
+          sendReasoning: true,
+          effort: "medium",
+        },
+      });
+    });
+
+    test("should use effort parameter for claude-opus-4-5-20251101", () => {
+      const result = buildProviderOptions("anthropic:claude-opus-4-5-20251101", "high");
+
+      expect(result).toEqual({
+        anthropic: {
+          disableParallelToolUse: false,
+          sendReasoning: true,
+          effort: "high",
+        },
+      });
+    });
+
+    test("should omit effort when thinking is off for Opus 4.5", () => {
+      const result = buildProviderOptions("anthropic:claude-opus-4-5", "off");
+
+      expect(result).toEqual({
+        anthropic: {
+          disableParallelToolUse: false,
+          sendReasoning: true,
+        },
+      });
+    });
+  });
+
+  describe("Other Anthropic models (thinking/budgetTokens)", () => {
+    test("should use thinking.budgetTokens for claude-sonnet-4-5", () => {
+      const result = buildProviderOptions("anthropic:claude-sonnet-4-5", "medium");
+
+      expect(result).toEqual({
+        anthropic: {
+          disableParallelToolUse: false,
+          sendReasoning: true,
+          thinking: {
+            type: "enabled",
+            budgetTokens: 10000,
+          },
+        },
+      });
+    });
+
+    test("should use thinking.budgetTokens for claude-opus-4-1", () => {
+      const result = buildProviderOptions("anthropic:claude-opus-4-1", "high");
+
+      expect(result).toEqual({
+        anthropic: {
+          disableParallelToolUse: false,
+          sendReasoning: true,
+          thinking: {
+            type: "enabled",
+            budgetTokens: 20000,
+          },
+        },
+      });
+    });
+
+    test("should use thinking.budgetTokens for claude-haiku-4-5", () => {
+      const result = buildProviderOptions("anthropic:claude-haiku-4-5", "low");
+
+      expect(result).toEqual({
+        anthropic: {
+          disableParallelToolUse: false,
+          sendReasoning: true,
+          thinking: {
+            type: "enabled",
+            budgetTokens: 4000,
+          },
+        },
+      });
+    });
+
+    test("should omit thinking when thinking is off for non-Opus 4.5", () => {
+      const result = buildProviderOptions("anthropic:claude-sonnet-4-5", "off");
+
+      expect(result).toEqual({
+        anthropic: {
+          disableParallelToolUse: false,
+          sendReasoning: true,
+        },
+      });
+    });
+  });
+});
diff --git a/src/common/utils/ai/providerOptions.ts b/src/common/utils/ai/providerOptions.ts
@@ -12,6 +12,7 @@ import type { MuxProviderOptions } from "@/common/types/providerOptions";
 import type { ThinkingLevel } from "@/common/types/thinking";
 import {
   ANTHROPIC_EFFORT,
+  ANTHROPIC_THINKING_BUDGETS,
   GEMINI_THINKING_BUDGETS,
   OPENAI_REASONING_EFFORT,
   OPENROUTER_REASONING_EFFORT,
@@ -83,19 +84,53 @@ export function buildProviderOptions(
 
   // Build Anthropic-specific options
   if (provider === "anthropic") {
-    const effort = ANTHROPIC_EFFORT[effectiveThinking];
+    // Extract model name from model string (e.g., "anthropic:claude-opus-4-5" -> "claude-opus-4-5")
+    const [, modelName] = modelString.split(":");
+
+    // Check if this is Opus 4.5 (supports effort parameter)
+    // Opus 4.5 uses the new "effort" parameter for reasoning control
+    // All other Anthropic models use the "thinking" parameter with budgetTokens
+    const isOpus45 = modelName?.includes("opus-4-5") ?? false;
+
+    if (isOpus45) {
+      // Opus 4.5: Use effort parameter for reasoning control
+      const effort = ANTHROPIC_EFFORT[effectiveThinking];
+      log.debug("buildProviderOptions: Anthropic Opus 4.5 config", {
+        effort,
+        thinkingLevel: effectiveThinking,
+      });
+
+      const options: ProviderOptions = {
+        anthropic: {
+          disableParallelToolUse: false, // Always enable concurrent tool execution
+          sendReasoning: true, // Include reasoning traces in requests sent to the model
+          // Use effort parameter (Opus 4.5 only) to control token spend
+          // SDK auto-adds beta header "effort-2025-11-24" when effort is set
+          ...(effort && { effort }),
+        },
+      };
+      log.debug("buildProviderOptions: Returning Anthropic Opus 4.5 options", options);
+      return options;
+    }
+
+    // Other Anthropic models: Use thinking parameter with budgetTokens
+    const budgetTokens = ANTHROPIC_THINKING_BUDGETS[effectiveThinking];
     log.debug("buildProviderOptions: Anthropic config", {
-      effort,
+      budgetTokens,
       thinkingLevel: effectiveThinking,
     });
 
     const options: ProviderOptions = {
       anthropic: {
         disableParallelToolUse: false, // Always enable concurrent tool execution
         sendReasoning: true, // Include reasoning traces in requests sent to the model
-        // Use effort parameter to control token spend (thinking, text, and tool calls)
-        // SDK auto-adds beta header "effort-2025-11-24" when effort is set
-        ...(effort && { effort }),
+        // Add thinking configuration only when the budget is non-zero (thinking level is not "off")
+        ...(budgetTokens > 0 && {
+          thinking: {
+            type: "enabled",
+            budgetTokens,
+          },
+        }),
       },
     };
     log.debug("buildProviderOptions: Returning Anthropic options", options);

diff --git a/tests/ipcMain/sendMessage.reasoning.test.ts b/tests/ipcMain/sendMessage.reasoning.test.ts
@@ -0,0 +1,60 @@
+/**
+ * Integration tests for reasoning/thinking functionality across Anthropic models.
+ * Checks that streaming succeeds for Opus 4.5 (`effort`) and Sonnet 4.5 (`thinking.budgetTokens`).
+ */
+
+import { shouldRunIntegrationTests, validateApiKeys } from "./setup";
+import { sendMessage, assertStreamSuccess, waitForStreamSuccess } from "./helpers";
+import { createSharedRepo, cleanupSharedRepo, withSharedWorkspace } from "./sendMessageTestHelpers";
+import { KNOWN_MODELS } from "@/common/constants/knownModels";
+
+const describeIntegration = shouldRunIntegrationTests() ? describe : describe.skip;
+
+if (shouldRunIntegrationTests()) {
+  validateApiKeys(["ANTHROPIC_API_KEY"]);
+}
+
+beforeAll(createSharedRepo);
+afterAll(cleanupSharedRepo);
+
+describeIntegration("Anthropic reasoning parameter tests", () => {
+  test.concurrent(
+    "Sonnet 4.5 with thinking (budgetTokens)",
+    async () => {
+      await withSharedWorkspace("anthropic", async ({ env, workspaceId }) => {
+        const result = await sendMessage(
+          env.mockIpcRenderer,
+          workspaceId,
+          "What is 2+2? Answer in one word.",
+          { model: KNOWN_MODELS.SONNET.id, thinkingLevel: "low" }
+        );
+        expect(result.success).toBe(true);
+
+        const collector = await waitForStreamSuccess(env.sentEvents, workspaceId, 30000);
+        assertStreamSuccess(collector);
+        expect(collector.getDeltas().length).toBeGreaterThan(0);
+      });
+    },
+    60000
+  );
+
+  test.concurrent(
+    "Opus 4.5 with thinking (effort)",
+    async () => {
+      await withSharedWorkspace("anthropic", async ({ env, workspaceId }) => {
+        const result = await sendMessage(
+          env.mockIpcRenderer,
+          workspaceId,
+          "What is 4+4? Answer in one word.",
+          { model: KNOWN_MODELS.OPUS.id, thinkingLevel: "low" }
+        );
+        expect(result.success).toBe(true);
+
+        const collector = await waitForStreamSuccess(env.sentEvents, workspaceId, 60000);
+        assertStreamSuccess(collector);
+        expect(collector.getDeltas().length).toBeGreaterThan(0);
+      });
+    },
+    90000
+  );
+});