diff --git a/src/services/streamManager.ts b/src/services/streamManager.ts
index b7fc44bd0..fc9a656a9 100644
--- a/src/services/streamManager.ts
+++ b/src/services/streamManager.ts
@@ -476,7 +476,9 @@ export class StreamManager extends EventEmitter {
         // eslint-disable-next-line @typescript-eslint/no-explicit-any, @typescript-eslint/no-unsafe-assignment
         toolChoice: toolChoice as any, // Force tool use when required by policy
         // When toolChoice is set (required tool), limit to 1 step to prevent infinite loops
-        // Otherwise allow unlimited steps for multi-turn tool use
+        // Otherwise allow effectively unlimited steps (100k) for autonomous multi-turn workflows.
+        // IMPORTANT: Models should be able to run for hours or even days calling tools repeatedly
+        // to complete complex tasks. The stopWhen condition allows the model to decide when it's done.
         ...(toolChoice ? { maxSteps: 1 } : { stopWhen: stepCountIs(100000) }),
         // eslint-disable-next-line @typescript-eslint/no-explicit-any, @typescript-eslint/no-unsafe-assignment
         providerOptions: providerOptions as any, // Pass provider-specific options (thinking/reasoning config)
diff --git a/tests/ipcMain/openai-web-search.test.ts b/tests/ipcMain/openai-web-search.test.ts
index 5124b420d..ba4a03f06 100644
--- a/tests/ipcMain/openai-web-search.test.ts
+++ b/tests/ipcMain/openai-web-search.test.ts
@@ -27,19 +27,20 @@ describeIntegration("OpenAI web_search integration tests", () => {
       const { env, workspaceId, cleanup } = await setupWorkspace("openai");
       try {
         // This prompt reliably triggers the reasoning + web_search bug:
-        // 1. Gold price search always triggers web_search (pricing data)
-        // 2. Mathematical computation requires reasoning
-        // 3. High reasoning effort ensures reasoning is present
+        // 1. Weather search triggers web_search (real-time data)
+        // 2. Simple analysis requires reasoning
+        // 3. Medium reasoning effort ensures reasoning is present while avoiding excessive loops
         // This combination exposed the itemId bug on main branch
+        // Note: Previous prompt (gold price + Collatz) caused excessive tool loops in CI
         const result = await sendMessageWithModel(
           env.mockIpcRenderer,
           workspaceId,
-          "Find the current gold price per ounce via web search. " +
-            "Then compute round(price^2) and determine how many Collatz steps it takes to reach 1.",
+          "Use web search to find the current weather in San Francisco. " +
+            "Then tell me if it's a good day for a picnic.",
           "openai",
           "gpt-5-codex",
           {
-            thinkingLevel: "high", // Ensure reasoning is used
+            thinkingLevel: "medium", // Ensure reasoning without excessive deliberation
           }
         );
 
@@ -49,8 +50,8 @@ describeIntegration("OpenAI web_search integration tests", () => {
         // Collect and verify stream events
         const collector = createEventCollector(env.sentEvents, workspaceId);
 
-        // Wait for stream to complete
-        const streamEnd = await collector.waitForEvent("stream-end", 120000);
+        // Wait for stream to complete (90s should be enough for simple weather + analysis)
+        const streamEnd = await collector.waitForEvent("stream-end", 90000);
         expect(streamEnd).toBeDefined();
 
         // Verify no errors occurred - this is the KEY test
@@ -85,6 +86,6 @@ describeIntegration("OpenAI web_search integration tests", () => {
         await cleanup();
       }
     },
-    150000 // 150 second timeout - reasoning + web_search + computation takes time
+    120000 // 120 second timeout - reasoning + web_search should complete faster with simpler task
   );
 });