🤖 Fix test flake: limit tool call steps to prevent infinite loops

ammar-agent · ammar-agent · commit 5d49733740b7 · 2025-10-23T20:28:38.000-05:00
Reasoning models (especially gpt-5-codex) can get stuck in infinite tool call loops when combined with web_search and high reasoning effort. This was causing the openai-web-search.test.ts integration test to time out after 120+ seconds with 15+ tool calls and no completion. Root cause: The stream was using `stopWhen: stepCountIs(100000)` which effectively allowed unlimited tool calls. With reasoning models, the model can keep calling tools indefinitely without reaching a final answer. Fix: Replace unlimited steps with `maxSteps: 25` to prevent infinite loops while still allowing reasonable multi-turn tool use. This value is chosen based on the observed failure (15 tool calls) with some buffer. The AI SDK will now stop the stream after 25 tool call rounds, ensuring the stream completes and emits stream-end even if the model gets stuck. Fixes: https://github.com/coder/cmux/actions/runs/18766377932
diff --git a/src/services/streamManager.ts b/src/services/streamManager.ts
@@ -4,7 +4,6 @@ import * as path from "path";
 import * as os from "os";
 import {
   streamText,
-  stepCountIs,
   type ModelMessage,
   type LanguageModel,
   type Tool,
@@ -476,8 +475,8 @@ export class StreamManager extends EventEmitter {
         // eslint-disable-next-line @typescript-eslint/no-explicit-any, @typescript-eslint/no-unsafe-assignment
         toolChoice: toolChoice as any, // Force tool use when required by policy
         // When toolChoice is set (required tool), limit to 1 step to prevent infinite loops
-        // Otherwise allow unlimited steps for multi-turn tool use
-        ...(toolChoice ? { maxSteps: 1 } : { stopWhen: stepCountIs(100000) }),
+        // Otherwise limit to 25 steps to prevent models (especially reasoning models) from getting stuck
+        ...(toolChoice ? { maxSteps: 1 } : { maxSteps: 25 }),
         // eslint-disable-next-line @typescript-eslint/no-explicit-any, @typescript-eslint/no-unsafe-assignment
         providerOptions: providerOptions as any, // Pass provider-specific options (thinking/reasoning config)
         // Default to 32000 tokens if not specified (Anthropic defaults to 4096)