Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion src/services/streamManager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -476,7 +476,9 @@ export class StreamManager extends EventEmitter {
// eslint-disable-next-line @typescript-eslint/no-explicit-any, @typescript-eslint/no-unsafe-assignment
toolChoice: toolChoice as any, // Force tool use when required by policy
// When toolChoice is set (required tool), limit to 1 step to prevent infinite loops
// Otherwise allow unlimited steps for multi-turn tool use
// Otherwise allow effectively unlimited steps (100k) for autonomous multi-turn workflows.
// IMPORTANT: Models should be able to run for hours or even days calling tools repeatedly
// to complete complex tasks. The stopWhen step cap is only a safety ceiling; the model itself decides when it's done by no longer calling tools.
...(toolChoice ? { maxSteps: 1 } : { stopWhen: stepCountIs(100000) }),
// eslint-disable-next-line @typescript-eslint/no-explicit-any, @typescript-eslint/no-unsafe-assignment
providerOptions: providerOptions as any, // Pass provider-specific options (thinking/reasoning config)
Expand Down
19 changes: 10 additions & 9 deletions tests/ipcMain/openai-web-search.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,19 +27,20 @@ describeIntegration("OpenAI web_search integration tests", () => {
const { env, workspaceId, cleanup } = await setupWorkspace("openai");
try {
// This prompt reliably triggers the reasoning + web_search bug:
// 1. Gold price search always triggers web_search (pricing data)
// 2. Mathematical computation requires reasoning
// 3. High reasoning effort ensures reasoning is present
// 1. Weather search triggers web_search (real-time data)
// 2. Simple analysis requires reasoning
// 3. Medium reasoning effort ensures reasoning is present while avoiding excessive loops
// This combination exposed the itemId bug on main branch
// Note: The previous prompt (gold price + Collatz) caused excessive tool loops in CI
const result = await sendMessageWithModel(
env.mockIpcRenderer,
workspaceId,
"Find the current gold price per ounce via web search. " +
"Then compute round(price^2) and determine how many Collatz steps it takes to reach 1.",
"Use web search to find the current weather in San Francisco. " +
"Then tell me if it's a good day for a picnic.",
"openai",
"gpt-5-codex",
{
thinkingLevel: "high", // Ensure reasoning is used
thinkingLevel: "medium", // Ensure reasoning without excessive deliberation
}
);

Expand All @@ -49,8 +50,8 @@ describeIntegration("OpenAI web_search integration tests", () => {
// Collect and verify stream events
const collector = createEventCollector(env.sentEvents, workspaceId);

// Wait for stream to complete
const streamEnd = await collector.waitForEvent("stream-end", 120000);
// Wait for stream to complete (90s should be enough for simple weather + analysis)
const streamEnd = await collector.waitForEvent("stream-end", 90000);
expect(streamEnd).toBeDefined();

// Verify no errors occurred - this is the KEY test
Expand Down Expand Up @@ -85,6 +86,6 @@ describeIntegration("OpenAI web_search integration tests", () => {
await cleanup();
}
},
150000 // 150 second timeout - reasoning + web_search + computation takes time
120000 // 120 second timeout - reasoning + web_search should complete faster with simpler task
);
});