diff --git a/src/debug/agentSessionCli.ts b/src/debug/agentSessionCli.ts index 5d0db5089..66c5b2fc9 100644 --- a/src/debug/agentSessionCli.ts +++ b/src/debug/agentSessionCli.ts @@ -7,8 +7,8 @@ import { parseArgs } from "util"; import { Config } from "@/config"; import { HistoryService } from "@/services/historyService"; import { PartialService } from "@/services/partialService"; -import { AIService } from "@/services/aiService"; import { InitStateManager } from "@/services/initStateManager"; +import { AIService } from "@/services/aiService"; import { AgentSession, type AgentSessionChatEvent } from "@/services/agentSession"; import { isCaughtUpMessage, @@ -209,8 +209,8 @@ async function main(): Promise { const historyService = new HistoryService(config); const partialService = new PartialService(config, historyService); - const aiService = new AIService(config, historyService, partialService); const initStateManager = new InitStateManager(config); + const aiService = new AIService(config, historyService, partialService, initStateManager); ensureProvidersConfig(config); const session = new AgentSession({ diff --git a/src/debug/replay-history.ts b/src/debug/replay-history.ts old mode 100755 new mode 100644 index c8104984c..fbd6aa31b --- a/src/debug/replay-history.ts +++ b/src/debug/replay-history.ts @@ -17,6 +17,7 @@ import { parseArgs } from "util"; import { defaultConfig } from "@/config"; import type { CmuxMessage } from "@/types/message"; import { createCmuxMessage } from "@/types/message"; +import { InitStateManager } from "@/services/initStateManager"; import { AIService } from "@/services/aiService"; import { HistoryService } from "@/services/historyService"; import { PartialService } from "@/services/partialService"; @@ -123,7 +124,8 @@ async function main() { const config = defaultConfig; const historyService = new HistoryService(config); const partialService = new PartialService(config, historyService); - const aiService = new AIService(config, historyService, partialService); + const initStateManager = new InitStateManager(config); + const aiService = new AIService(config, historyService, partialService, initStateManager); const modelString = values.model ?? "openai:gpt-5-codex"; const thinkingLevel = (values.thinking ?? "high") as "low" | "medium" | "high"; diff --git a/src/services/aiService.test.ts b/src/services/aiService.test.ts index 7acc18a76..fab2bfb6a 100644 --- a/src/services/aiService.test.ts +++ b/src/services/aiService.test.ts @@ -6,6 +6,7 @@ import { describe, it, expect, beforeEach } from "bun:test"; import { AIService } from "./aiService"; import { HistoryService } from "./historyService"; import { PartialService } from "./partialService"; +import { InitStateManager } from "./initStateManager"; import { Config } from "@/config"; describe("AIService", () => { @@ -15,7 +16,8 @@ describe("AIService", () => { const config = new Config(); const historyService = new HistoryService(config); const partialService = new PartialService(config, historyService); - service = new AIService(config, historyService, partialService); + const initStateManager = new InitStateManager(config); + service = new AIService(config, historyService, partialService, initStateManager); }); // Note: These tests are placeholders as Bun doesn't support Jest mocking diff --git a/src/services/aiService.ts b/src/services/aiService.ts index f6a4df95a..00c68dd9d 100644 --- a/src/services/aiService.ts +++ b/src/services/aiService.ts @@ -12,6 +12,7 @@ import type { CmuxMessage, CmuxTextPart } from "@/types/message"; import { createCmuxMessage } from "@/types/message"; import type { Config } from "@/config"; import { StreamManager } from "./streamManager"; +import type { InitStateManager } from "./initStateManager"; import type { SendMessageError } from "@/types/errors"; import { getToolsForModel } from "@/utils/tools/tools"; import { createRuntime } from "@/runtime/runtimeFactory"; @@ -108,10 +109,16 @@ export class AIService extends EventEmitter { private readonly historyService: HistoryService; private readonly partialService: PartialService; private readonly config: Config; + private readonly initStateManager: InitStateManager; private readonly mockModeEnabled: boolean; private readonly mockScenarioPlayer?: MockScenarioPlayer; - constructor(config: Config, historyService: HistoryService, partialService: PartialService) { + constructor( + config: Config, + historyService: HistoryService, + partialService: PartialService, + initStateManager: InitStateManager + ) { super(); // Increase max listeners to accommodate multiple concurrent workspace listeners // Each workspace subscribes to stream events, and we expect >10 concurrent workspaces @@ -119,6 +126,7 @@ export class AIService extends EventEmitter { this.config = config; this.historyService = historyService; this.partialService = partialService; + this.initStateManager = initStateManager; this.streamManager = new StreamManager(historyService, partialService); void this.ensureSessionsDir(); this.setupStreamEventForwarding(); @@ -423,12 +431,17 @@ export class AIService extends EventEmitter { // Get tool names early for mode transition sentinel (stub config, no workspace context needed) const earlyRuntime = createRuntime({ type: "local", srcBaseDir: process.cwd() }); - const earlyAllTools = await getToolsForModel(modelString, { - cwd: process.cwd(), - runtime: earlyRuntime, - runtimeTempDir: os.tmpdir(), - secrets: {}, - }); + const earlyAllTools = await getToolsForModel( + modelString, + { + cwd: process.cwd(), + runtime: earlyRuntime, + runtimeTempDir: os.tmpdir(), + secrets: {}, + }, + "", // Empty workspace ID for early stub config + this.initStateManager + ); const earlyTools = applyToolPolicy(earlyAllTools, toolPolicy); const toolNamesForSentinel = Object.keys(earlyTools); @@ -533,12 +546,17 @@ export class AIService extends EventEmitter { const runtimeTempDir = await this.streamManager.createTempDirForStream(streamToken, runtime); // Get model-specific tools with workspace path (correct for local or remote) - const allTools = await getToolsForModel(modelString, { - cwd: workspacePath, - runtime, - secrets: secretsToRecord(projectSecrets), - runtimeTempDir, - }); + const allTools = await getToolsForModel( + modelString, + { + cwd: workspacePath, + runtime, + secrets: secretsToRecord(projectSecrets), + runtimeTempDir, + }, + workspaceId, + this.initStateManager + ); // Apply tool policy to filter tools (if policy provided) const tools = applyToolPolicy(allTools, toolPolicy); diff --git a/src/services/initStateManager.test.ts b/src/services/initStateManager.test.ts index 936ea3b1e..d5cb9721c 100644 --- a/src/services/initStateManager.test.ts +++ b/src/services/initStateManager.test.ts @@ -53,8 +53,10 @@ describe("InitStateManager", () => { manager.appendOutput(workspaceId, "Installing deps...", false); manager.appendOutput(workspaceId, "Done!", false); expect(manager.getInitState(workspaceId)?.lines).toEqual([ - { line: "Installing deps...", isError: false, timestamp: expect.any(Number) as number }, - { line: "Done!", isError: false, timestamp: expect.any(Number) as number }, + // eslint-disable-next-line @typescript-eslint/no-unsafe-assignment + { line: "Installing deps...", isError: false, timestamp: expect.any(Number) }, + // eslint-disable-next-line @typescript-eslint/no-unsafe-assignment + { line: "Done!", isError: false, timestamp: expect.any(Number) }, ]); // End init (await to ensure event fires) @@ -79,8 +81,10 @@ describe("InitStateManager", () => { const state = manager.getInitState(workspaceId); expect(state?.lines).toEqual([ - { line: "stdout line", isError: false, timestamp: expect.any(Number) as number }, - { line: "stderr line", isError: true, timestamp: expect.any(Number) as number }, + // eslint-disable-next-line @typescript-eslint/no-unsafe-assignment + { line: "stdout line", isError: false, timestamp: expect.any(Number) }, + // eslint-disable-next-line @typescript-eslint/no-unsafe-assignment + { line: "stderr line", isError: true, timestamp: expect.any(Number) }, ]); }); @@ -109,8 +113,10 @@ describe("InitStateManager", () => { expect(diskState?.status).toBe("success"); expect(diskState?.exitCode).toBe(0); expect(diskState?.lines).toEqual([ - { line: "Line 1", isError: false, timestamp: expect.any(Number) as number }, - { line: "Line 2", isError: true, timestamp: expect.any(Number) as number }, + // eslint-disable-next-line @typescript-eslint/no-unsafe-assignment + { line: "Line 1", isError: false, timestamp: expect.any(Number) }, + // eslint-disable-next-line @typescript-eslint/no-unsafe-assignment + { line: "Line 2", isError: true, timestamp: expect.any(Number) }, ]); }); @@ -221,15 +227,25 @@ describe("InitStateManager", () => { expect(stateAfterDelete).toBeNull(); }); - it("should clear in-memory state", () => { + it("should clear in-memory state", async () => { const workspaceId = "test-workspace"; manager.startInit(workspaceId, "/path/to/hook"); expect(manager.getInitState(workspaceId)).toBeTruthy(); + // Get the init promise before clearing + const initPromise = manager.waitForInit(workspaceId); + + // Clear in-memory state (rejects internal promise, but waitForInit catches it) manager.clearInMemoryState(workspaceId); + // Verify state is cleared expect(manager.getInitState(workspaceId)).toBeUndefined(); + + // waitForInit never throws - it resolves even when init is canceled + // This allows tools to proceed and fail naturally with their own errors + // eslint-disable-next-line @typescript-eslint/await-thenable + await expect(initPromise).resolves.toBeUndefined(); }); }); diff --git a/src/services/initStateManager.ts b/src/services/initStateManager.ts index 495c977f1..368f41f42 100644 --- a/src/services/initStateManager.ts +++ b/src/services/initStateManager.ts @@ -47,15 +47,29 @@ type InitHookState = InitStatus; * - Permanent persistence (init logs kept forever as workspace metadata) * * Lifecycle: - * 1. startInit() - Create in-memory state, emit init-start + * 1. startInit() - Create in-memory state, emit init-start, create completion promise * 2. appendOutput() - Accumulate lines, emit init-output - * 3. endInit() - Finalize state, write to disk, emit init-end + * 3. endInit() - Finalize state, write to disk, emit init-end, resolve promise * 4. State remains in memory until cleared or process restart * 5. replayInit() - Re-emit events from in-memory or disk state (via EventStore) + * + * Waiting: Tools use waitForInit() which returns a promise that resolves when + * init completes. This promise is stored in initPromises map and resolved by + * endInit(). No event listeners needed, eliminating race conditions. */ export class InitStateManager extends EventEmitter { private readonly store: EventStore; + /** + * Promise-based completion tracking for running inits. + * Each running init has a promise that resolves when endInit() is called. + * Multiple tools can await the same promise without race conditions. + */ + private readonly initPromises = new Map< + string, + { promise: Promise; resolve: () => void; reject: (error: Error) => void } + >(); + constructor(config: Config) { super(); this.store = new EventStore( @@ -111,7 +125,7 @@ export class InitStateManager extends EventEmitter { /** * Start tracking a new init hook execution. - * Creates in-memory state and emits init-start event. + * Creates in-memory state, completion promise, and emits init-start event. */ startInit(workspaceId: string, hookPath: string): void { const startTime = Date.now(); @@ -127,6 +141,21 @@ export class InitStateManager extends EventEmitter { this.store.setState(workspaceId, state); + // Create completion promise for this init + // This allows multiple tools to await the same init without event listeners + let resolve: () => void; + let reject: (error: Error) => void; + const promise = new Promise((res, rej) => { + resolve = res; + reject = rej; + }); + + this.initPromises.set(workspaceId, { + promise, + resolve: resolve!, + reject: reject!, + }); + log.debug(`Init hook started for workspace ${workspaceId}: ${hookPath}`); // Emit init-start event @@ -167,7 +196,7 @@ export class InitStateManager extends EventEmitter { /** * Finalize init hook execution. - * Updates state, persists to disk, and emits init-end event. + * Updates state, persists to disk, emits init-end event, and resolves completion promise. */ async endInit(workspaceId: string, exitCode: number): Promise { const state = this.store.getState(workspaceId); @@ -197,6 +226,13 @@ export class InitStateManager extends EventEmitter { timestamp: endTime, } satisfies WorkspaceInitEvent & { workspaceId: string }); + // Resolve completion promise for waiting tools + const promiseEntry = this.initPromises.get(workspaceId); + if (promiseEntry) { + promiseEntry.resolve(); + this.initPromises.delete(workspaceId); + } + // Keep state in memory for replay (unlike streams which delete immediately) } @@ -244,8 +280,83 @@ export class InitStateManager extends EventEmitter { * Clear in-memory state for a workspace. * Useful for testing or cleanup after workspace deletion. * Does NOT delete disk file (use deleteInitStatus for that). + * + * Also cancels any running init promises to prevent orphaned waiters. */ clearInMemoryState(workspaceId: string): void { this.store.deleteState(workspaceId); + + // Cancel any running init promise for this workspace + const promiseEntry = this.initPromises.get(workspaceId); + if (promiseEntry) { + promiseEntry.reject(new Error(`Workspace ${workspaceId} was deleted`)); + this.initPromises.delete(workspaceId); + } + } + + /** + * Wait for workspace initialization to complete. + * Used by tools (bash, file_*) to ensure files are ready before executing. + * + * Behavior: + * - No init state: Returns immediately (init not needed or backwards compat) + * - Init succeeded/failed: Returns immediately (tools proceed regardless of init outcome) + * - Init running: Waits for completion promise (up to 5 minutes, then proceeds anyway) + * + * This method NEVER throws - tools should always proceed. If init fails or times out, + * the tool will either succeed (if init wasn't critical) or fail with its own error + * (e.g., file not found). This provides better error messages than blocking on init. + * + * Promise-based approach eliminates race conditions: + * - Multiple tools share the same promise (no duplicate listeners) + * - No event cleanup needed (promise auto-resolves once) + * - Timeout races handled by Promise.race() + * + * @param workspaceId Workspace ID to wait for + */ + async waitForInit(workspaceId: string): Promise { + const state = this.getInitState(workspaceId); + + // No init state - proceed immediately (backwards compat or init not needed) + if (!state) { + return; + } + + // Init already completed (success or failure) - proceed immediately + // Tools should work regardless of init outcome + if (state.status !== "running") { + return; + } + + // Init is running - wait for completion promise with timeout + const promiseEntry = this.initPromises.get(workspaceId); + + if (!promiseEntry) { + // State says running but no promise exists (shouldn't happen, but handle gracefully) + log.error(`Init state is running for ${workspaceId} but no promise found, proceeding`); + return; + } + + const INIT_TIMEOUT_MS = 5 * 60 * 1000; // 5 minutes + const timeoutPromise = new Promise((resolve) => { + setTimeout(() => { + log.error( + `Init timeout for ${workspaceId} after 5 minutes - tools will proceed anyway. ` + + `Init will continue in background.` + ); + resolve(); // Resolve, don't reject - tools should proceed + }, INIT_TIMEOUT_MS); + }); + + // Race between completion and timeout + // Both resolve (no rejection), so tools always proceed + try { + await Promise.race([promiseEntry.promise, timeoutPromise]); + } catch (error) { + // Init promise was rejected (e.g., workspace deleted) + // Log and proceed anyway - let the tool fail with its own error if needed + const errorMsg = error instanceof Error ? error.message : String(error); + log.error(`Init wait interrupted for ${workspaceId}: ${errorMsg} - proceeding anyway`); + } } } diff --git a/src/services/ipcMain.ts b/src/services/ipcMain.ts index 5b5c37380..202386d61 100644 --- a/src/services/ipcMain.ts +++ b/src/services/ipcMain.ts @@ -134,9 +134,14 @@ export class IpcMain { this.config = config; this.historyService = new HistoryService(config); this.partialService = new PartialService(config, this.historyService); - this.aiService = new AIService(config, this.historyService, this.partialService); - this.bashService = new BashExecutionService(); this.initStateManager = new InitStateManager(config); + this.aiService = new AIService( + config, + this.historyService, + this.partialService, + this.initStateManager + ); + this.bashService = new BashExecutionService(); } private getOrCreateSession(workspaceId: string): AgentSession { @@ -919,6 +924,7 @@ export class IpcMain { // Create bash tool with workspace's cwd and secrets // All IPC bash calls are from UI (background operations) - use truncate to avoid temp file spam + // No init wait needed - IPC calls are user-initiated, not AI tool use const bashTool = createBashTool({ cwd: workspacePath, // Bash executes in the workspace directory runtime, diff --git a/src/services/systemMessage.test.ts b/src/services/systemMessage.test.ts index 23554c058..fb1748eed 100644 --- a/src/services/systemMessage.test.ts +++ b/src/services/systemMessage.test.ts @@ -3,8 +3,7 @@ import * as os from "os"; import * as path from "path"; import { buildSystemMessage } from "./systemMessage"; import type { WorkspaceMetadata } from "@/types/workspace"; -import { spyOn, describe, test, expect, beforeEach, afterEach } from "bun:test"; -import type { Mock } from "bun:test"; +import { describe, test, expect, beforeEach, afterEach, spyOn, type Mock } from "bun:test"; import { LocalRuntime } from "@/runtime/LocalRuntime"; describe("buildSystemMessage", () => { diff --git a/src/services/tools/bash.test.ts b/src/services/tools/bash.test.ts index c860cfaa3..4ea282d6f 100644 --- a/src/services/tools/bash.test.ts +++ b/src/services/tools/bash.test.ts @@ -1,11 +1,11 @@ import { describe, it, expect } from "bun:test"; +import { LocalRuntime } from "@/runtime/LocalRuntime"; import { createBashTool } from "./bash"; import type { BashToolArgs, BashToolResult } from "@/types/tools"; import { BASH_MAX_TOTAL_BYTES } from "@/constants/toolLimits"; import * as fs from "fs"; -import { TestTempDir } from "./testHelpers"; +import { TestTempDir, createTestToolConfig, getTestDeps } from "./testHelpers"; import { createRuntime } from "@/runtime/runtimeFactory"; - import type { ToolCallOptions } from "ai"; // Mock ToolCallOptions for testing @@ -19,12 +19,9 @@ const mockToolCallOptions: ToolCallOptions = { // Use with: using testEnv = createTestBashTool(); function createTestBashTool(options?: { niceness?: number }) { const tempDir = new TestTempDir("test-bash"); - const tool = createBashTool({ - cwd: process.cwd(), - runtime: createRuntime({ type: "local", srcBaseDir: "/tmp" }), - runtimeTempDir: tempDir.path, - ...options, - }); + const config = createTestToolConfig(process.cwd(), { niceness: options?.niceness }); + config.runtimeTempDir = tempDir.path; // Override runtimeTempDir to use test's disposable temp dir + const tool = createBashTool(config); return { tool, @@ -161,12 +158,10 @@ describe("bash tool", () => { it("should truncate overflow output when overflow_policy is 'truncate'", async () => { const tempDir = new TestTempDir("test-bash-truncate"); - const tool = createBashTool({ - cwd: process.cwd(), - runtime: createRuntime({ type: "local", srcBaseDir: "/tmp" }), - runtimeTempDir: tempDir.path, - overflow_policy: "truncate", - }); + const config = createTestToolConfig(process.cwd()); + config.runtimeTempDir = tempDir.path; + config.overflow_policy = "truncate"; + const tool = createBashTool(config); const args: BashToolArgs = { // Generate ~1.5MB of output (1700 lines * 900 bytes) to exceed 1MB byte limit @@ -201,8 +196,9 @@ describe("bash tool", () => { it("should reject single overlong line before storing it (IPC mode)", async () => { const tempDir = new TestTempDir("test-bash-overlong-line"); const tool = createBashTool({ + ...getTestDeps(), cwd: process.cwd(), - runtime: createRuntime({ type: "local", srcBaseDir: "/tmp" }), + runtime: new LocalRuntime(process.cwd()), runtimeTempDir: tempDir.path, overflow_policy: "truncate", }); @@ -233,8 +229,9 @@ describe("bash tool", () => { it("should reject overlong line at boundary (IPC mode)", async () => { const tempDir = new TestTempDir("test-bash-boundary"); const tool = createBashTool({ + ...getTestDeps(), cwd: process.cwd(), - runtime: createRuntime({ type: "local", srcBaseDir: "/tmp" }), + runtime: new LocalRuntime(process.cwd()), runtimeTempDir: tempDir.path, overflow_policy: "truncate", }); @@ -269,8 +266,9 @@ describe("bash tool", () => { it("should use tmpfile policy by default when overflow_policy not specified", async () => { const tempDir = new TestTempDir("test-bash-default"); const tool = createBashTool({ + ...getTestDeps(), cwd: process.cwd(), - runtime: createRuntime({ type: "local", srcBaseDir: "/tmp" }), + runtime: new LocalRuntime(process.cwd()), runtimeTempDir: tempDir.path, // overflow_policy not specified - should default to tmpfile }); @@ -302,8 +300,9 @@ describe("bash tool", () => { it("should preserve up to 100KB in temp file even after 16KB display limit", async () => { const tempDir = new TestTempDir("test-bash-100kb"); const tool = createBashTool({ + ...getTestDeps(), cwd: process.cwd(), - runtime: createRuntime({ type: "local", srcBaseDir: "/tmp" }), + runtime: new LocalRuntime(process.cwd()), runtimeTempDir: tempDir.path, }); @@ -354,8 +353,9 @@ describe("bash tool", () => { it("should stop collection at 100KB file limit", async () => { const tempDir = new TestTempDir("test-bash-100kb-limit"); const tool = createBashTool({ + ...getTestDeps(), cwd: process.cwd(), - runtime: createRuntime({ type: "local", srcBaseDir: "/tmp" }), + runtime: new LocalRuntime(process.cwd()), runtimeTempDir: tempDir.path, }); @@ -397,8 +397,9 @@ describe("bash tool", () => { it("should NOT kill process at display limit (16KB) - verify command completes naturally", async () => { const tempDir = new TestTempDir("test-bash-no-kill-display"); const tool = createBashTool({ + ...getTestDeps(), cwd: process.cwd(), - runtime: createRuntime({ type: "local", srcBaseDir: "/tmp" }), + runtime: new LocalRuntime(process.cwd()), runtimeTempDir: tempDir.path, }); @@ -439,8 +440,9 @@ describe("bash tool", () => { it("should kill process immediately when single line exceeds per-line limit", async () => { const tempDir = new TestTempDir("test-bash-per-line-kill"); const tool = createBashTool({ + ...getTestDeps(), cwd: process.cwd(), - runtime: createRuntime({ type: "local", srcBaseDir: "/tmp" }), + runtime: new LocalRuntime(process.cwd()), runtimeTempDir: tempDir.path, }); @@ -479,8 +481,9 @@ describe("bash tool", () => { it("should handle output just under 16KB without truncation", async () => { const tempDir = new TestTempDir("test-bash-under-limit"); const tool = createBashTool({ + ...getTestDeps(), cwd: process.cwd(), - runtime: createRuntime({ type: "local", srcBaseDir: "/tmp" }), + runtime: new LocalRuntime(process.cwd()), runtimeTempDir: tempDir.path, }); @@ -509,8 +512,9 @@ describe("bash tool", () => { it("should trigger display truncation at exactly 300 lines", async () => { const tempDir = new TestTempDir("test-bash-exact-limit"); const tool = createBashTool({ + ...getTestDeps(), cwd: process.cwd(), - runtime: createRuntime({ type: "local", srcBaseDir: "/tmp" }), + runtime: new LocalRuntime(process.cwd()), runtimeTempDir: tempDir.path, }); @@ -1160,6 +1164,7 @@ describe("SSH runtime redundant cd detection", () => { }); const tool = createBashTool({ + ...getTestDeps(), cwd, runtime: sshRuntime, runtimeTempDir: tempDir.path, diff --git a/src/services/tools/file_edit_insert.test.ts b/src/services/tools/file_edit_insert.test.ts index 2c429c777..c056e5e8d 100644 --- a/src/services/tools/file_edit_insert.test.ts +++ b/src/services/tools/file_edit_insert.test.ts @@ -5,7 +5,7 @@ import * as os from "os"; import { createFileEditInsertTool } from "./file_edit_insert"; import type { FileEditInsertToolArgs, FileEditInsertToolResult } from "@/types/tools"; import type { ToolCallOptions } from "ai"; -import { TestTempDir } from "./testHelpers"; +import { TestTempDir, getTestDeps } from "./testHelpers"; import { createRuntime } from "@/runtime/runtimeFactory"; // Mock ToolCallOptions for testing @@ -19,6 +19,7 @@ const mockToolCallOptions: ToolCallOptions = { function createTestFileEditInsertTool(options?: { cwd?: string }) { const tempDir = new TestTempDir("test-file-edit-insert"); const tool = createFileEditInsertTool({ + ...getTestDeps(), cwd: options?.cwd ?? process.cwd(), runtime: createRuntime({ type: "local", srcBaseDir: "/tmp" }), runtimeTempDir: tempDir.path, @@ -208,6 +209,7 @@ describe("file_edit_insert tool", () => { it("should create file when create is true and file does not exist", async () => { // Setup const tool = createFileEditInsertTool({ + ...getTestDeps(), cwd: testDir, runtime: createRuntime({ type: "local", srcBaseDir: "/tmp" }), runtimeTempDir: "/tmp", @@ -232,6 +234,7 @@ describe("file_edit_insert tool", () => { it("should create parent directories when create is true", async () => { // Setup const tool = createFileEditInsertTool({ + ...getTestDeps(), cwd: testDir, runtime: createRuntime({ type: "local", srcBaseDir: "/tmp" }), runtimeTempDir: "/tmp", @@ -259,6 +262,7 @@ describe("file_edit_insert tool", () => { await fs.writeFile(testFilePath, initialContent); const tool = createFileEditInsertTool({ + ...getTestDeps(), cwd: testDir, runtime: createRuntime({ type: "local", srcBaseDir: "/tmp" }), runtimeTempDir: "/tmp", diff --git a/src/services/tools/file_edit_operation.test.ts b/src/services/tools/file_edit_operation.test.ts index ec28e839c..d65bd4238 100644 --- a/src/services/tools/file_edit_operation.test.ts +++ b/src/services/tools/file_edit_operation.test.ts @@ -1,17 +1,18 @@ import { describe, test, expect, jest } from "@jest/globals"; import { executeFileEditOperation } from "./file_edit_operation"; import { WRITE_DENIED_PREFIX } from "@/types/tools"; -import { createRuntime } from "@/runtime/runtimeFactory"; import type { Runtime } from "@/runtime/Runtime"; +import { createTestToolConfig, getTestDeps } from "./testHelpers"; + const TEST_CWD = "/tmp"; function createConfig(runtime?: Runtime) { - return { - cwd: TEST_CWD, - runtime: runtime ?? createRuntime({ type: "local", srcBaseDir: TEST_CWD }), - runtimeTempDir: "/tmp", - }; + const config = createTestToolConfig(TEST_CWD); + if (runtime) { + config.runtime = runtime; + } + return config; } describe("executeFileEditOperation", () => { @@ -67,6 +68,7 @@ describe("executeFileEditOperation", () => { cwd: testCwd, runtime: mockRuntime, runtimeTempDir: "/tmp", + ...getTestDeps(), }, filePath: testFilePath, operation: () => ({ success: true, newContent: "test", metadata: {} }), diff --git a/src/services/tools/file_edit_replace.test.ts b/src/services/tools/file_edit_replace.test.ts index da76ac87f..87b64e3df 100644 --- a/src/services/tools/file_edit_replace.test.ts +++ b/src/services/tools/file_edit_replace.test.ts @@ -12,6 +12,7 @@ import type { } from "@/types/tools"; import type { ToolCallOptions } from "ai"; import { createRuntime } from "@/runtime/runtimeFactory"; +import { getTestDeps } from "./testHelpers"; // Mock ToolCallOptions for testing const mockToolCallOptions: ToolCallOptions = { @@ -58,6 +59,7 @@ describe("file_edit_replace_string tool", () => { it("should apply a single edit successfully", async () => { await setupFile(testFilePath, "Hello world\nThis is a test\nGoodbye world"); const tool = createFileEditReplaceStringTool({ + ...getTestDeps(), cwd: testDir, runtime: createRuntime({ type: "local", srcBaseDir: "/tmp" }), runtimeTempDir: "/tmp", @@ -96,6 +98,7 @@ describe("file_edit_replace_lines tool", () => { it("should replace a line range successfully", async () => { await setupFile(testFilePath, "line1\nline2\nline3\nline4"); const tool = createFileEditReplaceLinesTool({ + ...getTestDeps(), cwd: testDir, runtime: createRuntime({ type: "local", srcBaseDir: "/tmp" }), runtimeTempDir: "/tmp", diff --git a/src/services/tools/file_read.test.ts b/src/services/tools/file_read.test.ts index bd061552e..b540fc7b7 100644 --- a/src/services/tools/file_read.test.ts +++ b/src/services/tools/file_read.test.ts @@ -1,12 +1,12 @@ import { describe, it, expect, beforeEach, afterEach } from "bun:test"; +import { LocalRuntime } from "@/runtime/LocalRuntime"; import * as fs from "fs/promises"; import * as path from "path"; import * as os from "os"; import { createFileReadTool } from "./file_read"; import type { FileReadToolArgs, FileReadToolResult } from "@/types/tools"; import type { ToolCallOptions } from "ai"; -import { TestTempDir } from "./testHelpers"; -import { createRuntime } from "@/runtime/runtimeFactory"; +import { TestTempDir, createTestToolConfig, getTestDeps } from "./testHelpers"; // Mock ToolCallOptions for testing const mockToolCallOptions: ToolCallOptions = { @@ -18,11 +18,9 @@ const mockToolCallOptions: ToolCallOptions = { // Returns both tool and disposable temp directory function createTestFileReadTool(options?: { cwd?: string }) { const tempDir = new TestTempDir("test-file-read"); - const tool = createFileReadTool({ - cwd: options?.cwd ?? process.cwd(), - runtime: createRuntime({ type: "local", srcBaseDir: "/tmp" }), - runtimeTempDir: tempDir.path, - }); + const config = createTestToolConfig(options?.cwd ?? process.cwd()); + config.runtimeTempDir = tempDir.path; // Override runtimeTempDir to use test's disposable temp dir + const tool = createFileReadTool(config); return { tool, @@ -354,8 +352,9 @@ describe("file_read tool", () => { // Try to read file outside cwd by going up const tool = createFileReadTool({ + ...getTestDeps(), cwd: subDir, - runtime: createRuntime({ type: "local", srcBaseDir: "/tmp" }), + runtime: new LocalRuntime(process.cwd()), runtimeTempDir: "/tmp", }); const args: FileReadToolArgs = { diff --git a/src/services/tools/file_read.ts b/src/services/tools/file_read.ts index bba7b8a46..c7bec3cbc 100644 --- a/src/services/tools/file_read.ts +++ b/src/services/tools/file_read.ts @@ -20,6 +20,7 @@ export const createFileReadTool: ToolFactory = (config: ToolConfiguration) => { { abortSignal: _abortSignal } ): Promise => { // Note: abortSignal available but not used - file reads are fast and complete quickly + try { // Validate no redundant path prefix (must come first to catch absolute paths) const redundantPrefixValidation = validateNoRedundantPrefix( diff --git a/src/services/tools/testHelpers.ts b/src/services/tools/testHelpers.ts index a5e21aeec..78dba3fc3 100644 --- a/src/services/tools/testHelpers.ts +++ b/src/services/tools/testHelpers.ts @@ -1,6 +1,10 @@ import * as fs from "fs"; import * as path from "path"; import * as os from "os"; +import { LocalRuntime } from "@/runtime/LocalRuntime"; +import { InitStateManager } from "@/services/initStateManager"; +import { Config } from "@/config"; +import type { ToolConfiguration } from "@/utils/tools/tools"; /** * Disposable test temp directory that auto-cleans when disposed @@ -25,3 +29,44 @@ export class TestTempDir implements Disposable { } } } + +// Singleton instances for test configuration (shared across all test tool configs) +let testConfig: Config | null = null; +let testInitStateManager: InitStateManager | null = null; + +function getTestConfig(): Config { + testConfig ??= new Config(); + return testConfig; +} + +function getTestInitStateManager(): InitStateManager { + testInitStateManager ??= new InitStateManager(getTestConfig()); + return testInitStateManager; +} + +/** + * Create basic tool configuration for testing. + * Returns a config object with default values that can be overridden. + */ +export function createTestToolConfig( + tempDir: string, + options?: { niceness?: number } +): ToolConfiguration { + return { + cwd: tempDir, + runtime: new LocalRuntime(tempDir), + runtimeTempDir: tempDir, + niceness: options?.niceness, + }; +} + +/** + * Get shared test config and initStateManager for inline tool configs in tests. + * Use this when creating tool configs inline in tests. + */ +export function getTestDeps() { + return { + workspaceId: "test-workspace" as const, + initStateManager: getTestInitStateManager(), + }; +} diff --git a/src/services/tools/wrapWithInitWait.ts b/src/services/tools/wrapWithInitWait.ts new file mode 100644 index 000000000..86ad002cb --- /dev/null +++ b/src/services/tools/wrapWithInitWait.ts @@ -0,0 +1,38 @@ +import type { Tool } from "ai"; +import type { InitStateManager } from "@/services/initStateManager"; + +/** + * Wraps a tool to wait for workspace initialization before execution. + * + * This wrapper handles the cross-cutting concern of init state waiting, + * keeping individual tools simple and focused on their core functionality. + * + * Only runtime-dependent tools (bash, file_read, file_edit_*) need this wrapper. + * Non-runtime tools (propose_plan, todo, web_search) execute immediately. + * + * @param tool The tool to wrap (returned from a tool factory) + * @param workspaceId Workspace ID for init state tracking + * @param initStateManager Init state manager for waiting + * @returns Wrapped tool that waits for init before executing + */ +export function wrapWithInitWait( + tool: Tool, + workspaceId: string, + initStateManager: InitStateManager +): Tool { + // eslint-disable-next-line @typescript-eslint/consistent-type-assertions + return { + ...tool, + execute: async (args: TParameters, options) => { + // Wait for workspace initialization to complete (no-op if not needed) + // This never throws - tools proceed regardless of init outcome + await initStateManager.waitForInit(workspaceId); + + // Execute the actual tool with all arguments + if (!tool.execute) { + throw new Error("Tool does not have an execute function"); + } + return tool.execute(args, options); + }, + } as Tool; +} diff --git a/src/utils/tools/tools.ts b/src/utils/tools/tools.ts index a876a17a8..c9aa0aeda 100644 --- a/src/utils/tools/tools.ts +++ b/src/utils/tools/tools.ts @@ -6,9 +6,11 @@ import { createFileEditReplaceStringTool } from "@/services/tools/file_edit_repl import { createFileEditInsertTool } from "@/services/tools/file_edit_insert"; import { createProposePlanTool } from "@/services/tools/propose_plan"; import { createTodoWriteTool, createTodoReadTool } from "@/services/tools/todo"; +import { wrapWithInitWait } from "@/services/tools/wrapWithInitWait"; import { log } from "@/services/log"; import type { Runtime } from "@/runtime/Runtime"; +import type { InitStateManager } from "@/services/initStateManager"; /** * Configuration for tools that need runtime context @@ -41,30 +43,48 @@ export type ToolFactory = (config: ToolConfiguration) => Tool; * * @param modelString The model string in format "provider:model-id" * @param config Required configuration for tools + * @param workspaceId Workspace ID for init state tracking (required for runtime tools) + * @param initStateManager Init state manager for runtime tools to wait for initialization * @returns Promise resolving to record of tools available for the model */ export async function getToolsForModel( modelString: string, - config: ToolConfiguration + config: ToolConfiguration, + workspaceId: string, + initStateManager: InitStateManager ): Promise> { const [provider, modelId] = modelString.split(":"); - // Base tools available for all models - const baseTools: Record = { - // Use snake_case for tool names to match what seems to be the convention. - file_read: createFileReadTool(config), - file_edit_replace_string: createFileEditReplaceStringTool(config), + // Helper to reduce repetition when wrapping runtime tools + const wrap = (tool: Tool) => + wrapWithInitWait(tool, workspaceId, initStateManager); + + // Runtime-dependent tools need to wait for workspace initialization + // Wrap them to handle init waiting centrally instead of in each tool + const runtimeTools: Record = { + file_read: wrap(createFileReadTool(config)), + file_edit_replace_string: wrap(createFileEditReplaceStringTool(config)), // DISABLED: file_edit_replace_lines - causes models (particularly GPT-5-Codex) // to leave repository in broken state due to issues with concurrent file modifications // and line number miscalculations. Use file_edit_replace_string or file_edit_insert instead. - // file_edit_replace_lines: createFileEditReplaceLinesTool(config), - file_edit_insert: createFileEditInsertTool(config), - bash: createBashTool(config), + // file_edit_replace_lines: wrap(createFileEditReplaceLinesTool(config)), + file_edit_insert: wrap(createFileEditInsertTool(config)), + bash: wrap(createBashTool(config)), + }; + + // Non-runtime tools execute immediately (no init wait needed) + const nonRuntimeTools: Record = { propose_plan: createProposePlanTool(config), todo_write: createTodoWriteTool(config), todo_read: createTodoReadTool(config), }; + // Base tools available for all models + const baseTools: Record = { + ...runtimeTools, + ...nonRuntimeTools, + }; + // Try to add provider-specific web search tools if available // Lazy-load providers to avoid loading all AI SDKs at startup try { diff --git a/tests/ipcMain/helpers.ts b/tests/ipcMain/helpers.ts index ac388a349..e8467bae5 100644 --- a/tests/ipcMain/helpers.ts +++ b/tests/ipcMain/helpers.ts @@ -1,6 +1,11 @@ import type { IpcRenderer } from "electron"; import { IPC_CHANNELS, getChatChannel } from "../../src/constants/ipc-constants"; -import type { SendMessageOptions, WorkspaceChatMessage } from "../../src/types/ipc"; +import type { + SendMessageOptions, + WorkspaceChatMessage, + WorkspaceInitEvent, +} from "../../src/types/ipc"; +import { isInitStart, isInitOutput, isInitEnd } from "../../src/types/ipc"; import type { Result } from "../../src/types/result"; import type { SendMessageError } from "../../src/types/errors"; import type { WorkspaceMetadataWithPaths } from "../../src/types/workspace"; @@ -548,6 +553,59 @@ export async function waitForInitComplete( throw new Error(`Init did not complete within ${timeoutMs}ms - workspace may not be ready`); } +/** + * Collect all init events for a workspace. + * Filters sentEvents for init-start, init-output, and init-end events. + * Returns the events in chronological order. + */ +export function collectInitEvents( + env: import("./setup").TestEnvironment, + workspaceId: string +): WorkspaceInitEvent[] { + return env.sentEvents + .filter((e) => e.channel === getChatChannel(workspaceId)) + .map((e) => e.data as WorkspaceChatMessage) + .filter( + (msg) => isInitStart(msg) || isInitOutput(msg) || isInitEnd(msg) + ) as WorkspaceInitEvent[]; +} + +/** + * Wait for init-end event without checking exit code. + * Use this when you want to test failure cases or inspect the exit code yourself. + * For success-only tests, use waitForInitComplete() which throws on failure. + */ +export async function waitForInitEnd( + env: import("./setup").TestEnvironment, + workspaceId: string, + timeoutMs = 5000 +): Promise { + const startTime = Date.now(); + let pollInterval = 50; + + while (Date.now() - startTime < timeoutMs) { + // Check for init-end event in sentEvents + const initEndEvent = env.sentEvents.find( + (e) => + e.channel === getChatChannel(workspaceId) && + typeof e.data === "object" && + e.data !== null && + "type" in e.data && + e.data.type === "init-end" + ); + + if (initEndEvent) { + return; // Found end event, regardless of exit code + } + + await new Promise((resolve) => setTimeout(resolve, pollInterval)); + pollInterval = Math.min(pollInterval * 1.5, 500); + } + + // Throw error on timeout + throw new Error(`Init did not complete within ${timeoutMs}ms`); +} + /** * Wait for stream to complete successfully * Common pattern: create collector, wait for end, assert success diff --git a/tests/ipcMain/workspaceInitHook.test.ts b/tests/ipcMain/initWorkspace.test.ts similarity index 51% rename from tests/ipcMain/workspaceInitHook.test.ts rename to tests/ipcMain/initWorkspace.test.ts index e23dd8e6e..82a59090b 100644 --- a/tests/ipcMain/workspaceInitHook.test.ts +++ b/tests/ipcMain/initWorkspace.test.ts @@ -1,14 +1,42 @@ -import { shouldRunIntegrationTests, createTestEnvironment, cleanupTestEnvironment } from "./setup"; +import { + shouldRunIntegrationTests, + createTestEnvironment, + cleanupTestEnvironment, + validateApiKeys, + getApiKey, + setupProviders, + preloadTestModules, + type TestEnvironment, +} from "./setup"; import { IPC_CHANNELS, getChatChannel } from "../../src/constants/ipc-constants"; -import { generateBranchName, createWorkspace } from "./helpers"; +import { + generateBranchName, + createWorkspace, + waitForInitComplete, + waitForInitEnd, + collectInitEvents, + waitFor, +} from "./helpers"; import type { WorkspaceChatMessage, WorkspaceInitEvent } from "../../src/types/ipc"; import { isInitStart, isInitOutput, isInitEnd } from "../../src/types/ipc"; import * as path from "path"; import * as os from "os"; +import { + isDockerAvailable, + startSSHServer, + stopSSHServer, + type SSHServerConfig, +} from "../runtime/ssh-fixture"; +import type { RuntimeConfig } from "../../src/types/runtime"; // Skip all tests if TEST_INTEGRATION is not set const describeIntegration = shouldRunIntegrationTests() ? describe : describe.skip; +// Validate API keys for AI tests +if (shouldRunIntegrationTests()) { + validateApiKeys(["ANTHROPIC_API_KEY"]); +} + /** * Create a temp git repo with a .cmux/init hook that writes to stdout/stderr and exits with a given code */ @@ -17,6 +45,7 @@ async function createTempGitRepoWithInitHook(options: { stdoutLines?: string[]; stderrLines?: string[]; sleepBetweenLines?: number; // milliseconds + customScript?: string; // Optional custom script content (overrides stdout/stderr) }): Promise { const fs = await import("fs/promises"); const { exec } = await import("child_process"); @@ -41,25 +70,30 @@ async function createTempGitRepoWithInitHook(options: { // Create init hook script const hookPath = path.join(cmuxDir, "init"); - const sleepCmd = options.sleepBetweenLines ? `sleep ${options.sleepBetweenLines / 1000}` : ""; - const stdoutCmds = (options.stdoutLines ?? []) - .map((line, idx) => { - const needsSleep = sleepCmd && idx < (options.stdoutLines?.length ?? 0) - 1; - return `echo "${line}"${needsSleep ? `\n${sleepCmd}` : ""}`; - }) - .join("\n"); + let scriptContent: string; + if (options.customScript) { + scriptContent = `#!/bin/bash\n${options.customScript}\nexit ${options.exitCode}\n`; + } else { + const sleepCmd = options.sleepBetweenLines ? `sleep ${options.sleepBetweenLines / 1000}` : ""; + + const stdoutCmds = (options.stdoutLines ?? []) + .map((line, idx) => { + const needsSleep = sleepCmd && idx < (options.stdoutLines?.length ?? 0) - 1; + return `echo "${line}"${needsSleep ? `\n${sleepCmd}` : ""}`; + }) + .join("\n"); - const stderrCmds = (options.stderrLines ?? []).map((line) => `echo "${line}" >&2`).join("\n"); + const stderrCmds = (options.stderrLines ?? []).map((line) => `echo "${line}" >&2`).join("\n"); - const scriptContent = `#!/usr/bin/env bash -${stdoutCmds} -${stderrCmds} -exit ${options.exitCode} -`; + scriptContent = `#!/bin/bash\n${stdoutCmds}\n${stderrCmds}\nexit ${options.exitCode}\n`; + } await fs.writeFile(hookPath, scriptContent, { mode: 0o755 }); + // Commit the init hook (required for SSH runtime - git worktree syncs committed files) + await execAsync(`git add -A && git commit -m "Add init hook"`, { cwd: tempDir }); + return tempDir; } @@ -106,31 +140,11 @@ describeIntegration("IpcMain workspace init hook integration tests", () => { const workspaceId = createResult.metadata.id; - // Wait for hook to complete by polling sentEvents - const deadline = Date.now() + 10000; - let initEvents: WorkspaceInitEvent[] = []; - while (Date.now() < deadline) { - // Filter sentEvents for this workspace's init events on chat channel - initEvents = env.sentEvents - .filter((e) => e.channel === getChatChannel(workspaceId)) - .map((e) => e.data as WorkspaceChatMessage) - .filter( - (msg) => isInitStart(msg) || isInitOutput(msg) || isInitEnd(msg) - ) as WorkspaceInitEvent[]; - - // Check if we have the end event - const hasEnd = initEvents.some((e) => isInitEnd(e)); - if (hasEnd) break; - - // Wait a bit before checking again - await new Promise((resolve) => setTimeout(resolve, 100)); - } + // Wait for hook to complete + await waitForInitComplete(env, workspaceId, 10000); - // Verify we got the end event - const successEndEvent = initEvents.find((e) => isInitEnd(e)); - if (!successEndEvent) { - throw new Error("Hook did not complete in time"); - } + // Collect all init events for verification + const initEvents = collectInitEvents(env, workspaceId); // Verify event sequence expect(initEvents.length).toBeGreaterThan(0); @@ -203,27 +217,11 @@ describeIntegration("IpcMain workspace init hook integration tests", () => { const workspaceId = createResult.metadata.id; - // Wait for hook to complete by polling sentEvents - const deadline = Date.now() + 10000; - let initEvents: WorkspaceInitEvent[] = []; - while (Date.now() < deadline) { - initEvents = env.sentEvents - .filter((e) => e.channel === getChatChannel(workspaceId)) - .map((e) => e.data as WorkspaceChatMessage) - .filter( - (msg) => isInitStart(msg) || isInitOutput(msg) || isInitEnd(msg) - ) as WorkspaceInitEvent[]; - - const hasEnd = initEvents.some((e) => isInitEnd(e)); - if (hasEnd) break; - - await new Promise((resolve) => setTimeout(resolve, 100)); - } + // Wait for hook to complete (without throwing on failure) + await waitForInitEnd(env, workspaceId, 10000); - const failureEndEvent = initEvents.find((e) => isInitEnd(e)); - if (!failureEndEvent) { - throw new Error("Hook did not complete in time"); - } + // Collect all init events for verification + const initEvents = collectInitEvents(env, workspaceId); // Verify we got events expect(initEvents.length).toBeGreaterThan(0); @@ -293,10 +291,7 @@ describeIntegration("IpcMain workspace init hook integration tests", () => { await new Promise((resolve) => setTimeout(resolve, 500)); // Verify init events were sent (workspace creation logs even without hook) - const initEvents = env.sentEvents - .filter((e) => e.channel === getChatChannel(workspaceId)) - .map((e) => e.data as WorkspaceChatMessage) - .filter((msg) => isInitStart(msg) || isInitOutput(msg) || isInitEnd(msg)); + const initEvents = collectInitEvents(env, workspaceId); // Should have init-start event (always emitted, even without hook) const startEvent = initEvents.find((e) => isInitStart(e)); @@ -347,7 +342,7 @@ describeIntegration("IpcMain workspace init hook integration tests", () => { const workspaceId = createResult.metadata.id; // Wait for init hook to complete - await new Promise((resolve) => setTimeout(resolve, 1000)); + await waitForInitComplete(env, workspaceId, 5000); // Verify init-status.json exists on disk const initStatusPath = path.join(env.config.getSessionDir(workspaceId), "init-status.json"); @@ -416,25 +411,19 @@ test.concurrent( const workspaceId = createResult.metadata.id; // Wait for all init events to arrive - const deadline = Date.now() + 10000; - let initOutputEvents: Array<{ timestamp: number; line: string }> = []; + await waitForInitComplete(env, workspaceId, 10000); - while (Date.now() < deadline) { - const currentEvents = env.sentEvents - .filter((e) => e.channel === getChatChannel(workspaceId)) - .filter((e) => isInitOutput(e.data as WorkspaceChatMessage)); - - const allOutputEvents = currentEvents.map((e) => ({ + // Collect timestamped output events + const allOutputEvents = env.sentEvents + .filter((e) => e.channel === getChatChannel(workspaceId)) + .filter((e) => isInitOutput(e.data as WorkspaceChatMessage)) + .map((e) => ({ timestamp: e.timestamp, // Use timestamp from when event was sent line: (e.data as { line: string }).line, })); - // Filter to only hook output lines (exclude workspace creation logs) - initOutputEvents = allOutputEvents.filter((e) => e.line.startsWith("Line ")); - - if (initOutputEvents.length >= 4) break; - await new Promise((resolve) => setTimeout(resolve, 50)); - } + // Filter to only hook output lines (exclude workspace creation logs) + const initOutputEvents = allOutputEvents.filter((e) => e.line.startsWith("Line ")); expect(initOutputEvents.length).toBe(4); @@ -461,3 +450,272 @@ test.concurrent( }, 15000 ); + +// SSH server config for runtime matrix tests +let sshConfig: SSHServerConfig | undefined; + +// ============================================================================ +// Runtime Matrix Tests - Init Queue Behavior +// ============================================================================ + +describeIntegration("Init Queue - Runtime Matrix", () => { + beforeAll(async () => { + // Preload AI SDK providers and tokenizers + await preloadTestModules(); + + // Only start SSH server if Docker is available + if (await isDockerAvailable()) { + console.log("Starting SSH server container for init queue tests..."); + sshConfig = await startSSHServer(); + console.log(`SSH server ready on port ${sshConfig.port}`); + } else { + console.log("Docker not available - SSH tests will be skipped"); + } + }, 60000); + + afterAll(async () => { + if (sshConfig) { + console.log("Stopping SSH server container..."); + await stopSSHServer(sshConfig); + } + }, 30000); + + // Test matrix: Run tests for both local and SSH runtimes + describe.each<{ type: "local" | "ssh" }>([{ type: "local" }, { type: "ssh" }])( + "Runtime: $type", + ({ type }) => { + // Helper to build runtime config + const getRuntimeConfig = (branchName: string): RuntimeConfig | undefined => { + if (type === "ssh" && sshConfig) { + return { + type: "ssh", + host: `testuser@localhost`, + srcBaseDir: `${sshConfig.workdir}/${branchName}`, + identityFile: sshConfig.privateKeyPath, + port: sshConfig.port, + }; + } + return undefined; // undefined = defaults to local + }; + + // Timeouts vary by runtime type + const testTimeout = type === "ssh" ? 90000 : 30000; + const streamTimeout = type === "ssh" ? 30000 : 15000; + const initWaitBuffer = type === "ssh" ? 10000 : 2000; + + test.concurrent( + "file_read should wait for init hook before executing (even when init fails)", + async () => { + // Skip SSH test if Docker not available + if (type === "ssh" && !sshConfig) { + console.log("Skipping SSH test - Docker not available"); + return; + } + + const env = await createTestEnvironment(); + const branchName = generateBranchName("init-wait-file-read"); + + // Setup API provider + await setupProviders(env.mockIpcRenderer, { + anthropic: { + apiKey: getApiKey("ANTHROPIC_API_KEY"), + }, + }); + + // Create repo with init hook that sleeps 5s, writes a file, then FAILS + // This tests that tools proceed even when init hook fails (exit code 1) + const tempGitRepo = await createTempGitRepoWithInitHook({ + exitCode: 1, // EXIT WITH FAILURE + customScript: ` +echo "Starting init..." +sleep 5 +echo "Writing file before exit..." +echo "Hello from init hook!" > init_created_file.txt +echo "File written, now exiting with error" +exit 1 + `, + }); + + try { + // Create workspace with runtime config + const runtimeConfig = getRuntimeConfig(branchName); + const createResult = await createWorkspace( + env.mockIpcRenderer, + tempGitRepo, + branchName, + undefined, + runtimeConfig + ); + expect(createResult.success).toBe(true); + if (!createResult.success) return; + + const workspaceId = createResult.metadata.id; + + // Clear sent events to isolate AI message events + env.sentEvents.length = 0; + + // IMMEDIATELY ask AI to read the file (before init completes) + const sendResult = await env.mockIpcRenderer.invoke( + IPC_CHANNELS.WORKSPACE_SEND_MESSAGE, + workspaceId, + "Read the file init_created_file.txt and tell me what it says", + { + model: "anthropic:claude-haiku-4-5", + } + ); + + expect(sendResult.success).toBe(true); + + // Wait for stream completion + await waitFor(() => { + const chatChannel = getChatChannel(workspaceId); + return env.sentEvents + .filter((e) => e.channel === chatChannel) + .some( + (e) => + typeof e.data === "object" && + e.data !== null && + "type" in e.data && + e.data.type === "stream-end" + ); + }, streamTimeout); + + // Extract all tool call end events from the stream + const chatChannel = getChatChannel(workspaceId); + const toolCallEndEvents = env.sentEvents + .filter((e) => e.channel === chatChannel) + .map((e) => e.data as WorkspaceChatMessage) + .filter( + (msg) => + typeof msg === "object" && + msg !== null && + "type" in msg && + msg.type === "tool-call-end" + ); + + // Count file_read tool calls + const fileReadCalls = toolCallEndEvents.filter( + (msg: any) => msg.toolName === "file_read" + ); + + // ASSERTION 1: Should have exactly ONE file_read call (no retries) + // This proves the tool waited for init to complete (even though init failed) + expect(fileReadCalls.length).toBe(1); + + // ASSERTION 2: The file_read should have succeeded + // Init failure doesn't block tools - they proceed and fail/succeed naturally + const fileReadResult = fileReadCalls[0] as any; + expect(fileReadResult.result?.success).toBe(true); + + // ASSERTION 3: Should contain the expected content + // File was created before init exited with error, so read succeeds + const content = fileReadResult.result?.content; + expect(content).toContain("Hello from init hook!"); + + // Wait for init to complete (with failure) + await waitForInitEnd(env, workspaceId, initWaitBuffer); + + // Verify init completed with FAILURE (exit code 1) + const initEvents = collectInitEvents(env, workspaceId); + const initEndEvent = initEvents.find((e) => isInitEnd(e)); + expect(initEndEvent).toBeDefined(); + if (initEndEvent && isInitEnd(initEndEvent)) { + expect(initEndEvent.exitCode).toBe(1); + } + + // ======================================================================== + // SECOND MESSAGE: Verify init state persistence (with failed init) + // ======================================================================== + // After init completes (even with failure), subsequent operations should + // NOT wait for init. This tests that waitForInit() correctly returns + // immediately when state.status !== "running" (whether "success" OR "error") + // ======================================================================== + + // Clear events to isolate second message + env.sentEvents.length = 0; + + const startSecondMessage = Date.now(); + + // Send another message to read the same file + const sendResult2 = await env.mockIpcRenderer.invoke( + IPC_CHANNELS.WORKSPACE_SEND_MESSAGE, + workspaceId, + "Read init_created_file.txt again and confirm the content", + { + model: "anthropic:claude-haiku-4-5", + } + ); + + expect(sendResult2.success).toBe(true); + + // Wait for stream completion + const deadline2 = Date.now() + streamTimeout; + let streamComplete2 = false; + + while (Date.now() < deadline2 && !streamComplete2) { + const chatChannel = getChatChannel(workspaceId); + const chatEvents = env.sentEvents.filter((e) => e.channel === chatChannel); + + streamComplete2 = chatEvents.some( + (e) => + typeof e.data === "object" && + e.data !== null && + "type" in e.data && + e.data.type === "stream-end" + ); + + if (!streamComplete2) { + await new Promise((resolve) => setTimeout(resolve, 100)); + } + } + + expect(streamComplete2).toBe(true); + + // Extract tool calls from second message + const toolCallEndEvents2 = env.sentEvents + .filter((e) => e.channel === chatChannel) + .map((e) => e.data as WorkspaceChatMessage) + .filter( + (msg) => + typeof msg === "object" && + msg !== null && + "type" in msg && + msg.type === "tool-call-end" + ); + + const fileReadCalls2 = toolCallEndEvents2.filter( + (msg: any) => msg.toolName === "file_read" + ); + + // ASSERTION 4: Second message should also have exactly ONE file_read + expect(fileReadCalls2.length).toBe(1); + + // ASSERTION 5: Second file_read should succeed (init already complete) + const fileReadResult2 = fileReadCalls2[0] as any; + expect(fileReadResult2.result?.success).toBe(true); + + // ASSERTION 6: Content should still be correct + const content2 = fileReadResult2.result?.content; + expect(content2).toContain("Hello from init hook!"); + + // ASSERTION 7: Second message should be MUCH faster than first + // First message had to wait ~5 seconds for init. Second should be instant. + const secondMessageDuration = Date.now() - startSecondMessage; + // Allow 10 seconds for API round-trip but should be way less than first message + expect(secondMessageDuration).toBeLessThan(10000); + + // Log timing for debugging + console.log(`Second message completed in ${secondMessageDuration}ms (no init wait)`); + + // Cleanup workspace + await env.mockIpcRenderer.invoke(IPC_CHANNELS.WORKSPACE_REMOVE, workspaceId); + } finally { + await cleanupTestEnvironment(env); + await cleanupTempGitRepo(tempGitRepo); + } + }, + testTimeout + ); + } + ); +}); diff --git a/tests/ipcMain/runtimeFileEditing.test.ts b/tests/ipcMain/runtimeFileEditing.test.ts index 206c64c86..d9ed72d07 100644 --- a/tests/ipcMain/runtimeFileEditing.test.ts +++ b/tests/ipcMain/runtimeFileEditing.test.ts @@ -153,11 +153,11 @@ describeIntegration("Runtime File Editing Tools", () => { expect(createStreamEnd).toBeDefined(); expect((createStreamEnd as any).error).toBeUndefined(); - // Now ask AI to read the file + // Now ask AI to read the file (explicitly request file_read tool) const readEvents = await sendMessageAndWait( env, workspaceId, - `Read the file ${testFileName} and tell me what it contains.`, + `Use the file_read tool to read ${testFileName} and tell me what it contains.`, HAIKU_MODEL, FILE_TOOLS_ONLY, streamTimeout @@ -236,11 +236,11 @@ describeIntegration("Runtime File Editing Tools", () => { expect(createStreamEnd).toBeDefined(); expect((createStreamEnd as any).error).toBeUndefined(); - // Ask AI to replace text + // Ask AI to replace text (explicitly request file_edit_replace_string tool) const replaceEvents = await sendMessageAndWait( env, workspaceId, - `In ${testFileName}, replace "brown fox" with "red panda".`, + `Use the file_edit_replace_string tool to replace "brown fox" with "red panda" in ${testFileName}.`, HAIKU_MODEL, FILE_TOOLS_ONLY, streamTimeout @@ -325,11 +325,11 @@ describeIntegration("Runtime File Editing Tools", () => { expect(createStreamEnd).toBeDefined(); expect((createStreamEnd as any).error).toBeUndefined(); - // Ask AI to insert text + // Ask AI to insert text (explicitly request file_edit tool usage) const insertEvents = await sendMessageAndWait( env, workspaceId, - `In ${testFileName}, insert "Line 2" between Line 1 and Line 3.`, + `Use the file_edit_insert or file_edit_replace_string tool to insert "Line 2" between Line 1 and Line 3 in ${testFileName}.`, HAIKU_MODEL, FILE_TOOLS_ONLY, streamTimeout @@ -340,12 +340,15 @@ describeIntegration("Runtime File Editing Tools", () => { expect(streamEnd).toBeDefined(); expect((streamEnd as any).error).toBeUndefined(); - // Verify file_edit_insert tool was called + // Verify a file_edit tool was called (either insert or replace_string) const toolCalls = insertEvents.filter( (e) => "type" in e && e.type === "tool-call-start" ); - const insertCall = toolCalls.find((e: any) => e.toolName === "file_edit_insert"); - expect(insertCall).toBeDefined(); + const editCall = toolCalls.find( + (e: any) => + e.toolName === "file_edit_insert" || e.toolName === "file_edit_replace_string" + ); + expect(editCall).toBeDefined(); // Verify the insertion was successful const responseText = extractTextFromEvents(insertEvents); diff --git a/tests/setup.ts b/tests/setup.ts index ad8144a9b..d598a9f14 100644 --- a/tests/setup.ts +++ b/tests/setup.ts @@ -10,7 +10,8 @@ require("disposablestack/auto"); assert.equal(typeof Symbol.dispose, "symbol"); assert.equal(typeof Symbol.asyncDispose, "symbol"); -// Polyfill File for Node 18 (undici needs it) +// Polyfill File for undici in jest environment +// undici expects File to be available globally but jest doesn't provide it if (typeof globalThis.File === "undefined") { (globalThis as any).File = class File extends Blob { constructor(bits: BlobPart[], name: string, options?: FilePropertyBag) {