diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2c3b2a233..31a526485 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -133,7 +133,7 @@ jobs: - name: Run all integration tests with coverage # TEST_OLLAMA=1 enables Ollama-specific tests (now included with all integration tests) # --silent suppresses per-test output (17+ test files × workers = overwhelming logs) - run: TEST_INTEGRATION=1 TEST_OLLAMA=1 bun x jest --coverage --maxWorkers=100% --silent ${{ github.event.inputs.test_filter || 'tests' }} + run: TEST_INTEGRATION=1 bun x jest --coverage --maxWorkers=100% --silent ${{ github.event.inputs.test_filter || 'tests' }} env: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} diff --git a/tests/ipcMain/anthropic1MContext.test.ts b/tests/ipcMain/anthropic1MContext.test.ts index 21a57ff9f..68b37b059 100644 --- a/tests/ipcMain/anthropic1MContext.test.ts +++ b/tests/ipcMain/anthropic1MContext.test.ts @@ -16,11 +16,6 @@ if (shouldRunIntegrationTests()) { } describeIntegration("IpcMain anthropic 1M context integration tests", () => { - // Enable retries in CI for flaky API tests - if (process.env.CI && typeof jest !== "undefined" && jest.retryTimes) { - jest.retryTimes(3, { logErrorsBeforeRetry: true }); - } - test.concurrent( "should handle larger context with 1M flag enabled vs standard limits", async () => { diff --git a/tests/ipcMain/anthropicCacheStrategy.test.ts b/tests/ipcMain/anthropicCacheStrategy.test.ts index b44d0b458..bd8d710e3 100644 --- a/tests/ipcMain/anthropicCacheStrategy.test.ts +++ b/tests/ipcMain/anthropicCacheStrategy.test.ts @@ -13,11 +13,6 @@ if (shouldRunIntegrationTests() && !shouldRunSuite) { } describeIntegration("Anthropic cache strategy integration", () => { - // Enable retries in CI for flaky API tests - if (process.env.CI && typeof jest !== "undefined" && jest.retryTimes) { - jest.retryTimes(2, { logErrorsBeforeRetry: true }); - } - test( "should apply cache control to messages, system prompt, and tools for Anthropic models", async () => { diff --git a/tests/ipcMain/forkWorkspace.test.ts b/tests/ipcMain/forkWorkspace.test.ts index b4a1c7009..e51490713 100644 --- a/tests/ipcMain/forkWorkspace.test.ts +++ b/tests/ipcMain/forkWorkspace.test.ts @@ -28,11 +28,6 @@ if (shouldRunIntegrationTests()) { } describeIntegration("IpcMain fork workspace integration tests", () => { - // Enable retries in CI for flaky API tests - if (process.env.CI && typeof jest !== "undefined" && jest.retryTimes) { - jest.retryTimes(3, { logErrorsBeforeRetry: true }); - } - test.concurrent( "should fail to fork workspace with invalid name", async () => { diff --git a/tests/ipcMain/helpers.ts b/tests/ipcMain/helpers.ts index 3450ff10f..654280dac 100644 --- a/tests/ipcMain/helpers.ts +++ b/tests/ipcMain/helpers.ts @@ -793,3 +793,13 @@ export async function buildLargeHistory( } } } + +/** + * Configure test retries for flaky tests in CI + * Only works with Jest + */ +export function configureTestRetries(retries = 3): void { + if (process.env.CI && typeof jest !== "undefined" && jest.retryTimes) { + jest.retryTimes(retries, { logErrorsBeforeRetry: true }); + } +} diff --git a/tests/ipcMain/modelNotFound.test.ts b/tests/ipcMain/modelNotFound.test.ts index eaf5e6627..821c1d077 100644 --- a/tests/ipcMain/modelNotFound.test.ts +++ b/tests/ipcMain/modelNotFound.test.ts @@ -14,11 +14,6 @@ if (shouldRunIntegrationTests()) { } describeIntegration("IpcMain model_not_found error handling", () => { - // Enable retries in CI for flaky API tests - if (process.env.CI && typeof jest !== "undefined" && jest.retryTimes) { - jest.retryTimes(3, { logErrorsBeforeRetry: true }); - } - test.concurrent( "should classify Anthropic 404 as model_not_found (not retryable)", async () => { diff --git a/tests/ipcMain/ollama.test.ts b/tests/ipcMain/ollama.test.ts index b3ed80295..61a8c9744 100644 --- a/tests/ipcMain/ollama.test.ts +++ b/tests/ipcMain/ollama.test.ts @@ -5,6 +5,7 @@ import { assertStreamSuccess, extractTextFromEvents, modelString, + configureTestRetries, } from "./helpers"; import { spawn } from "child_process"; @@ -83,9 +84,7 @@ async function ensureOllamaModel(model: string): Promise { describeOllama("IpcMain Ollama integration tests", () => { // Enable retries in CI for potential network flakiness with Ollama - if (process.env.CI && typeof jest !== "undefined" && jest.retryTimes) { - jest.retryTimes(3, { logErrorsBeforeRetry: true }); - } + configureTestRetries(3); // Load tokenizer modules and ensure model is available before all tests beforeAll(async () => { @@ -184,7 +183,7 @@ describeOllama("IpcMain Ollama integration tests", () => { // Wait for stream to complete const collector = createEventCollector(env.sentEvents, workspaceId); - await collector.waitForEvent("stream-end", 60000); + await collector.waitForEvent("stream-end", 90000); assertStreamSuccess(collector); diff --git a/tests/ipcMain/openai-web-search.test.ts b/tests/ipcMain/openai-web-search.test.ts index 76587c2b9..18c554384 100644 --- a/tests/ipcMain/openai-web-search.test.ts +++ b/tests/ipcMain/openai-web-search.test.ts @@ -4,6 +4,7 @@ import { createEventCollector, assertStreamSuccess, modelString, + configureTestRetries, } from "./helpers"; // Skip all tests if TEST_INTEGRATION is not set @@ -16,9 +17,7 @@ if (shouldRunIntegrationTests()) { describeIntegration("OpenAI web_search integration tests", () => { // Enable retries in CI for flaky API tests - if (process.env.CI && typeof jest !== "undefined" && jest.retryTimes) { - jest.retryTimes(3, { logErrorsBeforeRetry: true }); - } + configureTestRetries(3); test.concurrent( "should handle reasoning + web_search without itemId errors", diff --git a/tests/ipcMain/queuedMessages.test.ts b/tests/ipcMain/queuedMessages.test.ts index 7a1ad7a7c..cd9882d32 100644 --- a/tests/ipcMain/queuedMessages.test.ts +++ b/tests/ipcMain/queuedMessages.test.ts @@ -66,11 +66,6 @@ async function waitForRestoreToInputEvent( } describeIntegration("IpcMain queuedMessages integration tests", () => { - // Enable retries in CI for flaky API tests - if (process.env.CI && typeof jest !== "undefined" && jest.retryTimes) { - jest.retryTimes(3, { logErrorsBeforeRetry: true }); - } - test.concurrent( "should queue message during streaming and auto-send on stream end", async () => { diff --git a/tests/ipcMain/resumeStream.test.ts b/tests/ipcMain/resumeStream.test.ts index 1e755019d..f0be1dfde 100644 --- a/tests/ipcMain/resumeStream.test.ts +++ b/tests/ipcMain/resumeStream.test.ts @@ -15,11 +15,6 @@ if (shouldRunIntegrationTests()) { } describeIntegration("IpcMain resumeStream integration tests", () => { - // Enable retries in CI for flaky API tests - if (process.env.CI && typeof jest !== "undefined" && jest.retryTimes) { - jest.retryTimes(3, { logErrorsBeforeRetry: true }); - } - test.concurrent( "should resume interrupted stream without new user message", async () => { diff --git a/tests/ipcMain/sendMessage.test.ts b/tests/ipcMain/sendMessage.test.ts index 040505ca8..f717eed76 100644 --- a/tests/ipcMain/sendMessage.test.ts +++ b/tests/ipcMain/sendMessage.test.ts @@ -18,6 +18,7 @@ import { readChatHistory, TEST_IMAGES, modelString, + configureTestRetries, } from "./helpers"; import type { StreamDeltaEvent } from "../../src/common/types/stream"; import { IPC_CHANNELS } from "../../src/common/constants/ipc-constants"; @@ -45,11 +46,6 @@ const PROVIDER_CONFIGS: Array<[string, string]> = [ // - Test timeout values (in describe/test) should be 2-3x the expected duration describeIntegration("IpcMain sendMessage integration tests", () => { - // Enable retries in CI for flaky API tests (only works with Jest, not Bun test runner) - if (process.env.CI && typeof jest !== "undefined" && jest.retryTimes) { - jest.retryTimes(3, { logErrorsBeforeRetry: true }); - } - // Run tests for each provider concurrently describe.each(PROVIDER_CONFIGS)("%s:%s provider tests", (provider, model) => { test.concurrent( @@ -1078,11 +1074,6 @@ These are general instructions that apply to all modes. // Tool policy tests describe("tool policy", () => { - // Retry tool policy tests in CI (they depend on external API behavior) - if (process.env.CI && typeof jest !== "undefined" && jest.retryTimes) { - jest.retryTimes(2, { logErrorsBeforeRetry: true }); - } - test.each(PROVIDER_CONFIGS)( "%s should respect tool policy that disables bash", async (provider, model) => { @@ -1504,6 +1495,9 @@ These are general instructions that apply to all modes. // Test image support across providers describe.each(PROVIDER_CONFIGS)("%s:%s image support", (provider, model) => { + // Retry image tests in CI as they can be flaky with some providers + configureTestRetries(3); + test.concurrent( "should send images to AI model and get response", async () => { diff --git a/tests/ipcMain/streamErrorRecovery.test.ts b/tests/ipcMain/streamErrorRecovery.test.ts index ad5cb149f..011ba629b 100644 --- a/tests/ipcMain/streamErrorRecovery.test.ts +++ b/tests/ipcMain/streamErrorRecovery.test.ts @@ -220,11 +220,6 @@ async function collectStreamUntil( } describeIntegration("Stream Error Recovery (No Amnesia)", () => { - // Enable retries in CI for flaky API tests - if (process.env.CI && typeof jest !== "undefined" && jest.retryTimes) { - jest.retryTimes(3, { logErrorsBeforeRetry: true }); - } - test.concurrent( "should preserve exact prefix and continue from exact point after stream error", async () => { diff --git a/tests/ipcMain/truncate.test.ts b/tests/ipcMain/truncate.test.ts index 1513b8e44..91a9095c6 100644 --- a/tests/ipcMain/truncate.test.ts +++ b/tests/ipcMain/truncate.test.ts @@ -19,11 +19,6 @@ if (shouldRunIntegrationTests()) { } describeIntegration("IpcMain truncate integration tests", () => { - // Enable retries in CI for flaky API tests - if (process.env.CI && typeof jest !== "undefined" && jest.retryTimes) { - jest.retryTimes(3, { logErrorsBeforeRetry: true }); - } - test.concurrent( "should truncate 50% of chat history and verify context is updated", async () => { diff --git a/tests/runtime/ssh-fixture.ts b/tests/runtime/ssh-fixture.ts index 40acd7b98..cb6e02044 100644 --- a/tests/runtime/ssh-fixture.ts +++ b/tests/runtime/ssh-fixture.ts @@ -49,6 +49,7 @@ export async function isDockerAvailable(): Promise { export async function startSSHServer(): Promise { // Create temp directory for SSH keys const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "mux-ssh-test-")); + let containerId: string | undefined; try { // Generate ephemeral SSH key pair @@ -93,7 +94,7 @@ export async function startSSHServer(): Promise { "mux-ssh-test", ]); - const containerId = runResult.stdout.trim(); + containerId = runResult.stdout.trim(); // Wait for container to be ready await waitForContainer(containerId); @@ -121,6 +122,14 @@ export async function startSSHServer(): Promise { tempDir, }; } catch (error) { + // Cleanup container on failure if it was started + if (containerId) { + try { + await execCommand("docker", ["stop", containerId], { timeout: 10000 }); + } catch (cleanupError) { + console.error("Error stopping container during cleanup:", cleanupError); + } + } // Cleanup temp directory on failure await fs.rm(tempDir, { recursive: true, force: true }); throw error;