Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ jobs:
- name: Run all integration tests with coverage
# TEST_OLLAMA=1 enables Ollama-specific tests (now included with all integration tests)
# --silent suppresses per-test output (17+ test files × workers = overwhelming logs)
run: TEST_INTEGRATION=1 TEST_OLLAMA=1 bun x jest --coverage --maxWorkers=100% --silent ${{ github.event.inputs.test_filter || 'tests' }}
run: TEST_INTEGRATION=1 bun x jest --coverage --maxWorkers=100% --silent ${{ github.event.inputs.test_filter || 'tests' }}
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
Expand Down
5 changes: 0 additions & 5 deletions tests/ipcMain/anthropic1MContext.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,6 @@ if (shouldRunIntegrationTests()) {
}

describeIntegration("IpcMain anthropic 1M context integration tests", () => {
// Enable retries in CI for flaky API tests
if (process.env.CI && typeof jest !== "undefined" && jest.retryTimes) {
jest.retryTimes(3, { logErrorsBeforeRetry: true });
}

test.concurrent(
"should handle larger context with 1M flag enabled vs standard limits",
async () => {
Expand Down
5 changes: 0 additions & 5 deletions tests/ipcMain/anthropicCacheStrategy.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,6 @@ if (shouldRunIntegrationTests() && !shouldRunSuite) {
}

describeIntegration("Anthropic cache strategy integration", () => {
// Enable retries in CI for flaky API tests
if (process.env.CI && typeof jest !== "undefined" && jest.retryTimes) {
jest.retryTimes(2, { logErrorsBeforeRetry: true });
}

test(
"should apply cache control to messages, system prompt, and tools for Anthropic models",
async () => {
Expand Down
5 changes: 0 additions & 5 deletions tests/ipcMain/forkWorkspace.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,6 @@ if (shouldRunIntegrationTests()) {
}

describeIntegration("IpcMain fork workspace integration tests", () => {
// Enable retries in CI for flaky API tests
if (process.env.CI && typeof jest !== "undefined" && jest.retryTimes) {
jest.retryTimes(3, { logErrorsBeforeRetry: true });
}

test.concurrent(
"should fail to fork workspace with invalid name",
async () => {
Expand Down
10 changes: 10 additions & 0 deletions tests/ipcMain/helpers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -793,3 +793,13 @@ export async function buildLargeHistory(
}
}
}

/**
 * Turn on automatic re-runs of failing tests when executing under CI.
 *
 * This is a no-op unless the `CI` environment variable is set to a non-empty
 * value and a global `jest` object exposing `retryTimes` is present, so it is
 * safe to call from suites executed by a runner that lacks that API.
 *
 * @param retries - Retry count forwarded to `jest.retryTimes` (defaults to 3).
 */
export function configureTestRetries(retries = 3): void {
  // Outside CI, failures should surface immediately rather than be retried.
  if (!process.env.CI) {
    return;
  }

  // Bail out when the Jest global (or its retry API) is unavailable.
  if (typeof jest === "undefined" || !jest.retryTimes) {
    return;
  }

  jest.retryTimes(retries, { logErrorsBeforeRetry: true });
}
5 changes: 0 additions & 5 deletions tests/ipcMain/modelNotFound.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,6 @@ if (shouldRunIntegrationTests()) {
}

describeIntegration("IpcMain model_not_found error handling", () => {
// Enable retries in CI for flaky API tests
if (process.env.CI && typeof jest !== "undefined" && jest.retryTimes) {
jest.retryTimes(3, { logErrorsBeforeRetry: true });
}

test.concurrent(
"should classify Anthropic 404 as model_not_found (not retryable)",
async () => {
Expand Down
7 changes: 3 additions & 4 deletions tests/ipcMain/ollama.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import {
assertStreamSuccess,
extractTextFromEvents,
modelString,
configureTestRetries,
} from "./helpers";
import { spawn } from "child_process";

Expand Down Expand Up @@ -83,9 +84,7 @@ async function ensureOllamaModel(model: string): Promise<void> {

describeOllama("IpcMain Ollama integration tests", () => {
// Enable retries in CI for potential network flakiness with Ollama
if (process.env.CI && typeof jest !== "undefined" && jest.retryTimes) {
jest.retryTimes(3, { logErrorsBeforeRetry: true });
}
configureTestRetries(3);

// Load tokenizer modules and ensure model is available before all tests
beforeAll(async () => {
Expand Down Expand Up @@ -184,7 +183,7 @@ describeOllama("IpcMain Ollama integration tests", () => {

// Wait for stream to complete
const collector = createEventCollector(env.sentEvents, workspaceId);
await collector.waitForEvent("stream-end", 60000);
await collector.waitForEvent("stream-end", 90000);

assertStreamSuccess(collector);

Expand Down
5 changes: 2 additions & 3 deletions tests/ipcMain/openai-web-search.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import {
createEventCollector,
assertStreamSuccess,
modelString,
configureTestRetries,
} from "./helpers";

// Skip all tests if TEST_INTEGRATION is not set
Expand All @@ -16,9 +17,7 @@ if (shouldRunIntegrationTests()) {

describeIntegration("OpenAI web_search integration tests", () => {
// Enable retries in CI for flaky API tests
if (process.env.CI && typeof jest !== "undefined" && jest.retryTimes) {
jest.retryTimes(3, { logErrorsBeforeRetry: true });
}
configureTestRetries(3);

test.concurrent(
"should handle reasoning + web_search without itemId errors",
Expand Down
5 changes: 0 additions & 5 deletions tests/ipcMain/queuedMessages.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -66,11 +66,6 @@ async function waitForRestoreToInputEvent(
}

describeIntegration("IpcMain queuedMessages integration tests", () => {
// Enable retries in CI for flaky API tests
if (process.env.CI && typeof jest !== "undefined" && jest.retryTimes) {
jest.retryTimes(3, { logErrorsBeforeRetry: true });
}

test.concurrent(
"should queue message during streaming and auto-send on stream end",
async () => {
Expand Down
5 changes: 0 additions & 5 deletions tests/ipcMain/resumeStream.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,6 @@ if (shouldRunIntegrationTests()) {
}

describeIntegration("IpcMain resumeStream integration tests", () => {
// Enable retries in CI for flaky API tests
if (process.env.CI && typeof jest !== "undefined" && jest.retryTimes) {
jest.retryTimes(3, { logErrorsBeforeRetry: true });
}

test.concurrent(
"should resume interrupted stream without new user message",
async () => {
Expand Down
14 changes: 4 additions & 10 deletions tests/ipcMain/sendMessage.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import {
readChatHistory,
TEST_IMAGES,
modelString,
configureTestRetries,
} from "./helpers";
import type { StreamDeltaEvent } from "../../src/common/types/stream";
import { IPC_CHANNELS } from "../../src/common/constants/ipc-constants";
Expand Down Expand Up @@ -45,11 +46,6 @@ const PROVIDER_CONFIGS: Array<[string, string]> = [
// - Test timeout values (in describe/test) should be 2-3x the expected duration

describeIntegration("IpcMain sendMessage integration tests", () => {
// Enable retries in CI for flaky API tests (only works with Jest, not Bun test runner)
if (process.env.CI && typeof jest !== "undefined" && jest.retryTimes) {
jest.retryTimes(3, { logErrorsBeforeRetry: true });
}

// Run tests for each provider concurrently
describe.each(PROVIDER_CONFIGS)("%s:%s provider tests", (provider, model) => {
test.concurrent(
Expand Down Expand Up @@ -1078,11 +1074,6 @@ These are general instructions that apply to all modes.

// Tool policy tests
describe("tool policy", () => {
// Retry tool policy tests in CI (they depend on external API behavior)
if (process.env.CI && typeof jest !== "undefined" && jest.retryTimes) {
jest.retryTimes(2, { logErrorsBeforeRetry: true });
}

test.each(PROVIDER_CONFIGS)(
"%s should respect tool policy that disables bash",
async (provider, model) => {
Expand Down Expand Up @@ -1504,6 +1495,9 @@ These are general instructions that apply to all modes.

// Test image support across providers
describe.each(PROVIDER_CONFIGS)("%s:%s image support", (provider, model) => {
// Retry image tests in CI as they can be flaky with some providers
configureTestRetries(3);

test.concurrent(
"should send images to AI model and get response",
async () => {
Expand Down
5 changes: 0 additions & 5 deletions tests/ipcMain/streamErrorRecovery.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -220,11 +220,6 @@ async function collectStreamUntil(
}

describeIntegration("Stream Error Recovery (No Amnesia)", () => {
// Enable retries in CI for flaky API tests
if (process.env.CI && typeof jest !== "undefined" && jest.retryTimes) {
jest.retryTimes(3, { logErrorsBeforeRetry: true });
}

test.concurrent(
"should preserve exact prefix and continue from exact point after stream error",
async () => {
Expand Down
5 changes: 0 additions & 5 deletions tests/ipcMain/truncate.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,6 @@ if (shouldRunIntegrationTests()) {
}

describeIntegration("IpcMain truncate integration tests", () => {
// Enable retries in CI for flaky API tests
if (process.env.CI && typeof jest !== "undefined" && jest.retryTimes) {
jest.retryTimes(3, { logErrorsBeforeRetry: true });
}

test.concurrent(
"should truncate 50% of chat history and verify context is updated",
async () => {
Expand Down
11 changes: 10 additions & 1 deletion tests/runtime/ssh-fixture.ts
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ export async function isDockerAvailable(): Promise<boolean> {
export async function startSSHServer(): Promise<SSHServerConfig> {
// Create temp directory for SSH keys
const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "mux-ssh-test-"));
let containerId: string | undefined;

try {
// Generate ephemeral SSH key pair
Expand Down Expand Up @@ -93,7 +94,7 @@ export async function startSSHServer(): Promise<SSHServerConfig> {
"mux-ssh-test",
]);

const containerId = runResult.stdout.trim();
containerId = runResult.stdout.trim();

// Wait for container to be ready
await waitForContainer(containerId);
Expand Down Expand Up @@ -121,6 +122,14 @@ export async function startSSHServer(): Promise<SSHServerConfig> {
tempDir,
};
} catch (error) {
// Cleanup container on failure if it was started
if (containerId) {
try {
await execCommand("docker", ["stop", containerId], { timeout: 10000 });
} catch (cleanupError) {
console.error("Error stopping container during cleanup:", cleanupError);
}
}
// Cleanup temp directory on failure
await fs.rm(tempDir, { recursive: true, force: true });
throw error;
Expand Down