
Commit d18a38b

🤖 Add integration test for OpenAI auto-truncation
Add disableAutoTruncation flag to SendMessageOptions for testing context overflow behavior.

Test verifies:
1. Context limit exceeded when auto-truncation disabled
2. Successful recovery with auto-truncation enabled

Test sends large messages (~10k tokens each) to trigger the 128k context limit, then verifies truncation: "auto" allows continuation. Will run in CI with API keys.

_Generated with `cmux`_
1 parent d907fb6 commit d18a38b
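
As a quick back-of-the-envelope check, the overflow setup works out as below. The ~4-characters-per-token ratio and the ~128k window are the same heuristics the test's own comments use, not measured values:

```typescript
// Rough budget math behind phase 1 of the test (heuristic figures only).
const charsPerMessage = 40_000;                     // "A".repeat(40000)
const approxTokensPerMessage = charsPerMessage / 4; // ~4 chars per token => ~10k tokens
const maxMessages = 20;
const worstCaseTokens = maxMessages * approxTokensPerMessage; // ~200k tokens
const contextWindow = 128_000;                      // approximate gpt-4o-mini window

// The loop breaks as soon as a request fails, but even in the worst case the
// accumulated history comfortably exceeds the window, so a context error is expected.
console.log(worstCaseTokens > contextWindow); // true
```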

4 files changed: +98 -5 lines changed


src/services/aiService.ts

Lines changed: 15 additions & 4 deletions

@@ -173,7 +173,10 @@ export class AIService extends EventEmitter {
   * constructor, ensuring automatic parity with Vercel AI SDK - any configuration options
   * supported by the provider will work without modification.
   */
-  private createModel(modelString: string): Result<LanguageModel, SendMessageError> {
+  private createModel(
+    modelString: string,
+    options?: { disableAutoTruncation?: boolean }
+  ): Result<LanguageModel, SendMessageError> {
     try {
       // Parse model string (format: "provider:model-id")
       const [providerName, modelId] = modelString.split(":");
@@ -223,6 +226,8 @@ export class AIService extends EventEmitter {
       // This is a temporary override until @ai-sdk/openai supports passing
       // truncation via providerOptions. Safe because it only targets the
       // OpenAI Responses endpoint and leaves other providers untouched.
+      // Can be disabled via options for testing purposes.
+      const disableAutoTruncation = options?.disableAutoTruncation ?? false;
       const fetchWithOpenAITruncation = Object.assign(
         async (
           input: Parameters<typeof fetch>[0],
@@ -249,7 +254,12 @@ export class AIService extends EventEmitter {
           const isOpenAIResponses = /\/v1\/responses(\?|$)/.test(urlString);

           const body = init?.body;
-          if (isOpenAIResponses && method === "POST" && typeof body === "string") {
+          if (
+            !disableAutoTruncation &&
+            isOpenAIResponses &&
+            method === "POST" &&
+            typeof body === "string"
+          ) {
             // Clone headers to avoid mutating caller-provided objects
             const headers = new Headers(init?.headers);
             // Remove content-length if present, since body will change
@@ -329,7 +339,8 @@ export class AIService extends EventEmitter {
     toolPolicy?: ToolPolicy,
     abortSignal?: AbortSignal,
     additionalSystemInstructions?: string,
-    maxOutputTokens?: number
+    maxOutputTokens?: number,
+    disableAutoTruncation?: boolean
   ): Promise<Result<void, SendMessageError>> {
     try {
       // DEBUG: Log streamMessage call
@@ -343,7 +354,7 @@ export class AIService extends EventEmitter {
       await this.partialService.commitToHistory(workspaceId);

       // Create model instance with early API key validation
-      const modelResult = this.createModel(modelString);
+      const modelResult = this.createModel(modelString, { disableAutoTruncation });
       if (!modelResult.success) {
         return Err(modelResult.error);
       }
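
The hunk above shows the guard around the override but not the body rewrite itself. A minimal sketch of how such a fetch wrapper could inject the parameter, assuming the request body is JSON; the helper name and exact handling here are illustrative, not code from this commit (OpenAI's Responses API accepts a top-level `truncation: "auto"` field):

```typescript
// Sketch only, not the commit's implementation: a fetch override that adds
// `truncation: "auto"` to OpenAI Responses requests when the guard passes.
const fetchWithTruncationSketch = async (
  input: Parameters<typeof fetch>[0],
  init?: Parameters<typeof fetch>[1]
): Promise<Response> => {
  const urlString =
    typeof input === "string" ? input : input instanceof URL ? input.toString() : input.url;
  const method = (init?.method ?? "GET").toUpperCase();
  const isOpenAIResponses = /\/v1\/responses(\?|$)/.test(urlString);
  const body = init?.body;

  if (isOpenAIResponses && method === "POST" && typeof body === "string") {
    // Clone headers so the caller's object is not mutated; drop content-length
    // because the serialized body is about to change.
    const headers = new Headers(init?.headers);
    headers.delete("content-length");
    const payload = JSON.parse(body) as Record<string, unknown>;
    payload.truncation = "auto"; // ask OpenAI to drop older turns instead of erroring
    return fetch(input, { ...init, headers, body: JSON.stringify(payload) });
  }
  return fetch(input, init);
};
```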

src/services/ipcMain.ts

Lines changed: 5 additions & 1 deletion

@@ -419,6 +419,7 @@ export class IpcMain {
       toolPolicy,
       additionalSystemInstructions,
       maxOutputTokens,
+      disableAutoTruncation,
     } = options ?? {};
     log.debug("sendMessage handler: Received", {
       workspaceId,
@@ -429,6 +430,7 @@ export class IpcMain {
       toolPolicy,
       additionalSystemInstructions,
       maxOutputTokens,
+      disableAutoTruncation,
     });
     try {
       // Early exit: empty message = either interrupt (if streaming) or invalid input
@@ -523,6 +525,7 @@ export class IpcMain {
       toolPolicy,
       additionalSystemInstructions,
       maxOutputTokens,
+      disableAutoTruncation,
     });
     const streamResult = await this.aiService.streamMessage(
       historyResult.data,
@@ -532,7 +535,8 @@ export class IpcMain {
       toolPolicy,
       undefined,
       additionalSystemInstructions,
-      maxOutputTokens
+      maxOutputTokens,
+      disableAutoTruncation
     );
     log.debug("sendMessage handler: Stream completed");
     return streamResult;

src/types/ipc.ts

Lines changed: 1 addition & 0 deletions

@@ -130,6 +130,7 @@ export interface SendMessageOptions {
   toolPolicy?: ToolPolicy;
   additionalSystemInstructions?: string;
   maxOutputTokens?: number;
+  disableAutoTruncation?: boolean; // For testing truncation behavior
 }

 // API method signatures (shared between main and preload)

tests/ipcMain/sendMessage.test.ts

Lines changed: 77 additions & 0 deletions

@@ -956,4 +956,81 @@ describeIntegration("IpcMain sendMessage integration tests", () => {
       15000
     );
   });
+
+  // OpenAI auto truncation integration test
+  // This test verifies that the truncation: "auto" parameter works correctly
+  // by first forcing a context overflow error, then verifying recovery with auto-truncation
+  describeIntegration("OpenAI auto truncation integration", () => {
+    const provider = "openai";
+    const model = "gpt-4o-mini";
+
+    test.concurrent(
+      "respects disableAutoTruncation flag",
+      async () => {
+        const { env, workspaceId, cleanup } = await setupWorkspace(provider);
+
+        try {
+          // Phase 1: Send large messages until context error occurs
+          // gpt-4o-mini has ~128k token context window
+          // Each chunk is ~10k tokens (40k chars / 4 chars per token)
+          const largeChunk = "A".repeat(40000);
+          let contextError: unknown = null;
+
+          // Send up to 20 large messages (200k tokens total)
+          // Should exceed 128k context limit and trigger error
+          for (let i = 0; i < 20; i++) {
+            const result = await sendMessageWithModel(
+              env.mockIpcRenderer,
+              workspaceId,
+              largeChunk,
+              provider,
+              model,
+              { disableAutoTruncation: true }
+            );
+
+            if (!result.success) {
+              contextError = result.error;
+              break;
+            }
+
+            // Wait for stream completion
+            const collector = createEventCollector(env.sentEvents, workspaceId);
+            await collector.waitForEvent("stream-end", 60000);
+            assertStreamSuccess(collector);
+            env.sentEvents.length = 0; // Clear events for next iteration
+          }
+
+          // Verify we hit a context error
+          expect(contextError).not.toBeNull();
+          // Check that error message contains context-related keywords
+          const errorStr = JSON.stringify(contextError).toLowerCase();
+          expect(
+            errorStr.includes("context") ||
+              errorStr.includes("length") ||
+              errorStr.includes("exceed") ||
+              errorStr.includes("token")
+          ).toBe(true);
+
+          // Phase 2: Send message with auto-truncation enabled (should succeed)
+          env.sentEvents.length = 0;
+          const successResult = await sendMessageWithModel(
+            env.mockIpcRenderer,
+            workspaceId,
+            "Final message after auto truncation",
+            provider,
+            model
+            // disableAutoTruncation defaults to false (auto-truncation enabled)
+          );
+
+          expect(successResult.success).toBe(true);
+          const collector = createEventCollector(env.sentEvents, workspaceId);
+          await collector.waitForEvent("stream-end", 60000);
+          assertStreamSuccess(collector);
+        } finally {
+          await cleanup();
+        }
+      },
+      180000 // 3 minute timeout for heavy test with multiple API calls
+    );
+  });
 });
