Skip to content

Commit 5c24be2

Browse files
authored
🤖 Sanitize malformed tool inputs in chat history (#442)
Fixes workspace errors when chat history contains corrupted tool inputs (e.g., string instead of object). ## Problem The httpjail-coder workspace produced this error: ``` messages.1.content.3.tool_use.input: Input should be a valid dictionary ``` Root cause: A message in history had a malformed tool input where the AI generated invalid JSON: ```json { "type": "dynamic-tool", "input": "{\"script\" timeout_secs=\"10\": \"ls\"}" // String instead of object } ``` When loaded from history and sent to the API, this was rejected. ## Solution Created `sanitizeToolInputs()` that runs before sending messages to the API: - Scans all assistant messages for tool parts - Replaces invalid inputs (string/null/array) with empty objects `{}` - Preserves valid object inputs unchanged - Original history remains untouched ## Testing - 10 tests including reproduction with the actual problematic message - All edge cases covered (string, null, array, valid objects) - No existing tests broken _Generated with `cmux`_
1 parent 5c93d22 commit 5c24be2

File tree

3 files changed

+259
-1
lines changed

3 files changed

+259
-1
lines changed

src/services/aiService.ts

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import * as os from "os";
33
import { EventEmitter } from "events";
44
import { convertToModelMessages, type LanguageModel } from "ai";
55
import { applyToolOutputRedaction } from "@/utils/messages/applyToolOutputRedaction";
6+
import { sanitizeToolInputs } from "@/utils/messages/sanitizeToolInput";
67
import type { Result } from "@/types/result";
78
import { Ok, Err } from "@/types/result";
89
import type { WorkspaceMetadata } from "@/types/workspace";
@@ -461,10 +462,16 @@ export class AIService extends EventEmitter {
461462
const redactedForProvider = applyToolOutputRedaction(messagesWithModeContext);
462463
log.debug_obj(`${workspaceId}/2a_redacted_messages.json`, redactedForProvider);
463464

465+
// Sanitize tool inputs to ensure they are valid objects (not strings or arrays)
466+
// This fixes cases where corrupted data in history has malformed tool inputs
467+
// that would cause API errors like "Input should be a valid dictionary"
468+
const sanitizedMessages = sanitizeToolInputs(redactedForProvider);
469+
log.debug_obj(`${workspaceId}/2b_sanitized_messages.json`, sanitizedMessages);
470+
464471
// Convert CmuxMessage to ModelMessage format using Vercel AI SDK utility
465472
// Type assertion needed because CmuxMessage has custom tool parts for interrupted tools
466473
// eslint-disable-next-line @typescript-eslint/no-explicit-any, @typescript-eslint/no-unsafe-argument
467-
const modelMessages = convertToModelMessages(redactedForProvider as any);
474+
const modelMessages = convertToModelMessages(sanitizedMessages as any);
468475
log.debug_obj(`${workspaceId}/2_model_messages.json`, modelMessages);
469476

470477
// Apply ModelMessage transforms based on provider requirements
Lines changed: 194 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,194 @@
1+
import { describe, it, expect } from "@jest/globals";
2+
import type { CmuxMessage } from "@/types/message";
3+
import { sanitizeToolInputs } from "./sanitizeToolInput";
4+
5+
describe("sanitizeToolInputs", () => {
6+
it("should handle the actual malformed message from httpjail-coder workspace", () => {
7+
// This is the actual problematic message that caused the bug
8+
const problematicMessage: CmuxMessage = {
9+
id: "assistant-1761527027508-karjrpf3g",
10+
role: "assistant",
11+
metadata: {
12+
historySequence: 1,
13+
timestamp: 1761527027508,
14+
partial: true,
15+
},
16+
parts: [
17+
{
18+
type: "text",
19+
text: "I'll explore this repository.",
20+
},
21+
{
22+
type: "dynamic-tool",
23+
toolCallId: "toolu_01DXeXp8oArG4PzT9rk4hz5c",
24+
toolName: "bash",
25+
state: "output-available",
26+
// THIS IS THE MALFORMED INPUT - string instead of object
27+
input: '{"script" timeout_secs="10": "ls"}',
28+
output: {
29+
error: "Invalid input for tool bash: JSON parsing failed",
30+
},
31+
},
32+
],
33+
};
34+
35+
const sanitized = sanitizeToolInputs([problematicMessage]);
36+
const sanitizedTool = sanitized[0].parts[1];
37+
38+
if (sanitizedTool.type === "dynamic-tool") {
39+
// Should be converted to empty object
40+
expect(sanitizedTool.input).toEqual({});
41+
}
42+
});
43+
44+
it("should convert string inputs to empty objects", () => {
45+
const messages: CmuxMessage[] = [
46+
{
47+
id: "test-1",
48+
role: "assistant",
49+
parts: [
50+
{
51+
type: "dynamic-tool",
52+
toolCallId: "toolu_01test",
53+
toolName: "bash",
54+
state: "output-available",
55+
input: "not an object",
56+
output: { error: "Invalid input" },
57+
},
58+
],
59+
metadata: { timestamp: Date.now(), historySequence: 1 },
60+
},
61+
];
62+
63+
const sanitized = sanitizeToolInputs(messages);
64+
expect(sanitized[0].parts[0]).toMatchObject({
65+
type: "dynamic-tool",
66+
input: {}, // Should be converted to empty object
67+
});
68+
});
69+
70+
it("should keep valid object inputs unchanged", () => {
71+
const messages: CmuxMessage[] = [
72+
{
73+
id: "test-2",
74+
role: "assistant",
75+
parts: [
76+
{
77+
type: "dynamic-tool",
78+
toolCallId: "toolu_02test",
79+
toolName: "bash",
80+
state: "output-available",
81+
input: { script: "ls", timeout_secs: 10 },
82+
output: { success: true },
83+
},
84+
],
85+
metadata: { timestamp: Date.now(), historySequence: 2 },
86+
},
87+
];
88+
89+
const sanitized = sanitizeToolInputs(messages);
90+
expect(sanitized[0].parts[0]).toMatchObject({
91+
type: "dynamic-tool",
92+
input: { script: "ls", timeout_secs: 10 },
93+
});
94+
});
95+
96+
it("should not modify non-assistant messages", () => {
97+
const messages: CmuxMessage[] = [
98+
{
99+
id: "test-3",
100+
role: "user",
101+
parts: [{ type: "text", text: "Hello" }],
102+
metadata: { timestamp: Date.now(), historySequence: 3 },
103+
},
104+
];
105+
106+
const sanitized = sanitizeToolInputs(messages);
107+
expect(sanitized).toEqual(messages);
108+
});
109+
110+
it("should handle messages with multiple parts", () => {
111+
const messages: CmuxMessage[] = [
112+
{
113+
id: "test-4",
114+
role: "assistant",
115+
parts: [
116+
{ type: "text", text: "Let me run this command" },
117+
{
118+
type: "dynamic-tool",
119+
toolCallId: "toolu_04test",
120+
toolName: "bash",
121+
state: "output-available",
122+
input: "malformed",
123+
output: { error: "bad" },
124+
},
125+
{ type: "text", text: "Done" },
126+
],
127+
metadata: { timestamp: Date.now(), historySequence: 4 },
128+
},
129+
];
130+
131+
const sanitized = sanitizeToolInputs(messages);
132+
expect(sanitized[0].parts[1]).toMatchObject({
133+
type: "dynamic-tool",
134+
input: {},
135+
});
136+
// Other parts should be unchanged
137+
expect(sanitized[0].parts[0]).toEqual({ type: "text", text: "Let me run this command" });
138+
expect(sanitized[0].parts[2]).toEqual({ type: "text", text: "Done" });
139+
});
140+
141+
it("should handle null input", () => {
142+
const messages: CmuxMessage[] = [
143+
{
144+
id: "test-null",
145+
role: "assistant",
146+
parts: [
147+
{
148+
type: "dynamic-tool",
149+
toolCallId: "toolu_null",
150+
toolName: "bash",
151+
state: "output-available",
152+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
153+
input: null as any,
154+
output: { error: "Invalid" },
155+
},
156+
],
157+
metadata: { timestamp: Date.now(), historySequence: 1 },
158+
},
159+
];
160+
161+
const sanitized = sanitizeToolInputs(messages);
162+
const toolPart = sanitized[0].parts[0];
163+
if (toolPart.type === "dynamic-tool") {
164+
expect(toolPart.input).toEqual({});
165+
}
166+
});
167+
168+
it("should handle array input", () => {
169+
const messages: CmuxMessage[] = [
170+
{
171+
id: "test-array",
172+
role: "assistant",
173+
parts: [
174+
{
175+
type: "dynamic-tool",
176+
toolCallId: "toolu_array",
177+
toolName: "bash",
178+
state: "output-available",
179+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
180+
input: ["not", "valid"] as any,
181+
output: { error: "Invalid" },
182+
},
183+
],
184+
metadata: { timestamp: Date.now(), historySequence: 1 },
185+
},
186+
];
187+
188+
const sanitized = sanitizeToolInputs(messages);
189+
const toolPart = sanitized[0].parts[0];
190+
if (toolPart.type === "dynamic-tool") {
191+
expect(toolPart.input).toEqual({});
192+
}
193+
});
194+
});
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
import type { CmuxMessage, CmuxToolPart } from "@/types/message";
2+
3+
/**
4+
* Sanitizes tool inputs in messages to ensure they are valid objects.
5+
*
6+
* The Anthropic API (and other LLM APIs) require tool inputs to be objects/dictionaries.
7+
* However, if the model generates malformed JSON or if we have corrupted data in history,
8+
* the input field might be a string instead of an object.
9+
*
10+
* This causes API errors like: "Input should be a valid dictionary"
11+
*
12+
* This function ensures all tool inputs are objects by converting non-object inputs
13+
* to empty objects. This allows the conversation to continue even with corrupted history.
14+
*
15+
* @param messages - Messages to sanitize
16+
* @returns New array with sanitized messages (original messages are not modified)
17+
*/
18+
export function sanitizeToolInputs(messages: CmuxMessage[]): CmuxMessage[] {
19+
return messages.map((msg) => {
20+
// Only process assistant messages with tool parts
21+
if (msg.role !== "assistant") {
22+
return msg;
23+
}
24+
25+
// Check if any parts need sanitization
26+
const needsSanitization = msg.parts.some(
27+
(part) =>
28+
part.type === "dynamic-tool" &&
29+
(typeof part.input !== "object" || part.input === null || Array.isArray(part.input))
30+
);
31+
32+
if (!needsSanitization) {
33+
return msg;
34+
}
35+
36+
// Create new message with sanitized parts
37+
return {
38+
...msg,
39+
parts: msg.parts.map((part): typeof part => {
40+
if (part.type !== "dynamic-tool") {
41+
return part;
42+
}
43+
44+
// Sanitize the input if it's not a valid object
45+
if (typeof part.input !== "object" || part.input === null || Array.isArray(part.input)) {
46+
const sanitized: CmuxToolPart = {
47+
...part,
48+
input: {}, // Replace with empty object
49+
};
50+
return sanitized;
51+
}
52+
53+
return part;
54+
}),
55+
};
56+
});
57+
}

0 commit comments

Comments
 (0)