From 2f6c4e558c81b22149338a7213f5feaa2e952d71 Mon Sep 17 00:00:00 2001 From: Ezzy Rappeport Date: Tue, 28 Apr 2026 14:30:15 -0700 Subject: [PATCH 1/3] fix: merge text-only user message parts for NVIDIA/OpenAI-compatible backends When a user attaches images to a message sent to a text-only model (e.g. DeepSeek V4 Pro on NVIDIA NIM), unsupportedParts() converts each image to an error-text part. The resulting user message content is then an array of two or more text objects, which the @ai-sdk/openai-compatible SDK serialises as a JSON array. NVIDIA's Python backend passes that array through a str.join() call and raises "sequence item N: expected str instance, list found". Fix: after unsupportedParts runs, mergeTextParts() collapses all-text-part arrays into a single text part for text-only models. The SDK then emits "content": "..." (scalar) instead of the array form, which every backend handles correctly. Also adds DeepSeek V4 Flash and V4 Pro to the NVIDIA section of the test fixture so the models are discoverable in unit tests. --- packages/opencode/src/provider/transform.ts | 22 ++++++ .../opencode/test/provider/transform.test.ts | 76 +++++++++++++++++++ .../test/tool/fixtures/models-api.json | 36 +++++++++ 3 files changed, 134 insertions(+) diff --git a/packages/opencode/src/provider/transform.ts b/packages/opencode/src/provider/transform.ts index a8f2fcf30857..a62ca5596d7f 100644 --- a/packages/opencode/src/provider/transform.ts +++ b/packages/opencode/src/provider/transform.ts @@ -340,8 +340,30 @@ function unsupportedParts(msgs: ModelMessage[], model: Provider.Model): ModelMes }) } +// After unsupportedParts converts non-text file/image parts to error-text parts, a user message +// may end up with multiple text-only parts (e.g. original text + "ERROR: Cannot read image…"). +// Some OpenAI-compatible backends (e.g. NVIDIA NIM) send a 500 when content is an array instead +// of a plain string. 
For text-only models, merging all-text content into one part lets the SDK +// emit the scalar form ("content": "…") instead of an array. +function mergeTextParts(msgs: ModelMessage[], model: Provider.Model): ModelMessage[] { + // Only needed for models that don't support any non-text input; multimodal models handle arrays fine. + const input = model.capabilities.input + if (!input) return msgs + const isTextOnly = !input.image && !input.audio && !input.video && !input.pdf + if (!isTextOnly) return msgs + + return msgs.map((msg) => { + if (msg.role !== "user" || !Array.isArray(msg.content) || msg.content.length <= 1) return msg + const allText = msg.content.every((part) => part.type === "text") + if (!allText) return msg + const merged = (msg.content as Array<{ type: "text"; text: string }>).map((p) => p.text).join("\n\n") + return { ...msg, content: [{ type: "text" as const, text: merged }] } + }) +} + export function message(msgs: ModelMessage[], model: Provider.Model, options: Record<string, any>) { msgs = unsupportedParts(msgs, model) + msgs = mergeTextParts(msgs, model) msgs = normalizeMessages(msgs, model, options) if ( (model.providerID === "anthropic" || diff --git a/packages/opencode/test/provider/transform.test.ts b/packages/opencode/test/provider/transform.test.ts index c4831fa82f1c..f6f48613d673 100644 --- a/packages/opencode/test/provider/transform.test.ts +++ b/packages/opencode/test/provider/transform.test.ts @@ -3314,3 +3314,79 @@ describe("ProviderTransform.variants", () => { }) }) }) + +describe("ProviderTransform.message - mergeTextParts for text-only models", () => { + const textOnlyModel = { + id: "nvidia/deepseek-ai/deepseek-v4-pro", + providerID: "nvidia", + api: { + id: "deepseek-ai/deepseek-v4-pro", + url: "https://integrate.api.nvidia.com/v1", + npm: "@ai-sdk/openai-compatible", + }, + name: "DeepSeek V4 Pro", + capabilities: { + temperature: true, + reasoning: true, + attachment: false, + toolcall: true, + input: { text: true, audio: false, image: 
false, video: false, pdf: false }, + output: { text: true, audio: false, image: false, video: false, pdf: false }, + interleaved: { field: "reasoning_content" }, + }, + cost: { input: 1.74, output: 3.48, cache: { read: 0.145, write: 0 } }, + limit: { context: 1048576, output: 393216 }, + status: "active", + options: {}, + headers: {}, + release_date: "2026-04-24", + } as any + + test("merges multiple text parts into one when model is text-only", () => { + const msgs = [ + { + role: "user", + content: [ + { type: "text", text: "Analyze this school page" }, + { type: "file", mediaType: "image/png", data: "base64data", filename: "screenshot.png" }, + ], + }, + ] as any[] + + const result = ProviderTransform.message(msgs, textOnlyModel, {}) + + expect(result).toHaveLength(1) + // Image is unsupported → converted to error text → merged with original text into one part + expect(result[0].content).toHaveLength(1) + expect(result[0].content[0].type).toBe("text") + expect(result[0].content[0].text).toContain("Analyze this school page") + expect(result[0].content[0].text).toContain("ERROR") + }) + + test("does not merge when model supports images (multimodal)", () => { + const multimodalModel = { + ...textOnlyModel, + capabilities: { + ...textOnlyModel.capabilities, + input: { text: true, audio: false, image: true, video: false, pdf: false }, + }, + } + const validBase64 = + "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==" + const msgs = [ + { + role: "user", + content: [ + { type: "text", text: "Analyze this" }, + { type: "image", image: `data:image/png;base64,${validBase64}` }, + ], + }, + ] as any[] + + const result = ProviderTransform.message(msgs, multimodalModel, {}) + + expect(result).toHaveLength(1) + // Image is supported → kept as-is → two separate parts, no merging + expect(result[0].content).toHaveLength(2) + }) +}) diff --git a/packages/opencode/test/tool/fixtures/models-api.json 
b/packages/opencode/test/tool/fixtures/models-api.json index 5a3eb7e8010e..378e3feeb7a3 100644 --- a/packages/opencode/test/tool/fixtures/models-api.json +++ b/packages/opencode/test/tool/fixtures/models-api.json @@ -13265,6 +13265,42 @@ "cost": { "input": 0, "output": 0 }, "limit": { "context": 163840, "output": 65536 } }, + "deepseek-ai/deepseek-v4-flash": { + "id": "deepseek-ai/deepseek-v4-flash", + "name": "DeepSeek V4 Flash", + "family": "deepseek-flash", + "attachment": false, + "reasoning": true, + "tool_call": true, + "interleaved": { "field": "reasoning_content" }, + "structured_output": true, + "temperature": true, + "knowledge": "2025-05", + "release_date": "2026-04-24", + "last_updated": "2026-04-24", + "modalities": { "input": ["text"], "output": ["text"] }, + "open_weights": true, + "cost": { "input": 0.14, "output": 0.28, "cache_read": 0.028 }, + "limit": { "context": 1048576, "output": 393216 } + }, + "deepseek-ai/deepseek-v4-pro": { + "id": "deepseek-ai/deepseek-v4-pro", + "name": "DeepSeek V4 Pro", + "family": "deepseek-thinking", + "attachment": false, + "reasoning": true, + "tool_call": true, + "interleaved": { "field": "reasoning_content" }, + "structured_output": true, + "temperature": true, + "knowledge": "2025-05", + "release_date": "2026-04-24", + "last_updated": "2026-04-24", + "modalities": { "input": ["text"], "output": ["text"] }, + "open_weights": true, + "cost": { "input": 1.74, "output": 3.48, "cache_read": 0.145 }, + "limit": { "context": 1048576, "output": 393216 } + }, "qwen/qwq-32b": { "id": "qwen/qwq-32b", "name": "Qwq 32b", From 42e7ceb511e68b7fbe67126e713bc24a811317aa Mon Sep 17 00:00:00 2001 From: Ezzy Rappeport Date: Tue, 28 Apr 2026 14:33:12 -0700 Subject: [PATCH 2/3] chore: update bun.lock --- bun.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bun.lock b/bun.lock index c0337c4a6102..3c5c3e81a0ab 100644 --- a/bun.lock +++ b/bun.lock @@ -2149,7 +2149,7 @@ "@solidjs/router": 
["@solidjs/router@0.15.4", "", { "peerDependencies": { "solid-js": "^1.8.6" } }, "sha512-WOpgg9a9T638cR+5FGbFi/IV4l2FpmBs1GpIMSPa0Ce9vyJN7Wts+X2PqMf9IYn0zUj2MlSJtm1gp7/HI/n5TQ=="], - "@solidjs/start": ["@solidjs/start@https://pkg.pr.new/@solidjs/start@dfb2020", { "dependencies": { "@babel/core": "^7.28.3", "@babel/traverse": "^7.28.3", "@babel/types": "^7.28.5", "@solidjs/meta": "^0.29.4", "@tanstack/server-functions-plugin": "1.134.5", "@types/babel__traverse": "^7.28.0", "@types/micromatch": "^4.0.9", "cookie-es": "^2.0.0", "defu": "^6.1.4", "error-stack-parser": "^2.1.4", "es-module-lexer": "^1.7.0", "esbuild": "^0.25.3", "fast-glob": "^3.3.3", "h3": "npm:h3@2.0.1-rc.4", "html-to-image": "^1.11.13", "micromatch": "^4.0.8", "path-to-regexp": "^8.2.0", "pathe": "^2.0.3", "radix3": "^1.1.2", "seroval": "^1.3.2", "seroval-plugins": "^1.2.1", "shiki": "^1.26.1", "solid-js": "^1.9.9", "source-map-js": "^1.2.1", "srvx": "^0.9.1", "terracotta": "^1.0.6", "vite": "7.1.10", "vite-plugin-solid": "^2.11.9", "vitest": "^4.0.10" } }, "sha512-7JjjA49VGNOsMRI8QRUhVudZmv0CnJ18SliSgK1ojszs/c3ijftgVkzvXdkSLN4miDTzbkXewf65D6ZBo6W+GQ=="], + "@solidjs/start": ["@solidjs/start@https://pkg.pr.new/@solidjs/start@dfb2020", { "dependencies": { "@babel/core": "^7.28.3", "@babel/traverse": "^7.28.3", "@babel/types": "^7.28.5", "@solidjs/meta": "^0.29.4", "@tanstack/server-functions-plugin": "1.134.5", "@types/babel__traverse": "^7.28.0", "@types/micromatch": "^4.0.9", "cookie-es": "^2.0.0", "defu": "^6.1.4", "error-stack-parser": "^2.1.4", "es-module-lexer": "^1.7.0", "esbuild": "^0.25.3", "fast-glob": "^3.3.3", "h3": "npm:h3@2.0.1-rc.4", "html-to-image": "^1.11.13", "micromatch": "^4.0.8", "path-to-regexp": "^8.2.0", "pathe": "^2.0.3", "radix3": "^1.1.2", "seroval": "^1.3.2", "seroval-plugins": "^1.2.1", "shiki": "^1.26.1", "solid-js": "^1.9.9", "source-map-js": "^1.2.1", "srvx": "^0.9.1", "terracotta": "^1.0.6", "vite": "7.1.10", "vite-plugin-solid": "^2.11.9", "vitest": "^4.0.10" } }], 
"@speed-highlight/core": ["@speed-highlight/core@1.2.15", "", {}, "sha512-BMq1K3DsElxDWawkX6eLg9+CKJrTVGCBAWVuHXVUV2u0s2711qiChLSId6ikYPfxhdYocLNt3wWwSvDiTvFabw=="], @@ -3319,7 +3319,7 @@ "get-tsconfig": ["get-tsconfig@4.13.8", "", { "dependencies": { "resolve-pkg-maps": "^1.0.0" } }, "sha512-J87BxkLXykmisLQ+KA4x2+O6rVf+PJrtFUO8lGyiRg4lyxJLJ8/v0sRAKdVZQOy6tR6lMRAF1NqzCf9BQijm0w=="], - "ghostty-web": ["ghostty-web@github:anomalyco/ghostty-web#20bd361", {}, "anomalyco-ghostty-web-20bd361", "sha512-dW0nwaiBBcun9y5WJSvm3HxDLe5o9V0xLCndQvWonRVubU8CS1PHxZpLffyPt1YujPWC13ez03aWxcuKBPYYGQ=="], + "ghostty-web": ["ghostty-web@github:anomalyco/ghostty-web#20bd361", {}, "anomalyco-ghostty-web-20bd361"], "gifwrap": ["gifwrap@0.10.1", "", { "dependencies": { "image-q": "^4.0.0", "omggif": "^1.0.10" } }, "sha512-2760b1vpJHNmLzZ/ubTtNnEx5WApN/PYWJvXvgS+tL1egTTthayFYIQQNi136FLEDcN/IyEY2EcGpIITD6eYUw=="], From 63f5cb86353451ac1f8da588c8e9e6c9d3dfc570 Mon Sep 17 00:00:00 2001 From: Ezzy Rappeport Date: Tue, 28 Apr 2026 14:37:25 -0700 Subject: [PATCH 3/3] fix: cast content part to any in test to satisfy TypeScript --- packages/opencode/test/provider/transform.test.ts | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/packages/opencode/test/provider/transform.test.ts b/packages/opencode/test/provider/transform.test.ts index f6f48613d673..8477b32e04a9 100644 --- a/packages/opencode/test/provider/transform.test.ts +++ b/packages/opencode/test/provider/transform.test.ts @@ -3358,9 +3358,10 @@ describe("ProviderTransform.message - mergeTextParts for text-only models", () = expect(result).toHaveLength(1) // Image is unsupported → converted to error text → merged with original text into one part expect(result[0].content).toHaveLength(1) - expect(result[0].content[0].type).toBe("text") - expect(result[0].content[0].text).toContain("Analyze this school page") - expect(result[0].content[0].text).toContain("ERROR") + const part = result[0].content[0] as any + 
expect(part.type).toBe("text") + expect(part.text).toContain("Analyze this school page") + expect(part.text).toContain("ERROR") }) test("does not merge when model supports images (multimodal)", () => {