diff --git a/bun.lock b/bun.lock
index c0337c4a6102..3c5c3e81a0ab 100644
--- a/bun.lock
+++ b/bun.lock
@@ -2149,7 +2149,7 @@
 
     "@solidjs/router": ["@solidjs/router@0.15.4", "", { "peerDependencies": { "solid-js": "^1.8.6" } }, "sha512-WOpgg9a9T638cR+5FGbFi/IV4l2FpmBs1GpIMSPa0Ce9vyJN7Wts+X2PqMf9IYn0zUj2MlSJtm1gp7/HI/n5TQ=="],
 
-    "@solidjs/start": ["@solidjs/start@https://pkg.pr.new/@solidjs/start@dfb2020", { "dependencies": { "@babel/core": "^7.28.3", "@babel/traverse": "^7.28.3", "@babel/types": "^7.28.5", "@solidjs/meta": "^0.29.4", "@tanstack/server-functions-plugin": "1.134.5", "@types/babel__traverse": "^7.28.0", "@types/micromatch": "^4.0.9", "cookie-es": "^2.0.0", "defu": "^6.1.4", "error-stack-parser": "^2.1.4", "es-module-lexer": "^1.7.0", "esbuild": "^0.25.3", "fast-glob": "^3.3.3", "h3": "npm:h3@2.0.1-rc.4", "html-to-image": "^1.11.13", "micromatch": "^4.0.8", "path-to-regexp": "^8.2.0", "pathe": "^2.0.3", "radix3": "^1.1.2", "seroval": "^1.3.2", "seroval-plugins": "^1.2.1", "shiki": "^1.26.1", "solid-js": "^1.9.9", "source-map-js": "^1.2.1", "srvx": "^0.9.1", "terracotta": "^1.0.6", "vite": "7.1.10", "vite-plugin-solid": "^2.11.9", "vitest": "^4.0.10" } }, "sha512-7JjjA49VGNOsMRI8QRUhVudZmv0CnJ18SliSgK1ojszs/c3ijftgVkzvXdkSLN4miDTzbkXewf65D6ZBo6W+GQ=="],
+    "@solidjs/start": ["@solidjs/start@https://pkg.pr.new/@solidjs/start@dfb2020", { "dependencies": { "@babel/core": "^7.28.3", "@babel/traverse": "^7.28.3", "@babel/types": "^7.28.5", "@solidjs/meta": "^0.29.4", "@tanstack/server-functions-plugin": "1.134.5", "@types/babel__traverse": "^7.28.0", "@types/micromatch": "^4.0.9", "cookie-es": "^2.0.0", "defu": "^6.1.4", "error-stack-parser": "^2.1.4", "es-module-lexer": "^1.7.0", "esbuild": "^0.25.3", "fast-glob": "^3.3.3", "h3": "npm:h3@2.0.1-rc.4", "html-to-image": "^1.11.13", "micromatch": "^4.0.8", "path-to-regexp": "^8.2.0", "pathe": "^2.0.3", "radix3": "^1.1.2", "seroval": "^1.3.2", "seroval-plugins": "^1.2.1", "shiki": "^1.26.1", "solid-js": "^1.9.9", "source-map-js": "^1.2.1", "srvx": "^0.9.1", "terracotta": "^1.0.6", "vite": "7.1.10", "vite-plugin-solid": "^2.11.9", "vitest": "^4.0.10" } }],
 
     "@speed-highlight/core": ["@speed-highlight/core@1.2.15", "", {}, "sha512-BMq1K3DsElxDWawkX6eLg9+CKJrTVGCBAWVuHXVUV2u0s2711qiChLSId6ikYPfxhdYocLNt3wWwSvDiTvFabw=="],
 
@@ -3319,7 +3319,7 @@
 
     "get-tsconfig": ["get-tsconfig@4.13.8", "", { "dependencies": { "resolve-pkg-maps": "^1.0.0" } }, "sha512-J87BxkLXykmisLQ+KA4x2+O6rVf+PJrtFUO8lGyiRg4lyxJLJ8/v0sRAKdVZQOy6tR6lMRAF1NqzCf9BQijm0w=="],
 
-    "ghostty-web": ["ghostty-web@github:anomalyco/ghostty-web#20bd361", {}, "anomalyco-ghostty-web-20bd361", "sha512-dW0nwaiBBcun9y5WJSvm3HxDLe5o9V0xLCndQvWonRVubU8CS1PHxZpLffyPt1YujPWC13ez03aWxcuKBPYYGQ=="],
+    "ghostty-web": ["ghostty-web@github:anomalyco/ghostty-web#20bd361", {}, "anomalyco-ghostty-web-20bd361"],
 
     "gifwrap": ["gifwrap@0.10.1", "", { "dependencies": { "image-q": "^4.0.0", "omggif": "^1.0.10" } }, "sha512-2760b1vpJHNmLzZ/ubTtNnEx5WApN/PYWJvXvgS+tL1egTTthayFYIQQNi136FLEDcN/IyEY2EcGpIITD6eYUw=="],
 
diff --git a/packages/opencode/src/provider/transform.ts b/packages/opencode/src/provider/transform.ts
index a8f2fcf30857..a62ca5596d7f 100644
--- a/packages/opencode/src/provider/transform.ts
+++ b/packages/opencode/src/provider/transform.ts
@@ -340,8 +340,30 @@ function unsupportedParts(msgs: ModelMessage[], model: Provider.Model): ModelMessage[] {
   })
 }
 
+// After unsupportedParts converts non-text file/image parts to error-text parts, a user message
+// may end up with multiple text-only parts (e.g. original text + "ERROR: Cannot read image…").
+// Some OpenAI-compatible backends (e.g. NVIDIA NIM) respond with a 500 when content is an array
+// instead of a plain string. For text-only models, merging all-text content into one part lets
+// the SDK emit the scalar form ("content": "…") instead of an array.
+function mergeTextParts(msgs: ModelMessage[], model: Provider.Model): ModelMessage[] {
+  // Only needed for models that don't support any non-text input; multimodal models handle arrays fine.
+  const input = model.capabilities.input
+  if (!input) return msgs
+  const isTextOnly = !input.image && !input.audio && !input.video && !input.pdf
+  if (!isTextOnly) return msgs
+
+  return msgs.map((msg) => {
+    if (msg.role !== "user" || !Array.isArray(msg.content) || msg.content.length <= 1) return msg
+    const allText = msg.content.every((part) => part.type === "text")
+    if (!allText) return msg
+    const merged = (msg.content as Array<{ type: "text"; text: string }>).map((p) => p.text).join("\n\n")
+    return { ...msg, content: [{ type: "text" as const, text: merged }] }
+  })
+}
+
 export function message(msgs: ModelMessage[], model: Provider.Model, options: Record<string, any>) {
   msgs = unsupportedParts(msgs, model)
+  msgs = mergeTextParts(msgs, model)
   msgs = normalizeMessages(msgs, model, options)
   if (
     (model.providerID === "anthropic" ||
diff --git a/packages/opencode/test/provider/transform.test.ts b/packages/opencode/test/provider/transform.test.ts
index c4831fa82f1c..8477b32e04a9 100644
--- a/packages/opencode/test/provider/transform.test.ts
+++ b/packages/opencode/test/provider/transform.test.ts
@@ -3314,3 +3314,80 @@ describe("ProviderTransform.variants", () => {
     })
   })
 })
+
+describe("ProviderTransform.message - mergeTextParts for text-only models", () => {
+  const textOnlyModel = {
+    id: "nvidia/deepseek-ai/deepseek-v4-pro",
+    providerID: "nvidia",
+    api: {
+      id: "deepseek-ai/deepseek-v4-pro",
+      url: "https://integrate.api.nvidia.com/v1",
+      npm: "@ai-sdk/openai-compatible",
+    },
+    name: "DeepSeek V4 Pro",
+    capabilities: {
+      temperature: true,
+      reasoning: true,
+      attachment: false,
+      toolcall: true,
+      input: { text: true, audio: false, image: false, video: false, pdf: false },
+      output: { text: true, audio: false, image: false, video: false, pdf: false },
+      interleaved: { field: "reasoning_content" },
+    },
+    cost: { input: 1.74, output: 3.48, cache: { read: 0.145, write: 0 } },
+    limit: { context: 1048576, output: 393216 },
+    status: "active",
+    options: {},
+    headers: {},
+    release_date: "2026-04-24",
+  } as any
+
+  test("merges multiple text parts into one when model is text-only", () => {
+    const msgs = [
+      {
+        role: "user",
+        content: [
+          { type: "text", text: "Analyze this school page" },
+          { type: "file", mediaType: "image/png", data: "base64data", filename: "screenshot.png" },
+        ],
+      },
+    ] as any[]
+
+    const result = ProviderTransform.message(msgs, textOnlyModel, {})
+
+    expect(result).toHaveLength(1)
+    // Image is unsupported → converted to error text → merged with original text into one part
+    expect(result[0].content).toHaveLength(1)
+    const part = result[0].content[0] as any
+    expect(part.type).toBe("text")
+    expect(part.text).toContain("Analyze this school page")
+    expect(part.text).toContain("ERROR")
+  })
+
+  test("does not merge when model supports images (multimodal)", () => {
+    const multimodalModel = {
+      ...textOnlyModel,
+      capabilities: {
+        ...textOnlyModel.capabilities,
+        input: { text: true, audio: false, image: true, video: false, pdf: false },
+      },
+    }
+    const validBase64 =
"iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==" + const msgs = [ + { + role: "user", + content: [ + { type: "text", text: "Analyze this" }, + { type: "image", image: `data:image/png;base64,${validBase64}` }, + ], + }, + ] as any[] + + const result = ProviderTransform.message(msgs, multimodalModel, {}) + + expect(result).toHaveLength(1) + // Image is supported → kept as-is → two separate parts, no merging + expect(result[0].content).toHaveLength(2) + }) +}) diff --git a/packages/opencode/test/tool/fixtures/models-api.json b/packages/opencode/test/tool/fixtures/models-api.json index 5a3eb7e8010e..378e3feeb7a3 100644 --- a/packages/opencode/test/tool/fixtures/models-api.json +++ b/packages/opencode/test/tool/fixtures/models-api.json @@ -13265,6 +13265,42 @@ "cost": { "input": 0, "output": 0 }, "limit": { "context": 163840, "output": 65536 } }, + "deepseek-ai/deepseek-v4-flash": { + "id": "deepseek-ai/deepseek-v4-flash", + "name": "DeepSeek V4 Flash", + "family": "deepseek-flash", + "attachment": false, + "reasoning": true, + "tool_call": true, + "interleaved": { "field": "reasoning_content" }, + "structured_output": true, + "temperature": true, + "knowledge": "2025-05", + "release_date": "2026-04-24", + "last_updated": "2026-04-24", + "modalities": { "input": ["text"], "output": ["text"] }, + "open_weights": true, + "cost": { "input": 0.14, "output": 0.28, "cache_read": 0.028 }, + "limit": { "context": 1048576, "output": 393216 } + }, + "deepseek-ai/deepseek-v4-pro": { + "id": "deepseek-ai/deepseek-v4-pro", + "name": "DeepSeek V4 Pro", + "family": "deepseek-thinking", + "attachment": false, + "reasoning": true, + "tool_call": true, + "interleaved": { "field": "reasoning_content" }, + "structured_output": true, + "temperature": true, + "knowledge": "2025-05", + "release_date": "2026-04-24", + "last_updated": "2026-04-24", + "modalities": { "input": ["text"], "output": ["text"] }, + "open_weights": true, + "cost": { "input": 1.74, "output": 3.48, "cache_read": 0.145 }, + "limit": { "context": 1048576, "output": 393216 } + }, "qwen/qwq-32b": { "id": "qwen/qwq-32b", "name": "Qwq 32b",