Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions bun.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 22 additions & 0 deletions packages/opencode/src/provider/transform.ts
Original file line number Diff line number Diff line change
Expand Up @@ -340,8 +340,30 @@ function unsupportedParts(msgs: ModelMessage[], model: Provider.Model): ModelMes
})
}

// mergeTextParts collapses a user message whose content is an array of
// text-only parts into a single text part. unsupportedParts() can leave a
// user message with several text parts (the original text plus "ERROR: …"
// placeholders), and some OpenAI-compatible backends (e.g. NVIDIA NIM)
// reject array-shaped content with a 500. With exactly one text part the
// SDK serializes content as a plain string ("content": "…"), which those
// backends accept.
function mergeTextParts(msgs: ModelMessage[], model: Provider.Model): ModelMessage[] {
  const caps = model.capabilities.input
  // Multimodal models handle array content fine; only collapse for models
  // whose input modality is restricted to text.
  if (!caps || caps.image || caps.audio || caps.video || caps.pdf) return msgs

  return msgs.map((msg) => {
    const parts = msg.content
    // Leave anything that is not a multi-part user message untouched.
    if (msg.role !== "user" || !Array.isArray(parts) || parts.length <= 1) return msg
    // A single non-text part means we cannot flatten to a string.
    if (parts.some((part) => part.type !== "text")) return msg
    const text = (parts as Array<{ type: "text"; text: string }>)
      .map((part) => part.text)
      .join("\n\n")
    return { ...msg, content: [{ type: "text" as const, text }] }
  })
}

export function message(msgs: ModelMessage[], model: Provider.Model, options: Record<string, unknown>) {
msgs = unsupportedParts(msgs, model)
msgs = mergeTextParts(msgs, model)
msgs = normalizeMessages(msgs, model, options)
if (
(model.providerID === "anthropic" ||
Expand Down
77 changes: 77 additions & 0 deletions packages/opencode/test/provider/transform.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3314,3 +3314,80 @@ describe("ProviderTransform.variants", () => {
})
})
})

describe("ProviderTransform.message - mergeTextParts for text-only models", () => {
  // Minimal NVIDIA NIM-style model whose input modality is text only
  // (image/audio/video/pdf all false), so message() should flatten
  // all-text user content into a single part.
  const model = {
    id: "nvidia/deepseek-ai/deepseek-v4-pro",
    providerID: "nvidia",
    api: {
      id: "deepseek-ai/deepseek-v4-pro",
      url: "https://integrate.api.nvidia.com/v1",
      npm: "@ai-sdk/openai-compatible",
    },
    name: "DeepSeek V4 Pro",
    capabilities: {
      temperature: true,
      reasoning: true,
      attachment: false,
      toolcall: true,
      input: { text: true, audio: false, image: false, video: false, pdf: false },
      output: { text: true, audio: false, image: false, video: false, pdf: false },
      interleaved: { field: "reasoning_content" },
    },
    cost: { input: 1.74, output: 3.48, cache: { read: 0.145, write: 0 } },
    limit: { context: 1048576, output: 393216 },
    status: "active",
    options: {},
    headers: {},
    release_date: "2026-04-24",
  } as any

  test("merges multiple text parts into one when model is text-only", () => {
    const input = [
      {
        role: "user",
        content: [
          { type: "text", text: "Analyze this school page" },
          { type: "file", mediaType: "image/png", data: "base64data", filename: "screenshot.png" },
        ],
      },
    ] as any[]

    const output = ProviderTransform.message(input, model, {})

    expect(output).toHaveLength(1)
    // The unsupported image becomes an error-text part, which then merges
    // with the original text into a single text part.
    expect(output[0].content).toHaveLength(1)
    const part = output[0].content[0] as any
    expect(part.type).toBe("text")
    expect(part.text).toContain("Analyze this school page")
    expect(part.text).toContain("ERROR")
  })

  test("does not merge when model supports images (multimodal)", () => {
    // Same model, but with image input enabled.
    const imageModel = {
      ...model,
      capabilities: {
        ...model.capabilities,
        input: { text: true, audio: false, image: true, video: false, pdf: false },
      },
    }
    // 1x1 PNG so the image part survives validation.
    const pngBase64 =
      "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg=="
    const input = [
      {
        role: "user",
        content: [
          { type: "text", text: "Analyze this" },
          { type: "image", image: `data:image/png;base64,${pngBase64}` },
        ],
      },
    ] as any[]

    const output = ProviderTransform.message(input, imageModel, {})

    expect(output).toHaveLength(1)
    // Supported image parts stay intact, so content remains two parts.
    expect(output[0].content).toHaveLength(2)
  })
})
36 changes: 36 additions & 0 deletions packages/opencode/test/tool/fixtures/models-api.json
Original file line number Diff line number Diff line change
Expand Up @@ -13265,6 +13265,42 @@
"cost": { "input": 0, "output": 0 },
"limit": { "context": 163840, "output": 65536 }
},
"deepseek-ai/deepseek-v4-flash": {
"id": "deepseek-ai/deepseek-v4-flash",
"name": "DeepSeek V4 Flash",
"family": "deepseek-flash",
"attachment": false,
"reasoning": true,
"tool_call": true,
"interleaved": { "field": "reasoning_content" },
"structured_output": true,
"temperature": true,
"knowledge": "2025-05",
"release_date": "2026-04-24",
"last_updated": "2026-04-24",
"modalities": { "input": ["text"], "output": ["text"] },
"open_weights": true,
"cost": { "input": 0.14, "output": 0.28, "cache_read": 0.028 },
"limit": { "context": 1048576, "output": 393216 }
},
"deepseek-ai/deepseek-v4-pro": {
"id": "deepseek-ai/deepseek-v4-pro",
"name": "DeepSeek V4 Pro",
"family": "deepseek-thinking",
"attachment": false,
"reasoning": true,
"tool_call": true,
"interleaved": { "field": "reasoning_content" },
"structured_output": true,
"temperature": true,
"knowledge": "2025-05",
"release_date": "2026-04-24",
"last_updated": "2026-04-24",
"modalities": { "input": ["text"], "output": ["text"] },
"open_weights": true,
"cost": { "input": 1.74, "output": 3.48, "cache_read": 0.145 },
"limit": { "context": 1048576, "output": 393216 }
},
"qwen/qwq-32b": {
"id": "qwen/qwq-32b",
"name": "Qwq 32b",
Expand Down
Loading