fix: LLMVisionProvider passes ContentPart[] directly instead of a JSON string

jddunn · jddunn · commit ea5b58aad514 · 2026-04-12T01:10:05.000-07:00
diff --git a/src/vision/providers/LLMVisionProvider.ts b/src/vision/providers/LLMVisionProvider.ts
@@ -177,21 +177,19 @@ export class LLMVisionProvider implements IVisionProvider {
     const { generateText } = await import('../../api/generateText.js');
 
     // Build the multimodal message with text prompt + image.
-    // The content array format is the standard multimodal message shape
-    // accepted by all major vision LLM providers (OpenAI, Anthropic, Gemini).
+    // Message.content accepts MessageContentPart[] natively, so the
+    // structured array is passed as-is rather than as a JSON string.
     const result = await generateText({
       provider: this._config.provider,
       model: this._config.model,
       apiKey: this._config.apiKey,
       baseUrl: this._config.baseUrl,
       messages: [{
         role: 'user',
-        // Serialize the content parts array as JSON. The provider adapter
-        // will parse it back into the appropriate multimodal format.
-        content: JSON.stringify([
+        content: [
           { type: 'text', text: this._prompt },
           { type: 'image_url', image_url: { url: image } },
-        ]),
+        ],
       }],
     });