agents-api/__snapshots__/openapi.json

-Original file line number
+Diff line change
@@ Expand Up / @@ -32663,6 +32663,51 @@ @@
                             "parts": {
                               "items": {
                                 "anyOf": [
+                                  {
+                                    "properties": {
+                                      "text": {
+                                        "type": "string"
+                                      },
+                                      "type": {
+                                        "enum": [
+                                          "text"
+                                        ],
+                                        "type": "string"
+                                      }
+                                    },
+                                    "required": [
+                                      "type",
+                                      "text"
+                                    ],
+                                    "type": "object"
+                                  },
+                                  {
+                                    "properties": {
+                                      "text": {
+                                        "anyOf": [
+                                          {
+                                            "format": "uri",
+                                            "type": "string"
+                                          },
+                                          {
+                                            "pattern": "^data:image\\/(png|jpeg|jpg|webp);base64,",
+                                            "type": "string"
+                                          }
+                                        ]
+                                      },
+                                      "type": {
+                                        "enum": [
+                                          "image"
+                                        ],
+                                        "type": "string"
+                                      }
+                                    },
+                                    "required": [
+                                      "type",
+                                      "text"
+                                    ],
+                                    "type": "object"
+                                  },
                                   {
                                     "properties": {
                                       "text": {
@@ Expand All / @@ -32672,8 +32717,6 @@ @@
                                         "anyOf": [
                                           {
                                             "enum": [
-                                              "text",
-                                              "image",
                                               "audio",
                                               "video",
                                               "file"
@@ Expand Down Expand Up / @@ -33264,19 +33307,69 @@ @@
                                 },
                                 {
                                   "items": {
-                                    "additionalProperties": false,
-                                    "properties": {
-                                      "text": {
-                                        "type": "string"
+                                    "oneOf": [
+                                      {
+                                        "properties": {
+                                          "text": {
+                                            "type": "string"
+                                          },
+                                          "type": {
+                                            "enum": [
+                                              "text"
+                                            ],
+                                            "type": "string"
+                                          }
+                                        },
+                                        "required": [
+                                          "type",
+                                          "text"
+                                        ],
+                                        "type": "object"
                                       },
-                                      "type": {
-                                        "type": "string"
+                                      {
+                                        "properties": {
+                                          "image_url": {
+                                            "properties": {
+                                              "detail": {
+                                                "enum": [
+                                                  "auto",
+                                                  "low",
+                                                  "high"
+                                                ],
+                                                "type": "string"
+                                              },
+                                              "url": {
+                                                "anyOf": [
+                                                  {
+                                                    "format": "uri",
+                                                    "type": "string"
+                                                  },
+                                                  {
+                                                    "pattern": "^data:image\\/(png|jpeg|jpg|webp);base64,",
+                                                    "type": "string"
+                                                  }
+                                                ]
+                                              }
+                                            },
+                                            "required": [
+                                              "url"
+                                            ],
+                                            "type": "object"
+                                          },
+                                          "type": {
+                                            "enum": [
+                                              "image_url"
+                                            ],
+                                            "type": "string"
+                                          }
+                                        },
+                                        "required": [
+                                          "type",
+                                          "image_url"
+                                        ],
+                                        "type": "object"
                                       }
-                                    },
-                                    "required": [
-                                      "type"
-                                    ],
-                                    "type": "object"
+                                    ]
                                   },
                                   "type": "array"
                                 }
@@ Expand Down @@

agents-api/src/__tests__/run/agents/Agent.test.ts

-Original file line number
+Diff line change
@@ Expand Up / @@ -129,6 +129,7 @@ const { @@
       getFullAgentDefinitionMock,
       agentHasArtifactComponentsMock,
       getToolsForAgentMock,
+      getFunctionToolsForSubAgentMock,
     } = vi.hoisted(() => {
       const getCredentialReferenceMock = vi.fn(() => vi.fn().mockResolvedValue(null));
       const getContextConfigByIdMock = vi.fn(() => vi.fn().mockResolvedValue(null));
@@ Expand All / @@ -149,6 +150,7 @@ const { @@
           pagination: { page: 1, limit: 10, total: 0, pages: 0 },
         })
       );
+      const getFunctionToolsForSubAgentMock = vi.fn().mockResolvedValue([]);
       return {
         getCredentialReferenceMock,
@@ Expand All / @@ -158,6 +160,7 @@ const { @@
         getFullAgentDefinitionMock,
         agentHasArtifactComponentsMock,
         getToolsForAgentMock,
+        getFunctionToolsForSubAgentMock,
       };
     });
@@ Expand All / @@ -176,6 +179,7 @@ vi.mock('@inkeep/agents-core', async (importOriginal) => { @@
         getFullAgentDefinition: getFullAgentDefinitionMock,
         agentHasArtifactComponents: agentHasArtifactComponentsMock,
         getToolsForAgent: getToolsForAgentMock,
+        getFunctionToolsForSubAgent: getFunctionToolsForSubAgentMock,
         createDatabaseClient: vi.fn().mockReturnValue({}),
         contextValidationMiddleware: vi.fn().mockReturnValue(async (c: any, next: any) => {
           c.set('validatedContext', {
@@ Expand Down Expand Up / @@ -1766,3 +1770,150 @@ describe('Agent Conditional Tool Availability', () => { @@
         expect(tools.get_reference_artifact).toBeDefined();
       });
     });
+    describe('Agent Image Support', () => {
+      let mockAgentConfig: AgentConfig;
+      let mockExecutionContext: any;
+      beforeEach(() => {
+        vi.clearAllMocks();
+        mockExecutionContext = createMockExecutionContext();
+        mockAgentConfig = {
+          id: 'test-agent',
+          projectId: 'test-project',
+          name: 'Test Agent',
+          description: 'Test agent for image support',
+          tenantId: 'test-tenant',
+          agentId: 'test-agent',
+          baseUrl: 'http://localhost:3000',
+          prompt: 'You are a helpful assistant that can analyze images.',
+          subAgentRelations: [],
+          transferRelations: [],
+          delegateRelations: [],
+          dataComponents: [],
+          tools: [],
+          functionTools: [],
+          models: {
+            base: {
+              model: 'anthropic/claude-sonnet-4-5',
+            },
+          },
+        };
+      });
+      test('passes text-only input to generateText (string or message object)', async () => {
+        const agent = new Agent(mockAgentConfig, mockExecutionContext);
+        const { generateText } = await import('ai');
+        await agent.generate('Simple text prompt');
+        expect(generateText).toHaveBeenNthCalledWith(
+          1,
+          expect.objectContaining({
+            messages: expect.arrayContaining([
+              expect.objectContaining({
+                role: 'user',
+                content: expect.stringContaining('Simple text prompt'),
+              }),
+            ]),
+          })
+        );
+        await agent.generate({ text: 'Just text, no images' });
+        expect(generateText).toHaveBeenNthCalledWith(
+          2,
+          expect.objectContaining({
+            messages: expect.arrayContaining([
+              expect.objectContaining({
+                role: 'user',
+                content: expect.stringContaining('Just text, no images'),
+              }),
+            ]),
+          })
+        );
+      });
+      test('passes image URL(s) to generateText in AI SDK format with optional detail metadata', async () => {
+        const agent = new Agent(mockAgentConfig, mockExecutionContext);
+        await agent.generate({
+          text: 'Compare these two screenshots',
+          imageParts: [
+            {
+              kind: 'file',
+              file: {
+                uri: 'https://example.com/before.png',
+                mimeType: 'image/png',
+              },
+            },
+            {
+              kind: 'file',
+              file: {
+                uri: 'https://example.com/after.png',
+                mimeType: 'image/png',
+              },
+              metadata: { detail: 'high' },
+            },
+          ],
+        });
+        const { generateText } = await import('ai');
+        expect(generateText).toHaveBeenCalledWith(
+          expect.objectContaining({
+            messages: expect.arrayContaining([
+              expect.objectContaining({
+                role: 'user',
+                content: expect.arrayContaining([
+                  expect.objectContaining({
+                    type: 'text',
+                    text: expect.stringContaining('Compare these two screenshots'),
+                  }),
+                  expect.objectContaining({ type: 'image', image: expect.any(URL) }),
+                  expect.objectContaining({
+                    type: 'image',
+                    image: expect.any(URL),
+                    experimental_providerMetadata: { openai: { imageDetail: 'high' } },
+                  }),
+                ]),
+              }),
+            ]),
+          })
+        );
+      });
+      test('passes base64 image data to generateText', async () => {
+        const agent = new Agent(mockAgentConfig, mockExecutionContext);
+        const base64Data =
+          'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==';
+        const expectedDataUrl = `data:image/png;base64,${base64Data}`;
+        await agent.generate({
+          text: 'Describe this screenshot',
+          imageParts: [
+            {
+              kind: 'file',
+              file: {
+                bytes: base64Data,
+                mimeType: 'image/png',
+              },
+            },
+          ],
+        });
+        const { generateText } = await import('ai');
+        expect(generateText).toHaveBeenCalledWith(
+          expect.objectContaining({
+            messages: expect.arrayContaining([
+              expect.objectContaining({
+                role: 'user',
+                content: expect.arrayContaining([
+                  expect.objectContaining({ type: 'text' }),
+                  expect.objectContaining({ type: 'image', image: expectedDataUrl }),
+                ]),
+              }),
+            ]),
+          })
+        );
+      });
+    });

feat: add image support #1737

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open

mike-inkeep wants to merge 4 commits into main from feat/image-support

+559 −53

-Original file line number
+Diff line change
@@ Expand Up / @@ -32663,6 +32663,51 @@ @@
                             "parts": {
                               "items": {
                                 "anyOf": [
+                                  {
+                                    "properties": {
+                                      "text": {
+                                        "type": "string"
+                                      },
+                                      "type": {
+                                        "enum": [
+                                          "text"
+                                        ],
+                                        "type": "string"
+                                      }
+                                    },
+                                    "required": [
+                                      "type",
+                                      "text"
+                                    ],
+                                    "type": "object"
+                                  },
+                                  {
+                                    "properties": {
+                                      "text": {
+                                        "anyOf": [
+                                          {
+                                            "format": "uri",
+                                            "type": "string"
+                                          },
+                                          {
+                                            "pattern": "^data:image\\/(png|jpeg|jpg|webp);base64,",
+                                            "type": "string"
+                                          }
+                                        ]
+                                      },
+                                      "type": {
+                                        "enum": [
+                                          "image"
+                                        ],
+                                        "type": "string"
+                                      }
+                                    },
+                                    "required": [
+                                      "type",
+                                      "text"
+                                    ],
+                                    "type": "object"
+                                  },
                                   {
                                     "properties": {
                                       "text": {
@@ Expand All / @@ -32672,8 +32717,6 @@ @@
                                         "anyOf": [
                                           {
                                             "enum": [
-                                              "text",
-                                              "image",
                                               "audio",
                                               "video",
                                               "file"
@@ Expand Down Expand Up / @@ -33264,19 +33307,69 @@ @@
                                 },
                                 {
                                   "items": {
-                                    "additionalProperties": false,
-                                    "properties": {
-                                      "text": {
-                                        "type": "string"
+                                    "oneOf": [
+                                      {
+                                        "properties": {
+                                          "text": {
+                                            "type": "string"
+                                          },
+                                          "type": {
+                                            "enum": [
+                                              "text"
+                                            ],
+                                            "type": "string"
+                                          }
+                                        },
+                                        "required": [
+                                          "type",
+                                          "text"
+                                        ],
+                                        "type": "object"
                                       },
-                                      "type": {
-                                        "type": "string"
+                                      {
+                                        "properties": {
+                                          "image_url": {
+                                            "properties": {
+                                              "detail": {
+                                                "enum": [
+                                                  "auto",
+                                                  "low",
+                                                  "high"
+                                                ],
+                                                "type": "string"
+                                              },
+                                              "url": {
+                                                "anyOf": [
+                                                  {
+                                                    "format": "uri",
+                                                    "type": "string"
+                                                  },
+                                                  {
+                                                    "pattern": "^data:image\\/(png|jpeg|jpg|webp);base64,",
+                                                    "type": "string"
+                                                  }
+                                                ]
+                                              }
+                                            },
+                                            "required": [
+                                              "url"
+                                            ],
+                                            "type": "object"
+                                          },
+                                          "type": {
+                                            "enum": [
+                                              "image_url"
+                                            ],
+                                            "type": "string"
+                                          }
+                                        },
+                                        "required": [
+                                          "type",
+                                          "image_url"
+                                        ],
+                                        "type": "object"
                                       }
-                                    },
-                                    "required": [
-                                      "type"
-                                    ],
-                                    "type": "object"
+                                    ]
                                   },
                                   "type": "array"
                                 }
@@ Expand Down @@

-Original file line number
+Diff line change
@@ Expand Up / @@ -129,6 +129,7 @@ const { @@
       getFullAgentDefinitionMock,
       agentHasArtifactComponentsMock,
       getToolsForAgentMock,
+      getFunctionToolsForSubAgentMock,
     } = vi.hoisted(() => {
       const getCredentialReferenceMock = vi.fn(() => vi.fn().mockResolvedValue(null));
       const getContextConfigByIdMock = vi.fn(() => vi.fn().mockResolvedValue(null));
@@ Expand All / @@ -149,6 +150,7 @@ const { @@
           pagination: { page: 1, limit: 10, total: 0, pages: 0 },
         })
       );
+      const getFunctionToolsForSubAgentMock = vi.fn().mockResolvedValue([]);
       return {
         getCredentialReferenceMock,
@@ Expand All / @@ -158,6 +160,7 @@ const { @@
         getFullAgentDefinitionMock,
         agentHasArtifactComponentsMock,
         getToolsForAgentMock,
+        getFunctionToolsForSubAgentMock,
       };
     });
@@ Expand All / @@ -176,6 +179,7 @@ vi.mock('@inkeep/agents-core', async (importOriginal) => { @@
         getFullAgentDefinition: getFullAgentDefinitionMock,
         agentHasArtifactComponents: agentHasArtifactComponentsMock,
         getToolsForAgent: getToolsForAgentMock,
+        getFunctionToolsForSubAgent: getFunctionToolsForSubAgentMock,
         createDatabaseClient: vi.fn().mockReturnValue({}),
         contextValidationMiddleware: vi.fn().mockReturnValue(async (c: any, next: any) => {
           c.set('validatedContext', {
@@ Expand Down Expand Up / @@ -1766,3 +1770,150 @@ describe('Agent Conditional Tool Availability', () => { @@
         expect(tools.get_reference_artifact).toBeDefined();
       });
     });
+    describe('Agent Image Support', () => {
+      let mockAgentConfig: AgentConfig;
+      let mockExecutionContext: any;
+      beforeEach(() => {
+        vi.clearAllMocks();
+        mockExecutionContext = createMockExecutionContext();
+        mockAgentConfig = {
+          id: 'test-agent',
+          projectId: 'test-project',
+          name: 'Test Agent',
+          description: 'Test agent for image support',
+          tenantId: 'test-tenant',
+          agentId: 'test-agent',
+          baseUrl: 'http://localhost:3000',
+          prompt: 'You are a helpful assistant that can analyze images.',
+          subAgentRelations: [],
+          transferRelations: [],
+          delegateRelations: [],
+          dataComponents: [],
+          tools: [],
+          functionTools: [],
+          models: {
+            base: {
+              model: 'anthropic/claude-sonnet-4-5',
+            },
+          },
+        };
+      });
+      test('passes text-only input to generateText (string or message object)', async () => {
+        const agent = new Agent(mockAgentConfig, mockExecutionContext);
+        const { generateText } = await import('ai');
+        await agent.generate('Simple text prompt');
+        expect(generateText).toHaveBeenNthCalledWith(
+          1,
+          expect.objectContaining({
+            messages: expect.arrayContaining([
+              expect.objectContaining({
+                role: 'user',
+                content: expect.stringContaining('Simple text prompt'),
+              }),
+            ]),
+          })
+        );
+        await agent.generate({ text: 'Just text, no images' });
+        expect(generateText).toHaveBeenNthCalledWith(
+          2,
+          expect.objectContaining({
+            messages: expect.arrayContaining([
+              expect.objectContaining({
+                role: 'user',
+                content: expect.stringContaining('Just text, no images'),
+              }),
+            ]),
+          })
+        );
+      });
+      test('passes image URL(s) to generateText in AI SDK format with optional detail metadata', async () => {
+        const agent = new Agent(mockAgentConfig, mockExecutionContext);
+        await agent.generate({
+          text: 'Compare these two screenshots',
+          imageParts: [
+            {
+              kind: 'file',
+              file: {
+                uri: 'https://example.com/before.png',
+                mimeType: 'image/png',
+              },
+            },
+            {
+              kind: 'file',
+              file: {
+                uri: 'https://example.com/after.png',
+                mimeType: 'image/png',
+              },
+              metadata: { detail: 'high' },
+            },
+          ],
+        });
+        const { generateText } = await import('ai');
+        expect(generateText).toHaveBeenCalledWith(
+          expect.objectContaining({
+            messages: expect.arrayContaining([
+              expect.objectContaining({
+                role: 'user',
+                content: expect.arrayContaining([
+                  expect.objectContaining({
+                    type: 'text',
+                    text: expect.stringContaining('Compare these two screenshots'),
+                  }),
+                  expect.objectContaining({ type: 'image', image: expect.any(URL) }),
+                  expect.objectContaining({
+                    type: 'image',
+                    image: expect.any(URL),
+                    experimental_providerMetadata: { openai: { imageDetail: 'high' } },
+                  }),
+                ]),
+              }),
+            ]),
+          })
+        );
+      });
+      test('passes base64 image data to generateText', async () => {
+        const agent = new Agent(mockAgentConfig, mockExecutionContext);
+        const base64Data =
+          'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==';
+        const expectedDataUrl = `data:image/png;base64,${base64Data}`;
+        await agent.generate({
+          text: 'Describe this screenshot',
+          imageParts: [
+            {
+              kind: 'file',
+              file: {
+                bytes: base64Data,
+                mimeType: 'image/png',
+              },
+            },
+          ],
+        });
+        const { generateText } = await import('ai');
+        expect(generateText).toHaveBeenCalledWith(
+          expect.objectContaining({
+            messages: expect.arrayContaining([
+              expect.objectContaining({
+                role: 'user',
+                content: expect.arrayContaining([
+                  expect.objectContaining({ type: 'text' }),
+                  expect.objectContaining({ type: 'image', image: expectedDataUrl }),
+                ]),
+              }),
+            ]),
+          })
+        );
+      });
+    });

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

feat: add image support #1737

Diff view

Diff view

There are no files selected for viewing

Uh oh!

feat: add image support #1737

Are you sure you want to change the base?

feat: add image support #1737

Uh oh!

Uh oh!

Diff view

Diff view

There are no files selected for viewing

Uh oh!