diff --git a/docs/plugins/google-genai.md b/docs/plugins/google-genai.md index 7e064622fe..4cb2bbd2a9 100644 --- a/docs/plugins/google-genai.md +++ b/docs/plugins/google-genai.md @@ -85,3 +85,28 @@ const embedding = await embed({ content: input, }); ``` + +## Gemini Files API + +You can use files uploaded to the Gemini Files API with Genkit: + +```js +import { GoogleAIFileManager } from '@google/generative-ai/server'; + +const fileManager = new GoogleAIFileManager(process.env.GOOGLE_GENAI_API_KEY); +const uploadResult = await fileManager.uploadFile( + 'path/to/file.jpg', + { + mimeType: 'image/jpeg', + displayName: 'Your Image', + } +); + +const response = await generate({ + model: gemini15Flash, + prompt: [ + {text: "Describe this image:"}, + {media: {contentType: uploadResult.file.mimeType, url: uploadResult.file.uri}} + ] +}); +``` diff --git a/js/ai/src/model/middleware.ts b/js/ai/src/model/middleware.ts index 31add04396..5a16cc1f9d 100644 --- a/js/ai/src/model/middleware.ts +++ b/js/ai/src/model/middleware.ts @@ -15,7 +15,13 @@ */ import { Document } from '../document.js'; -import { MessageData, ModelInfo, ModelMiddleware, Part } from '../model.js'; +import { + MediaPart, + MessageData, + ModelInfo, + ModelMiddleware, + Part, +} from '../model.js'; /** * Preprocess a GenerateRequest to download referenced http(s) media URLs and @@ -23,6 +29,7 @@ import { MessageData, ModelInfo, ModelMiddleware, Part } from '../model.js'; */ export function downloadRequestMedia(options?: { maxBytes?: number; + filter?: (part: MediaPart) => boolean; }): ModelMiddleware { return async (req, next) => { const { default: fetch } = await import('node-fetch'); @@ -33,8 +40,13 @@ export function downloadRequestMedia(options?: { req.messages.map(async (message) => { const content: Part[] = await Promise.all( message.content.map(async (part) => { - // skip non-media parts and non-http urls - if (!part.media || !part.media.url.startsWith('http')) { + // skip non-media parts and non-http 
urls, or parts that have been + // filtered out by user config + if ( + !part.media || + !part.media.url.startsWith('http') || + (options?.filter && !options?.filter(part)) + ) { return part; } diff --git a/js/plugins/googleai/package.json b/js/plugins/googleai/package.json index 46f687a63d..8902dc50f2 100644 --- a/js/plugins/googleai/package.json +++ b/js/plugins/googleai/package.json @@ -31,7 +31,7 @@ "author": "genkit", "license": "Apache-2.0", "dependencies": { - "@google/generative-ai": "^0.15.0", + "@google/generative-ai": "^0.16.0", "google-auth-library": "^9.6.3", "node-fetch": "^3.3.2", "zod": "^3.22.4" diff --git a/js/plugins/googleai/src/gemini.ts b/js/plugins/googleai/src/gemini.ts index 73eaaa3ea7..a7e89f3120 100644 --- a/js/plugins/googleai/src/gemini.ts +++ b/js/plugins/googleai/src/gemini.ts @@ -36,6 +36,7 @@ import { } from '@genkit-ai/ai/model/middleware'; import { GENKIT_CLIENT_HEADER } from '@genkit-ai/core'; import { + FileDataPart, FunctionCallPart, FunctionDeclaration, FunctionDeclarationSchemaType, @@ -247,6 +248,16 @@ function toInlineData(part: MediaPart): InlineDataPart { return { inlineData: { mimeType: contentType, data: b64Data } }; } +function toFileData(part: MediaPart): FileDataPart { + if (!part.media.contentType) + throw new Error( + 'Must supply a `contentType` when sending File URIs to Gemini.' 
+ ); + return { + fileData: { mimeType: part.media.contentType, fileUri: part.media.url }, + }; +} + function fromInlineData(inlinePart: InlineDataPart): MediaPart { // Check if the required properties exist if ( @@ -361,7 +372,10 @@ function toCustomPart(part: Part): GeminiPart { function toGeminiPart(part: Part): GeminiPart { if (part.text !== undefined) return { text: part.text }; - if (part.media) return toInlineData(part); + if (part.media) { + if (part.media.url.startsWith('data:')) return toInlineData(part); + return toFileData(part); + } if (part.toolRequest) return toFunctionCall(part); if (part.toolResponse) return toFunctionResponse(part); if (part.custom) return toCustomPart(part); @@ -466,7 +480,16 @@ export function googleAIModel( } if (model?.info?.supports?.media) { // the gemini api doesn't support downloading media from http(s) - middleware.push(downloadRequestMedia({ maxBytes: 1024 * 1024 * 10 })); + middleware.push( + downloadRequestMedia({ + maxBytes: 1024 * 1024 * 10, + // don't download files that have been uploaded using the Files API + filter: (part) => + !part.media.url.startsWith( + 'https://generativelanguage.googleapis.com/' + ), + }) + ); } return defineModel( diff --git a/js/pnpm-lock.yaml b/js/pnpm-lock.yaml index b0a7c5954b..1b63ec57e0 100644 --- a/js/pnpm-lock.yaml +++ b/js/pnpm-lock.yaml @@ -459,8 +459,8 @@ importers: specifier: workspace:* version: link:../../core '@google/generative-ai': - specifier: ^0.15.0 - version: 0.15.0 + specifier: ^0.16.0 + version: 0.16.0 google-auth-library: specifier: ^9.6.3 version: 9.7.0(encoding@0.1.13) @@ -1029,6 +1029,9 @@ importers: '@genkit-ai/vertexai': specifier: workspace:* version: link:../../plugins/vertexai + '@google/generative-ai': + specifier: ^0.15.0 + version: 0.15.0 '@opentelemetry/sdk-trace-base': specifier: ^1.22.0 version: 1.22.0(@opentelemetry/api@1.8.0) @@ -1737,6 +1740,10 @@ packages: resolution: {integrity: 
sha512-zs37judcTYFJf1U7tnuqnh7gdzF6dcWj9pNRxjA5JTONRoiQ0htrRdbefRFiewOIfXwhun5t9hbd2ray7812eQ==} engines: {node: '>=18.0.0'} + '@google/generative-ai@0.16.0': + resolution: {integrity: sha512-doB5ZNxS6m+jUZqaLCeYXfBZCdq6Ho0ibkq5/17xe1qAUZpCLWlvCDGtqFPqqO+yezNmvGatS0KhV22yiOT3DA==} + engines: {node: '>=18.0.0'} + '@grpc/grpc-js@1.10.10': resolution: {integrity: sha512-HPa/K5NX6ahMoeBv15njAc/sfF4/jmiXLar9UlC2UfHFKZzsCVLc3wbe7+7qua7w9VPh2/L6EBxyAV7/E8Wftg==} engines: {node: '>=12.10.0'} @@ -3599,11 +3606,13 @@ packages: google-p12-pem@3.1.4: resolution: {integrity: sha512-HHuHmkLgwjdmVRngf5+gSmpkyaRI6QmOg77J8tkNBHhNEI62sGHyw4/+UkgyZEI7h84NbWprXDJ+sa3xOYFvTg==} engines: {node: '>=10'} + deprecated: Package is no longer maintained hasBin: true google-p12-pem@4.0.1: resolution: {integrity: sha512-WPkN4yGtz05WZ5EhtlxNDWPhC4JIic6G8ePitwUWy4l+XPVYec+a0j0Ts47PDtW59y3RwAhUd9/h9ZZ63px6RQ==} engines: {node: '>=12.0.0'} + deprecated: Package is no longer maintained hasBin: true google-proto-files@3.0.3: @@ -5578,6 +5587,8 @@ snapshots: '@google/generative-ai@0.15.0': {} + '@google/generative-ai@0.16.0': {} + '@grpc/grpc-js@1.10.10': dependencies: '@grpc/proto-loader': 0.7.13 diff --git a/js/testapps/flow-simple-ai/package.json b/js/testapps/flow-simple-ai/package.json index 8cba6c32e2..12112a488b 100644 --- a/js/testapps/flow-simple-ai/package.json +++ b/js/testapps/flow-simple-ai/package.json @@ -23,6 +23,7 @@ "@genkit-ai/google-cloud": "workspace:*", "@genkit-ai/googleai": "workspace:*", "@genkit-ai/vertexai": "workspace:*", + "@google/generative-ai": "^0.15.0", "@opentelemetry/sdk-trace-base": "^1.22.0", "firebase-admin": "^12.1.0", "partial-json": "^0.1.7", diff --git a/js/testapps/flow-simple-ai/src/index.ts b/js/testapps/flow-simple-ai/src/index.ts index b25f8f71bd..934fda755f 100644 --- a/js/testapps/flow-simple-ai/src/index.ts +++ b/js/testapps/flow-simple-ai/src/index.ts @@ -428,3 +428,40 @@ export const invalidOutput = defineFlow( return result.output() as any; } ); + 
+import { GoogleAIFileManager } from '@google/generative-ai/server'; +const fileManager = new GoogleAIFileManager( + process.env.GOOGLE_GENAI_API_KEY || process.env.GOOGLE_API_KEY! +); +export const fileApi = defineFlow( + { + name: 'fileApi', + inputSchema: z.string(), + outputSchema: z.string(), + }, + async () => { + const uploadResult = await fileManager.uploadFile( + '../menu/data/menu.jpeg', + { + mimeType: 'image/jpeg', + displayName: 'Restaurant Menu', + } + ); + console.log(uploadResult.file); + + const result = await generate({ + model: gemini15Flash, + prompt: [ + { text: 'Describe this image:' }, + { + media: { + contentType: uploadResult.file.mimeType, + url: uploadResult.file.uri, + }, + }, + ], + }); + + return result.text(); + } +);