diff --git a/firebase-vertexai/CHANGELOG.md b/firebase-vertexai/CHANGELOG.md index 0334dc35f6b..b7db00bdde2 100644 --- a/firebase-vertexai/CHANGELOG.md +++ b/firebase-vertexai/CHANGELOG.md @@ -13,6 +13,7 @@ * [fixed] Fixed an issue with `LiveContentResponse` audio data not being present when the model was interrupted or the turn completed. (#6870) * [fixed] Fixed an issue with `LiveSession` not converting exceptions to `FirebaseVertexAIException`. (#6870) +* [feature] Enable response generation in multiple modalities. (#6901) # 16.3.0 diff --git a/firebase-vertexai/api.txt b/firebase-vertexai/api.txt index ecc567e537f..cb3d14904b3 100644 --- a/firebase-vertexai/api.txt +++ b/firebase-vertexai/api.txt @@ -132,9 +132,9 @@ package com.google.firebase.vertexai.java { method public abstract com.google.common.util.concurrent.ListenableFuture send(String text); method public abstract com.google.common.util.concurrent.ListenableFuture sendFunctionResponse(java.util.List functionList); method public abstract com.google.common.util.concurrent.ListenableFuture sendMediaStream(java.util.List mediaChunks); - method public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(); + method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(); method public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(kotlin.jvm.functions.Function1? 
functionCallHandler); - method public abstract com.google.common.util.concurrent.ListenableFuture stopAudioConversation(); + method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture stopAudioConversation(); method public abstract void stopReceiving(); field public static final com.google.firebase.vertexai.java.LiveSessionFutures.Companion Companion; } @@ -330,11 +330,13 @@ package com.google.firebase.vertexai.type { ctor public GenerateContentResponse(java.util.List candidates, com.google.firebase.vertexai.type.PromptFeedback? promptFeedback, com.google.firebase.vertexai.type.UsageMetadata? usageMetadata); method public java.util.List getCandidates(); method public java.util.List getFunctionCalls(); + method public java.util.List getInlineDataParts(); method public com.google.firebase.vertexai.type.PromptFeedback? getPromptFeedback(); method public String? getText(); method public com.google.firebase.vertexai.type.UsageMetadata? getUsageMetadata(); property public final java.util.List candidates; property public final java.util.List functionCalls; + property public final java.util.List inlineDataParts; property public final com.google.firebase.vertexai.type.PromptFeedback? promptFeedback; property public final String? text; property public final com.google.firebase.vertexai.type.UsageMetadata? usageMetadata; @@ -352,6 +354,7 @@ package com.google.firebase.vertexai.type { field public Integer? maxOutputTokens; field public Float? presencePenalty; field public String? responseMimeType; + field public java.util.List? responseModalities; field public com.google.firebase.vertexai.type.Schema? responseSchema; field public java.util.List? stopSequences; field public Float? 
temperature; @@ -690,7 +693,7 @@ package com.google.firebase.vertexai.type { public final class RequestTimeoutException extends com.google.firebase.vertexai.type.FirebaseVertexAIException { } - @com.google.firebase.vertexai.type.PublicPreviewAPI public final class ResponseModality { + public final class ResponseModality { method public int getOrdinal(); property public final int ordinal; field public static final com.google.firebase.vertexai.type.ResponseModality AUDIO; diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/GenerateContentResponse.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/GenerateContentResponse.kt index 00395252914..91c2cd8f4ea 100644 --- a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/GenerateContentResponse.kt +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/GenerateContentResponse.kt @@ -44,6 +44,18 @@ public class GenerateContentResponse( candidates.first().content.parts.filterIsInstance() } + /** + * Convenience field representing all the [InlineDataPart]s in the first candidate, if they exist. + * + * This also includes any [ImagePart], but they will be represented as [InlineDataPart] instead. + */ + public val inlineDataParts: List by lazy { + candidates.first().content.parts.let { parts -> + parts.filterIsInstance().map { it.toInlineDataPart() } + + parts.filterIsInstance() + } + } + @Serializable internal data class Internal( val candidates: List? 
= null, diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/GenerationConfig.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/GenerationConfig.kt index 4abec8a260d..88705c58a92 100644 --- a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/GenerationConfig.kt +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/GenerationConfig.kt @@ -69,6 +69,8 @@ import kotlinx.serialization.Serializable * @property responseSchema Output schema of the generated candidate text. If set, a compatible * [responseMimeType] must also be set. * + * @property responseModalities The data formats in which the model should respond. + * * Compatible MIME types: * - `application/json`: Schema for JSON response. * @@ -88,6 +90,7 @@ private constructor( internal val stopSequences: List?, internal val responseMimeType: String?, internal val responseSchema: Schema?, + internal val responseModalities: List?, ) { /** @@ -115,6 +118,9 @@ private constructor( * @property responseMimeType See [GenerationConfig.responseMimeType]. * * @property responseSchema See [GenerationConfig.responseSchema]. + * + * @property responseModalities See [GenerationConfig.responseModalities]. + * * @see [generationConfig] */ public class Builder { @@ -128,6 +134,7 @@ private constructor( @JvmField public var stopSequences: List? = null @JvmField public var responseMimeType: String? = null @JvmField public var responseSchema: Schema? = null + @JvmField public var responseModalities: List? = null /** Create a new [GenerationConfig] with the attached arguments. 
*/ public fun build(): GenerationConfig = @@ -142,6 +149,7 @@ private constructor( frequencyPenalty = frequencyPenalty, responseMimeType = responseMimeType, responseSchema = responseSchema, + responseModalities = responseModalities ) } @@ -156,7 +164,8 @@ private constructor( frequencyPenalty = frequencyPenalty, presencePenalty = presencePenalty, responseMimeType = responseMimeType, - responseSchema = responseSchema?.toInternal() + responseSchema = responseSchema?.toInternal(), + responseModalities = responseModalities?.map { it.toInternal() } ) @Serializable @@ -171,6 +180,7 @@ private constructor( @SerialName("presence_penalty") val presencePenalty: Float? = null, @SerialName("frequency_penalty") val frequencyPenalty: Float? = null, @SerialName("response_schema") val responseSchema: Schema.Internal? = null, + @SerialName("response_modalities") val responseModalities: List? = null ) public companion object { diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/Part.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/Part.kt index b2538a8d6a0..efd130c85ca 100644 --- a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/Part.kt +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/Part.kt @@ -45,7 +45,14 @@ public class TextPart(public val text: String) : Part { * * @param image [Bitmap] to convert into a [Part] */ -public class ImagePart(public val image: Bitmap) : Part +public class ImagePart(public val image: Bitmap) : Part { + + internal fun toInlineDataPart() = + InlineDataPart( + android.util.Base64.decode(encodeBitmapToBase64Png(image), BASE_64_FLAGS), + "image/jpeg" + ) +} /** * Represents binary data with an associated MIME type sent to and received from requests. 
diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/ResponseModality.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/ResponseModality.kt index e6be477f845..09343755216 100644 --- a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/ResponseModality.kt +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/ResponseModality.kt @@ -21,7 +21,6 @@ import kotlinx.serialization.KSerializer import kotlinx.serialization.Serializable /** Represents the type of content present in a response (e.g., text, image, audio). */ -@PublicPreviewAPI public class ResponseModality private constructor(public val ordinal: Int) { @Serializable(Internal.Serializer::class)