4 changes: 4 additions & 0 deletions docs/readme.md
@@ -109,6 +109,10 @@ Support

Release Notes
-------------
### Upcoming
- Changes
- Firebase AI: Add support for receiving Live API Transcripts.

### 13.4.0
- Changes
- General: Update to Firebase C++ SDK version 13.2.0.
35 changes: 30 additions & 5 deletions firebaseai/src/LiveGenerationConfig.cs
@@ -53,16 +53,32 @@ internal Dictionary<string, object> ToJson()
if (!string.IsNullOrWhiteSpace(voice))
{
dict["voiceConfig"] = new Dictionary<string, object>() {
{ "prebuiltVoiceConfig" , new Dictionary<string, object>() {
{ "voiceName", voice }
} }
};
{ "prebuiltVoiceConfig" , new Dictionary<string, object>() {
{ "voiceName", voice }
} }
};
}

return dict;
}
}

/// <summary>
/// A struct used to configure speech transcription settings.
/// </summary>
public readonly struct AudioTranscriptionConfig
{
/// <summary>
/// Intended for internal use only.
/// This method is used for serializing the object to JSON for the API request.
/// </summary>
internal Dictionary<string, object> ToJson()
{
Dictionary<string, object> dict = new();
return dict;
}
}

/// <summary>
/// A struct defining model parameters to be used when generating live session content.
/// </summary>
@@ -76,6 +92,11 @@ public readonly struct LiveGenerationConfig
private readonly int? _maxOutputTokens;
private readonly float? _presencePenalty;
private readonly float? _frequencyPenalty;
private readonly AudioTranscriptionConfig? _inputAudioTranscription;
private readonly AudioTranscriptionConfig? _outputAudioTranscription;

internal readonly AudioTranscriptionConfig? InputAudioTranscription => _inputAudioTranscription;
internal readonly AudioTranscriptionConfig? OutputAudioTranscription => _outputAudioTranscription;

/// <summary>
/// Creates a new `LiveGenerationConfig` value.
@@ -168,7 +189,9 @@ public LiveGenerationConfig(
float? topK = null,
int? maxOutputTokens = null,
float? presencePenalty = null,
float? frequencyPenalty = null)
float? frequencyPenalty = null,
AudioTranscriptionConfig? inputAudioTranscription = null,
AudioTranscriptionConfig? outputAudioTranscription = null)
{
_speechConfig = speechConfig;
_responseModalities = responseModalities != null ?
@@ -179,6 +202,8 @@ public LiveGenerationConfig(
_maxOutputTokens = maxOutputTokens;
_presencePenalty = presencePenalty;
_frequencyPenalty = frequencyPenalty;
_inputAudioTranscription = inputAudioTranscription;
_outputAudioTranscription = outputAudioTranscription;
}

/// <summary>
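The new constructor parameters make transcription opt-in per direction. Below is a minimal sketch of how a caller might enable both input and output transcripts; the `Firebase.AI` namespace is an assumption not shown in this diff, and it assumes the constructor's earlier parameters keep their null defaults. `AudioTranscriptionConfig` currently carries no settings, so a default value is sufficient.

```csharp
// Sketch only: assumes the Unity SDK's Firebase.AI namespace.
using Firebase.AI;

public static class TranscriptionConfigExample
{
    public static LiveGenerationConfig BuildConfig()
    {
        // Passing a default AudioTranscriptionConfig opts in to transcription
        // for that direction; omitting it (null) leaves transcription off.
        return new LiveGenerationConfig(
            inputAudioTranscription: new AudioTranscriptionConfig(),
            outputAudioTranscription: new AudioTranscriptionConfig());
    }
}
```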
10 changes: 10 additions & 0 deletions firebaseai/src/LiveGenerativeModel.cs
@@ -156,6 +156,16 @@ public async Task<LiveSession> ConnectAsync(CancellationToken cancellationToken
if (_liveConfig != null)
{
setupDict["generationConfig"] = _liveConfig?.ToJson();

// Input/Output Transcriptions are defined on the config, but need to be set here.
if (_liveConfig?.InputAudioTranscription.HasValue ?? false)
{
setupDict["inputAudioTranscription"] = _liveConfig?.InputAudioTranscription?.ToJson();
}
if (_liveConfig?.OutputAudioTranscription.HasValue ?? false)
{
setupDict["outputAudioTranscription"] = _liveConfig?.OutputAudioTranscription?.ToJson();
}
}
if (_systemInstruction.HasValue)
{
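For reference, the setup payload that `ConnectAsync` assembles places the transcription entries as top-level siblings of `generationConfig`, not nested inside it. A rough sketch of the resulting dictionary shape, with empty dictionaries standing in for the real `ToJson()` results:

```csharp
using System.Collections.Generic;

public static class SetupPayloadSketch
{
    // Illustrative shape only; key names are taken from the ConnectAsync
    // changes above, and the empty dictionaries stand in for ToJson() output.
    public static Dictionary<string, object> Build()
    {
        return new Dictionary<string, object>
        {
            ["generationConfig"] = new Dictionary<string, object>(),         // LiveGenerationConfig.ToJson()
            ["inputAudioTranscription"] = new Dictionary<string, object>(),  // AudioTranscriptionConfig.ToJson() (currently empty)
            ["outputAudioTranscription"] = new Dictionary<string, object>(), // AudioTranscriptionConfig.ToJson() (currently empty)
        };
    }
}
```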
46 changes: 44 additions & 2 deletions firebaseai/src/LiveSessionResponse.cs
@@ -179,11 +179,26 @@ public interface ILiveSessionMessage { }
/// </summary>
public readonly bool Interrupted { get; }

private LiveSessionContent(ModelContent? content, bool turnComplete, bool interrupted)
/// <summary>
/// The input transcription. Note that the transcription is independent of
/// the Content, and doesn't imply any ordering between them.
/// </summary>
public readonly Transcription? InputTranscription { get; }

/// <summary>
/// The output transcription. Note that the transcription is independent of
/// the Content, and doesn't imply any ordering between them.
/// </summary>
public readonly Transcription? OutputTranscription { get; }

private LiveSessionContent(ModelContent? content, bool turnComplete, bool interrupted,
Transcription? input, Transcription? output)
{
Content = content;
TurnComplete = turnComplete;
Interrupted = interrupted;
InputTranscription = input;
OutputTranscription = output;
}

/// <summary>
@@ -195,7 +210,9 @@ internal static LiveSessionContent FromJson(Dictionary<string, object> jsonDict)
return new LiveSessionContent(
jsonDict.ParseNullableObject("modelTurn", ModelContent.FromJson),
jsonDict.ParseValue<bool>("turnComplete"),
jsonDict.ParseValue<bool>("interrupted")
jsonDict.ParseValue<bool>("interrupted"),
jsonDict.ParseNullableObject("inputTranscription", Transcription.FromJson),
jsonDict.ParseNullableObject("outputTranscription", Transcription.FromJson)
);
}
}
@@ -271,4 +288,29 @@ internal static LiveSessionToolCallCancellation FromJson(Dictionary<string, obje
}
}

/// <summary>
/// A transcription of the audio sent in a live session.
/// </summary>
public readonly struct Transcription
{
/// <summary>
/// The transcribed text.
/// </summary>
public readonly string Text { get; }

private Transcription(string text)
{
Text = text;
}

/// <summary>
/// Intended for internal use only.
/// This method is used for deserializing JSON responses and should not be called directly.
/// </summary>
internal static Transcription FromJson(Dictionary<string, object> jsonDict)
{
return new Transcription(jsonDict.ParseValue<string>("text"));
}
}

}
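
On the receiving side, a client checks the new nullable properties on `LiveSessionContent`. The sketch below assumes the `Firebase.AI` namespace and a Unity context; the receive loop that yields these messages is omitted since its API is not part of this diff.

```csharp
// Sketch only: assumes the Unity SDK's Firebase.AI namespace.
using Firebase.AI;
using UnityEngine;

public static class TranscriptLogger
{
    // Input and output transcriptions are independent of Content and may
    // arrive in separate messages, so check each one on every message.
    public static void Log(LiveSessionContent message)
    {
        if (message.InputTranscription.HasValue)
        {
            Debug.Log($"User audio transcript: {message.InputTranscription.Value.Text}");
        }
        if (message.OutputTranscription.HasValue)
        {
            Debug.Log($"Model audio transcript: {message.OutputTranscription.Value.Text}");
        }
    }
}
```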