31 changes: 22 additions & 9 deletions Sources/OpenAI/Private/Audio/MicrophonePCMSampleVendorAE.swift
@@ -62,7 +62,7 @@ class MicrophonePCMSampleVendorAE: MicrophonePCMSampleVendor {
logger.debug("MicrophonePCMSampleVendorAE is being freed")
}

public func start() throws -> AsyncStream<AVAudioPCMBuffer> {
func start() throws -> AsyncStream<AVAudioPCMBuffer> {
guard
let desiredTapFormat = AVAudioFormat(
commonFormat: .pcmFormatInt16,
@@ -87,14 +87,10 @@ class MicrophonePCMSampleVendorAE: MicrophonePCMSampleVendor {
return AsyncStream<AVAudioPCMBuffer> { [weak self] continuation in
guard let this = self else { return }
this.continuation = continuation
this.inputNode.installTap(onBus: 0, bufferSize: targetBufferSize, format: desiredTapFormat) { [weak this] sampleBuffer, _ in
if let accumulatedBuffer = this?.microphonePCMSampleVendorCommon.resampleAndAccumulate(sampleBuffer) {
// If the buffer has accumulated to a sufficient level, give it back to the caller
Task { @RealtimeActor in
this?.continuation?.yield(accumulatedBuffer)
}
}
}
this.installTapNonIsolated(
inputNode: this.inputNode,
bufferSize: targetBufferSize,
format: desiredTapFormat)
}
}

@@ -111,5 +107,22 @@ class MicrophonePCMSampleVendorAE: MicrophonePCMSampleVendor {
private let microphonePCMSampleVendorCommon = MicrophonePCMSampleVendorCommon()
private var continuation: AsyncStream<AVAudioPCMBuffer>.Continuation?

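/// Installs the tap outside of actor isolation. The tap callback forwards each
/// captured buffer back onto the actor via `Task`, so `processBuffer` can
/// resample, accumulate, and yield it from a consistent isolation domain.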
private nonisolated func installTapNonIsolated(
inputNode: AVAudioInputNode,
bufferSize: AVAudioFrameCount,
format: AVAudioFormat)
{
inputNode.installTap(onBus: 0, bufferSize: bufferSize, format: format) { [weak self] sampleBuffer, _ in
guard let self else { return }
Task { await self.processBuffer(sampleBuffer) }
}
}

private func processBuffer(_ buffer: AVAudioPCMBuffer) {
if let accumulatedBuffer = microphonePCMSampleVendorCommon.resampleAndAccumulate(buffer) {
continuation?.yield(accumulatedBuffer)
}
}

}
#endif
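For orientation, here is a minimal sketch of how a caller might consume the vendor's stream. The direct `MicrophonePCMSampleVendorAE()` construction, the enclosing actor context, and the `sendToRealtimeAPI(_:)` uplink are assumptions for illustration, not part of this PR:

import AVFoundation

// Hypothetical consumer of the vendor's buffer stream.
func streamMicrophoneAudio() throws {
    let vendor = MicrophonePCMSampleVendorAE()
    let micStream = try vendor.start()
    Task {
        for await buffer in micStream {
            // Buffers arrive already resampled and accumulated by the vendor.
            sendToRealtimeAPI(buffer) // hypothetical uplink function
        }
    }
}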
56 changes: 56 additions & 0 deletions Sources/OpenAI/Private/Realtime/OpenAIRealtimeSession.swift
@@ -273,6 +273,62 @@ open class OpenAIRealtimeSession {
logger.warning("Received response.done with unexpected format")
}

case "response.text.delta":
if let delta = json["delta"] as? String {
continuation?.yield(.responseTextDelta(delta))
}

case "response.text.done":
if let text = json["text"] as? String {
continuation?.yield(.responseTextDone(text))
}

case "response.output_item.added":
if
let item = json["item"] as? [String: Any],
let itemId = item["id"] as? String,
let type = item["type"] as? String
{
continuation?.yield(.responseOutputItemAdded(itemId: itemId, type: type))
}

case "response.output_item.done":
if
let item = json["item"] as? [String: Any],
let itemId = item["id"] as? String,
let type = item["type"] as? String
{
let content = item["content"] as? [[String: Any]]
continuation?.yield(.responseOutputItemDone(itemId: itemId, type: type, content: content))
}

case "response.content_part.added":
if
let part = json["part"] as? [String: Any],
let type = part["type"] as? String
{
continuation?.yield(.responseContentPartAdded(type: type))
}

case "response.content_part.done":
if
let part = json["part"] as? [String: Any],
let type = part["type"] as? String
{
let text = part["text"] as? String
continuation?.yield(.responseContentPartDone(type: type, text: text))
}

case "conversation.item.created":
if
let item = json["item"] as? [String: Any],
let itemId = item["id"] as? String,
let type = item["type"] as? String
{
let role = item["role"] as? String
continuation?.yield(.conversationItemCreated(itemId: itemId, type: type, role: role))
}

default:
// Log unhandled message types with more detail for debugging
logger.warning("⚠️ Unhandled message type: \(messageType)")
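For reference, the new branches above read fields shaped like the following server event. The field names mirror what the parsing code extracts; the concrete values are illustrative only:

// Illustrative payload for one of the new event types.
let event: [String: Any] = [
    "type": "response.output_item.done",
    "item": [
        "id": "item_abc123",  // parsed into `itemId`
        "type": "message",    // parsed into `type`
        "content": [          // optional; forwarded to the caller as-is
            ["type": "text", "text": "Hello there"],
        ],
    ] as [String: Any],
]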
@@ -11,7 +11,6 @@
/// Realtime session configuration
/// https://platform.openai.com/docs/api-reference/realtime-client-events/session/update#realtime-client-events/session/update-session
public struct OpenAIRealtimeSessionConfiguration: Encodable, Sendable {

public init(
inputAudioFormat: OpenAIRealtimeSessionConfiguration.AudioFormat? = nil,
inputAudioTranscription: OpenAIRealtimeSessionConfiguration.InputAudioTranscription? = nil,
@@ -41,7 +40,6 @@ public struct OpenAIRealtimeSessionConfiguration: Encodable, Sendable {
}

public enum ToolChoice: Encodable, Sendable {

/// The model will not call any tool and instead generates a message.
/// This is the default when no tools are present in the request body.
case none
@@ -73,22 +71,14 @@ public struct OpenAIRealtimeSessionConfiguration: Encodable, Sendable {
case .specific(let functionName):
var container = encoder.container(keyedBy: RootKey.self)
try container.encode("function", forKey: .type)
var functionContainer = container.nestedContainer(
keyedBy: FunctionKey.self,
forKey: .function)
try functionContainer.encode(functionName, forKey: .name)
try container.encode(functionName, forKey: .name)
}
}

private enum RootKey: CodingKey {
case type
case function
case name
}

private enum FunctionKey: CodingKey {
case name
}

}

/// The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
@@ -157,7 +147,6 @@ public struct OpenAIRealtimeSessionConfiguration: Encodable, Sendable {
case turnDetection = "turn_detection"
case voice
}

}

// MARK: OpenAIRealtimeSessionConfiguration.InputAudioTranscription
@@ -238,7 +227,6 @@ extension OpenAIRealtimeSessionConfiguration {

extension OpenAIRealtimeSessionConfiguration {
public struct TurnDetection: Encodable, Sendable {

public init(
type: DetectionType)
{
@@ -270,7 +258,6 @@ extension OpenAIRealtimeSessionConfiguration {
case type
case eagerness
}

}
}

@@ -321,6 +308,5 @@ extension OpenAIRealtimeSessionConfiguration.TurnDetection {
case medium
case high
}

}
}
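One behavioral note on the ToolChoice change above: the function name is now encoded at the root of the object rather than nested under a `function` key. A minimal sketch of the difference, assuming the case is spelled `.specific(functionName:)` and using an invented tool name:

import Foundation

// Hypothetical encoding check; "get_weather" is an invented tool name.
let choice = OpenAIRealtimeSessionConfiguration.ToolChoice.specific(functionName: "get_weather")
let json = String(data: try! JSONEncoder().encode(choice), encoding: .utf8)!
// Old shape: {"type":"function","function":{"name":"get_weather"}}
// New shape: {"type":"function","name":"get_weather"}
print(json)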
@@ -29,4 +29,19 @@ public enum OpenAIRealtimeMessage: Sendable {

/// Response completion with potential errors
case responseDone(status: String, statusDetails: [String: Any]?) // "response.done"

// Text streaming (for text-only responses)
case responseTextDelta(String) // "response.text.delta"
case responseTextDone(String) // "response.text.done"

// Output item lifecycle
case responseOutputItemAdded(itemId: String, type: String) // "response.output_item.added"
case responseOutputItemDone(itemId: String, type: String, content: [[String: Any]]?) // "response.output_item.done"

// Content part lifecycle
case responseContentPartAdded(type: String) // "response.content_part.added"
case responseContentPartDone(type: String, text: String?) // "response.content_part.done"

/// Conversation item
case conversationItemCreated(itemId: String, type: String, role: String?) // "conversation.item.created"
}
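Taken together, a client might consume the new cases like this. The `session.receiver` stream name and the handling shown are assumptions for illustration; only the case names and payloads come from this PR:

// Hypothetical client loop over the session's message stream.
func handleMessages(from session: OpenAIRealtimeSession) async {
    var transcript = ""
    for await message in session.receiver {
        switch message {
        case .responseTextDelta(let delta):
            transcript += delta // accumulate streamed partial text
        case .responseTextDone(let text):
            transcript = text // the final text replaces accumulated deltas
            print("Assistant said: \(transcript)")
        case .responseOutputItemDone(let itemId, let type, _):
            print("Output item \(itemId) (\(type)) finished")
        case .conversationItemCreated(let itemId, _, let role):
            print("Conversation item \(itemId) created by \(role ?? "unknown")")
        default:
            break
        }
    }
}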