Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions Sources/OpenAI/Private/Realtime/OpenAIRealtimeSession.swift
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,44 @@ open class OpenAIRealtimeSession {
continuation?.yield(.inputAudioTranscriptionCompleted(transcript))
}

// MCP (Model Context Protocol) message types
case "mcp_list_tools.in_progress":
// The server has begun enumerating tools on the configured MCP server(s).
logger.debug("MCP: Tool discovery in progress")
continuation?.yield(.mcpListToolsInProgress)

case "mcp_list_tools.completed":
logger.debug("MCP: Tool discovery completed")
// NOTE(review): this expects "tools" to be a dictionary; the Realtime API
// commonly reports discovered tools as an array of tool objects — confirm
// the payload shape against the API docs, else this always hits the fallback.
if let tools = json["tools"] as? [String: Any] {
continuation?.yield(.mcpListToolsCompleted(tools))
} else {
// Missing or unexpectedly-typed "tools": degrade to an empty payload
// rather than swallowing the completion event.
continuation?.yield(.mcpListToolsCompleted([:]))
}

case "mcp_list_tools.failed":
logger.error("MCP: Tool discovery failed")
logger.error("Full JSON payload: \(String(describing: json))")

// Preferred location for details: a nested "error" object with "message"/"code".
let errorDetails = json["error"] as? [String: Any]
let errorMessage = errorDetails?["message"] as? String
let errorCode = errorDetails?["code"] as? String

// Also check for top-level error fields
let topLevelMessage = json["message"] as? String
let topLevelCode = json["code"] as? String
let topLevelReason = json["reason"] as? String

// Fallback chain: nested message → top-level message → reason → generic default.
let finalMessage = errorMessage ?? topLevelMessage ?? topLevelReason ?? "Unknown MCP error"
let finalCode = errorCode ?? topLevelCode
// NOTE(review): the force unwrap is guarded by the nil check on the same line,
// but `finalCode.map { "[\($0)] \(finalMessage)" } ?? finalMessage` avoids `!`.
let fullError = finalCode != nil ? "[\(finalCode!)] \(finalMessage)" : finalMessage

logger.error("MCP Error: \(fullError)")
logger.error("Error details: \(String(describing: errorDetails))")
logger
.error(
"Top-level fields: message=\(String(describing: topLevelMessage)), code=\(String(describing: topLevelCode)), reason=\(String(describing: topLevelReason))")

continuation?.yield(.mcpListToolsFailed(fullError))

default:
// Log unhandled message types for debugging
logger.debug("Unhandled message type: \(messageType)")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,20 +30,40 @@ extension OpenAIRealtimeConversationItemCreate {

public init(role: String, text: String) {
self.role = role
content = [.init(text: text)]
content = [.text(text)]
}

public init(role: String, content: [Content]) {
self.role = role
self.content = content
}
}
}

// MARK: - OpenAIRealtimeConversationItemCreate.Item.Content

extension OpenAIRealtimeConversationItemCreate.Item {
public struct Content: Encodable {
public let type = "input_text"
public let text: String
public enum Content: Encodable {
case text(String)
case image(String) // base64 data URL: "data:image/{format};base64,{bytes}"

public func encode(to encoder: Encoder) throws {
var container = encoder.container(keyedBy: CodingKeys.self)
switch self {
case .text(let text):
try container.encode("input_text", forKey: .type)
try container.encode(text, forKey: .text)

case .image(let imageUrl):
try container.encode("input_image", forKey: .type)
try container.encode(imageUrl, forKey: .imageUrl)
}
}

public init(text: String) {
self.text = text
private enum CodingKeys: String, CodingKey {
case type
case text
case imageUrl = "image_url"
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ public struct OpenAIRealtimeSessionConfiguration: Encodable, Sendable {
outputAudioFormat: OpenAIRealtimeSessionConfiguration.AudioFormat? = nil,
speed: Float? = 1.0,
temperature: Double? = nil,
tools: [OpenAIRealtimeSessionConfiguration.Tool]? = nil,
tools: [OpenAIRealtimeSessionConfiguration.RealtimeTool]? = nil,
toolChoice: OpenAIRealtimeSessionConfiguration.ToolChoice? = nil,
turnDetection: OpenAIRealtimeSessionConfiguration.TurnDetection? = nil,
voice: String? = nil)
Expand Down Expand Up @@ -130,8 +130,8 @@ public struct OpenAIRealtimeSessionConfiguration: Encodable, Sendable {
/// Sampling temperature for the model.
public let temperature: Double?

/// Tools (functions) available to the model.
public let tools: [Tool]?
/// Tools (functions and MCP servers) available to the model.
public let tools: [RealtimeTool]?

/// How the model chooses tools. Options are "auto", "none", "required", or specify a function.
public let toolChoice: ToolChoice?
Expand Down Expand Up @@ -191,10 +191,10 @@ extension OpenAIRealtimeSessionConfiguration {
}
}

// MARK: OpenAIRealtimeSessionConfiguration.Tool
// MARK: OpenAIRealtimeSessionConfiguration.FunctionTool

extension OpenAIRealtimeSessionConfiguration {
public struct Tool: Encodable, Sendable {
public struct FunctionTool: Encodable, Sendable {
/// The description of the function
public let description: String

Expand All @@ -215,6 +215,25 @@ extension OpenAIRealtimeSessionConfiguration {
}
}

// MARK: OpenAIRealtimeSessionConfiguration.RealtimeTool

extension OpenAIRealtimeSessionConfiguration {
/// A tool made available to the realtime model: either a callable
/// function definition or an MCP server description.
///
/// Encoding is transparent — the wrapped payload is encoded directly
/// into this value's container, with no extra nesting or tag key.
public enum RealtimeTool: Encodable, Sendable {
case function(FunctionTool)
case mcp(Tool.MCPTool)

public func encode(to encoder: Encoder) throws {
// Delegate straight to the payload so the wire format matches
// what the API expects for each tool kind.
switch self {
case .function(let functionTool):
try functionTool.encode(to: encoder)

case .mcp(let mcpServer):
try mcpServer.encode(to: encoder)
}
}
}
}

// MARK: OpenAIRealtimeSessionConfiguration.TurnDetection

extension OpenAIRealtimeSessionConfiguration {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,9 @@ public enum OpenAIRealtimeMessage: Sendable {
case inputAudioBufferTranscript(String) // "input_audio_buffer.transcript"
case inputAudioTranscriptionDelta(String) // "conversation.item.input_audio_transcription.delta"
case inputAudioTranscriptionCompleted(String) // "conversation.item.input_audio_transcription.completed"

// MCP (Model Context Protocol) messages
case mcpListToolsInProgress // "mcp_list_tools.in_progress"
// NOTE(review): `[String: Any]` is not Sendable; with the enum declared
// `Sendable` this payload likely needs a typed value (or @unchecked Sendable)
// — confirm it compiles under strict concurrency checking.
case mcpListToolsCompleted([String: Any]) // "mcp_list_tools.completed" with tools data
case mcpListToolsFailed(String?) // "mcp_list_tools.failed" with error details
}