diff --git a/Sources/MLXServerKit/ChatCompletionsHandler.swift b/Sources/MLXServerKit/ChatCompletionsHandler.swift index 372180f..540ad44 100644 --- a/Sources/MLXServerKit/ChatCompletionsHandler.swift +++ b/Sources/MLXServerKit/ChatCompletionsHandler.swift @@ -77,11 +77,18 @@ enum ChatCompletionsHandler { roleSent = true try await writer.write( SSE.event(chunk(id, created, model, delta, finishReason: nil))) - case .finished(let reason, _): + case .finished(let reason, let usage): let delta = ChatCompletionChunk.Delta( role: nil, content: nil, toolCalls: nil) try await writer.write( SSE.event(chunk(id, created, model, delta, finishReason: reason))) + // OpenAI-style trailing usage chunk: empty choices, + // populated usage. Lets clients report context-window + // consumption for a streamed turn. + try await writer.write( + SSE.event(ChatCompletionChunk( + id: id, created: created, model: model, + choices: [], usage: usage))) } } try await writer.write(SSE.done()) diff --git a/Sources/MLXServerKit/OpenAITypes.swift b/Sources/MLXServerKit/OpenAITypes.swift index a7f6db0..93f66a1 100644 --- a/Sources/MLXServerKit/OpenAITypes.swift +++ b/Sources/MLXServerKit/OpenAITypes.swift @@ -249,6 +249,9 @@ public struct ChatCompletionChunk: Encodable, Sendable { public var created: Int public var model: String public var choices: [ChunkChoice] + /// Token usage. Populated only on the final chunk of a stream so clients + /// can report context-window consumption; omitted on all other chunks. + public var usage: Usage? public struct ChunkChoice: Encodable, Sendable { public var index: Int diff --git a/Tests/MLXServerTests/RoutesTests.swift b/Tests/MLXServerTests/RoutesTests.swift index 5c62df7..1db6875 100644 --- a/Tests/MLXServerTests/RoutesTests.swift +++ b/Tests/MLXServerTests/RoutesTests.swift @@ -97,6 +97,8 @@ struct RoutesTests { let text = String(buffer: response.body) #expect(text.contains("chat.completion.chunk")) #expect(text.contains("data: [DONE]")) + // The stream must carry a trailing usage chunk for context %. + #expect(text.contains("\"prompt_tokens\"")) } }