Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion Sources/MLXServerKit/ChatCompletionsHandler.swift
Original file line number Diff line number Diff line change
Expand Up @@ -77,11 +77,18 @@ enum ChatCompletionsHandler {
roleSent = true
try await writer.write(
SSE.event(chunk(id, created, model, delta, finishReason: nil)))
case .finished(let reason, _):
case .finished(let reason, let usage):
let delta = ChatCompletionChunk.Delta(
role: nil, content: nil, toolCalls: nil)
try await writer.write(
SSE.event(chunk(id, created, model, delta, finishReason: reason)))
// OpenAI-style trailing usage chunk: empty choices,
// populated usage. Lets clients report context-window
// consumption for a streamed turn.
try await writer.write(
SSE.event(ChatCompletionChunk(
id: id, created: created, model: model,
choices: [], usage: usage)))
}
}
try await writer.write(SSE.done())
Expand Down
3 changes: 3 additions & 0 deletions Sources/MLXServerKit/OpenAITypes.swift
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,9 @@ public struct ChatCompletionChunk: Encodable, Sendable {
public var created: Int
public var model: String
public var choices: [ChunkChoice]
/// Token usage. Populated only on the final chunk of a stream so clients
/// can report context-window consumption; omitted on all other chunks.
public var usage: Usage?

public struct ChunkChoice: Encodable, Sendable {
public var index: Int
Expand Down
2 changes: 2 additions & 0 deletions Tests/MLXServerTests/RoutesTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,8 @@ struct RoutesTests {
let text = String(buffer: response.body)
#expect(text.contains("chat.completion.chunk"))
#expect(text.contains("data: [DONE]"))
// The stream must carry a trailing usage chunk so clients can compute
// context-window usage; its presence is detected via the "prompt_tokens" key.
#expect(text.contains("\"prompt_tokens\""))
}
}

Expand Down
Loading