diff --git a/src/lib/tokenizer.ts b/src/lib/tokenizer.ts index 8c3eda73..b9ebafad 100644 --- a/src/lib/tokenizer.ts +++ b/src/lib/tokenizer.ts @@ -73,6 +73,9 @@ const calculateMessageTokens = ( const tokensPerName = 1 let tokens = tokensPerMessage for (const [key, value] of Object.entries(message)) { + if (key === "reasoning_opaque") { + continue + } if (typeof value === "string") { tokens += encoder.encode(value).length } diff --git a/src/routes/messages/anthropic-types.ts b/src/routes/messages/anthropic-types.ts index 881fffcc..2fb7849f 100644 --- a/src/routes/messages/anthropic-types.ts +++ b/src/routes/messages/anthropic-types.ts @@ -56,6 +56,7 @@ export interface AnthropicToolUseBlock { export interface AnthropicThinkingBlock { type: "thinking" thinking: string + signature: string } export type AnthropicUserContentBlock = @@ -196,6 +197,7 @@ export interface AnthropicStreamState { messageStartSent: boolean contentBlockIndex: number contentBlockOpen: boolean + thinkingBlockOpen: boolean toolCalls: { [openAIToolIndex: number]: { id: string diff --git a/src/routes/messages/handler.ts b/src/routes/messages/handler.ts index 85dbf624..a40d3f1d 100644 --- a/src/routes/messages/handler.ts +++ b/src/routes/messages/handler.ts @@ -60,6 +60,7 @@ export async function handleCompletion(c: Context) { contentBlockIndex: 0, contentBlockOpen: false, toolCalls: {}, + thinkingBlockOpen: false, } for await (const rawEvent of response) { diff --git a/src/routes/messages/non-stream-translation.ts b/src/routes/messages/non-stream-translation.ts index dc41e638..94a0f7e1 100644 --- a/src/routes/messages/non-stream-translation.ts +++ b/src/routes/messages/non-stream-translation.ts @@ -139,25 +139,26 @@ function handleAssistantMessage( (block): block is AnthropicToolUseBlock => block.type === "tool_use", ) - const textBlocks = message.content.filter( - (block): block is AnthropicTextBlock => block.type === "text", - ) - const thinkingBlocks = message.content.filter( (block): block is 
AnthropicThinkingBlock => block.type === "thinking", ) - // Combine text and thinking blocks, as OpenAI doesn't have separate thinking blocks - const allTextContent = [ - ...textBlocks.map((b) => b.text), - ...thinkingBlocks.map((b) => b.thinking), - ].join("\n\n") + const allThinkingContent = thinkingBlocks + .filter((b) => b.thinking && b.thinking.length > 0) + .map((b) => b.thinking) + .join("\n\n") + + const signature = thinkingBlocks.find( + (b) => b.signature && b.signature.length > 0, + )?.signature return toolUseBlocks.length > 0 ? [ { role: "assistant", - content: allTextContent || null, + content: mapContent(message.content), + reasoning_text: allThinkingContent, + reasoning_opaque: signature, tool_calls: toolUseBlocks.map((toolUse) => ({ id: toolUse.id, type: "function", @@ -172,6 +173,8 @@ function handleAssistantMessage( { role: "assistant", content: mapContent(message.content), + reasoning_text: allThinkingContent, + reasoning_opaque: signature, }, ] } @@ -191,11 +194,8 @@ function mapContent( const hasImage = content.some((block) => block.type === "image") if (!hasImage) { return content - .filter( - (block): block is AnthropicTextBlock | AnthropicThinkingBlock => - block.type === "text" || block.type === "thinking", - ) - .map((block) => (block.type === "text" ? 
block.text : block.thinking)) + .filter((block): block is AnthropicTextBlock => block.type === "text") + .map((block) => block.text) .join("\n\n") } @@ -204,12 +204,6 @@ function mapContent( switch (block.type) { case "text": { contentParts.push({ type: "text", text: block.text }) - - break - } - case "thinking": { - contentParts.push({ type: "text", text: block.thinking }) - break } case "image": { @@ -219,7 +213,6 @@ function mapContent( url: `data:${block.source.media_type};base64,${block.source.data}`, }, }) - break } // No default @@ -282,19 +275,19 @@ export function translateToAnthropic( response: ChatCompletionResponse, ): AnthropicResponse { // Merge content from all choices - const allTextBlocks: Array = [] - const allToolUseBlocks: Array = [] - let stopReason: "stop" | "length" | "tool_calls" | "content_filter" | null = - null // default - stopReason = response.choices[0]?.finish_reason ?? stopReason + const assistantContentBlocks: Array = [] + let stopReason = response.choices[0]?.finish_reason ?? 
null // Process all choices to extract text and tool use blocks for (const choice of response.choices) { const textBlocks = getAnthropicTextBlocks(choice.message.content) + const thinkingBlocks = getAnthropicThinkBlocks( + choice.message.reasoning_text, + choice.message.reasoning_opaque, + ) const toolUseBlocks = getAnthropicToolUseBlocks(choice.message.tool_calls) - allTextBlocks.push(...textBlocks) - allToolUseBlocks.push(...toolUseBlocks) + assistantContentBlocks.push(...thinkingBlocks, ...textBlocks, ...toolUseBlocks) // Use the finish_reason from the first choice, or prioritize tool_calls if (choice.finish_reason === "tool_calls" || stopReason === "stop") { @@ -302,14 +295,12 @@ export function translateToAnthropic( } } - // Note: GitHub Copilot doesn't generate thinking blocks, so we don't include them in responses - return { id: response.id, type: "message", role: "assistant", model: response.model, - content: [...allTextBlocks, ...allToolUseBlocks], + content: assistantContentBlocks, stop_reason: mapOpenAIStopReasonToAnthropic(stopReason), stop_sequence: null, usage: { @@ -329,7 +320,7 @@ export function translateToAnthropic( function getAnthropicTextBlocks( messageContent: Message["content"], ): Array { - if (typeof messageContent === "string") { + if (typeof messageContent === "string" && messageContent.length > 0) { return [{ type: "text", text: messageContent }] } @@ -342,6 +333,31 @@ export function translateToAnthropic( return [] } +function getAnthropicThinkBlocks( + reasoningText: string | null | undefined, + reasoningOpaque: string | null | undefined, +): Array { + if (reasoningText && reasoningText.length > 0) { + return [ + { + type: "thinking", + thinking: reasoningText, + signature: reasoningOpaque || "", + }, + ] + } + if (reasoningOpaque && reasoningOpaque.length > 0) { + return [ + { + type: "thinking", + thinking: "", + signature: reasoningOpaque, + }, + ] + } + return [] +} + function getAnthropicToolUseBlocks( toolCalls: Array | undefined, ): Array 
{ diff --git a/src/routes/messages/stream-translation.ts b/src/routes/messages/stream-translation.ts index 55094448..6002d510 100644 --- a/src/routes/messages/stream-translation.ts +++ b/src/routes/messages/stream-translation.ts @@ -1,4 +1,8 @@ -import { type ChatCompletionChunk } from "~/services/copilot/create-chat-completions" +import { + type ChatCompletionChunk, + type Choice, + type Delta, +} from "~/services/copilot/create-chat-completions" import { type AnthropicStreamEventData, @@ -16,7 +20,6 @@ function isToolBlockOpen(state: AnthropicStreamState): boolean { ) } -// eslint-disable-next-line max-lines-per-function, complexity export function translateChunkToAnthropicEvents( chunk: ChatCompletionChunk, state: AnthropicStreamState, @@ -30,22 +33,54 @@ export function translateChunkToAnthropicEvents( const choice = chunk.choices[0] const { delta } = choice - if (!state.messageStartSent) { - events.push({ - type: "message_start", - message: { - id: chunk.id, - type: "message", - role: "assistant", - content: [], - model: chunk.model, - stop_reason: null, - stop_sequence: null, + handleMessageStart(state, events, chunk) + + handleThinkingText(delta, state, events) + + handleContent(delta, state, events) + + handleToolCalls(delta, state, events) + + handleFinish(choice, state, { events, chunk }) + + return events +} + +function handleFinish( + choice: Choice, + state: AnthropicStreamState, + context: { + events: Array + chunk: ChatCompletionChunk + }, +) { + const { events, chunk } = context + if (choice.finish_reason && choice.finish_reason.length > 0) { + if (state.contentBlockOpen) { + const toolBlockOpen = isToolBlockOpen(state) + context.events.push({ + type: "content_block_stop", + index: state.contentBlockIndex, + }) + state.contentBlockOpen = false + state.contentBlockIndex++ + if (!toolBlockOpen) { + handleReasoningOpaque(choice.delta, events, state) + } + } + + events.push( + { + type: "message_delta", + delta: { + stop_reason: 
mapOpenAIStopReasonToAnthropic(choice.finish_reason), + stop_sequence: null, + }, usage: { input_tokens: (chunk.usage?.prompt_tokens ?? 0) - (chunk.usage?.prompt_tokens_details?.cached_tokens ?? 0), - output_tokens: 0, // Will be updated in message_delta when finished + output_tokens: chunk.usage?.completion_tokens ?? 0, ...(chunk.usage?.prompt_tokens_details?.cached_tokens !== undefined && { cache_read_input_tokens: @@ -53,44 +88,23 @@ export function translateChunkToAnthropicEvents( }), }, }, - }) - state.messageStartSent = true + { + type: "message_stop", + }, + ) } +} - if (delta.content) { - if (isToolBlockOpen(state)) { - // A tool block was open, so close it before starting a text block. - events.push({ - type: "content_block_stop", - index: state.contentBlockIndex, - }) - state.contentBlockIndex++ - state.contentBlockOpen = false - } - - if (!state.contentBlockOpen) { - events.push({ - type: "content_block_start", - index: state.contentBlockIndex, - content_block: { - type: "text", - text: "", - }, - }) - state.contentBlockOpen = true - } +function handleToolCalls( + delta: Delta, + state: AnthropicStreamState, + events: Array, +) { + if (delta.tool_calls && delta.tool_calls.length > 0) { + closeThinkingBlockIfOpen(state, events) - events.push({ - type: "content_block_delta", - index: state.contentBlockIndex, - delta: { - type: "text_delta", - text: delta.content, - }, - }) - } + handleReasoningOpaqueInToolCalls(state, events, delta) - if (delta.tool_calls) { for (const toolCall of delta.tool_calls) { if (toolCall.id && toolCall.function?.name) { // New tool call starting. 
@@ -141,28 +155,86 @@ export function translateChunkToAnthropicEvents( } } } +} - if (choice.finish_reason) { - if (state.contentBlockOpen) { +function handleReasoningOpaqueInToolCalls( + state: AnthropicStreamState, + events: Array, + delta: Delta, +) { + if (state.contentBlockOpen && !isToolBlockOpen(state)) { + events.push({ + type: "content_block_stop", + index: state.contentBlockIndex, + }) + state.contentBlockIndex++ + state.contentBlockOpen = false + } + handleReasoningOpaque(delta, events, state) +} + +function handleContent( + delta: Delta, + state: AnthropicStreamState, + events: Array, +) { + if (delta.content && delta.content.length > 0) { + closeThinkingBlockIfOpen(state, events) + + if (isToolBlockOpen(state)) { + // A tool block was open, so close it before starting a text block. events.push({ type: "content_block_stop", index: state.contentBlockIndex, }) + state.contentBlockIndex++ state.contentBlockOpen = false } - events.push( - { - type: "message_delta", - delta: { - stop_reason: mapOpenAIStopReasonToAnthropic(choice.finish_reason), - stop_sequence: null, + if (!state.contentBlockOpen) { + events.push({ + type: "content_block_start", + index: state.contentBlockIndex, + content_block: { + type: "text", + text: "", }, + }) + state.contentBlockOpen = true + } + + events.push({ + type: "content_block_delta", + index: state.contentBlockIndex, + delta: { + type: "text_delta", + text: delta.content, + }, + }) + } +} + +function handleMessageStart( + state: AnthropicStreamState, + events: Array, + chunk: ChatCompletionChunk, +) { + if (!state.messageStartSent) { + events.push({ + type: "message_start", + message: { + id: chunk.id, + type: "message", + role: "assistant", + content: [], + model: chunk.model, + stop_reason: null, + stop_sequence: null, usage: { input_tokens: (chunk.usage?.prompt_tokens ?? 0) - (chunk.usage?.prompt_tokens_details?.cached_tokens ?? 0), - output_tokens: chunk.usage?.completion_tokens ?? 
0, + output_tokens: 0, // Will be updated in message_delta when finished ...(chunk.usage?.prompt_tokens_details?.cached_tokens !== undefined && { cache_read_input_tokens: @@ -170,13 +242,121 @@ export function translateChunkToAnthropicEvents( }), }, }, + }) + state.messageStartSent = true + } +} + +function handleReasoningOpaque( + delta: Delta, + events: Array, + state: AnthropicStreamState, +) { + if (delta.reasoning_opaque && delta.reasoning_opaque.length > 0) { + events.push( { - type: "message_stop", + type: "content_block_start", + index: state.contentBlockIndex, + content_block: { + type: "thinking", + thinking: "", + }, + }, + { + type: "content_block_delta", + index: state.contentBlockIndex, + delta: { + type: "thinking_delta", + thinking: "", + }, + }, + { + type: "content_block_delta", + index: state.contentBlockIndex, + delta: { + type: "signature_delta", + signature: delta.reasoning_opaque, + }, + }, + { + type: "content_block_stop", + index: state.contentBlockIndex, }, ) + state.contentBlockIndex++ } +} - return events +function handleThinkingText( + delta: Delta, + state: AnthropicStreamState, + events: Array, +) { + if (delta.reasoning_text && delta.reasoning_text.length > 0) { + if (!state.thinkingBlockOpen) { + events.push({ + type: "content_block_start", + index: state.contentBlockIndex, + content_block: { + type: "thinking", + thinking: "", + }, + }) + state.thinkingBlockOpen = true + } + + events.push({ + type: "content_block_delta", + index: state.contentBlockIndex, + delta: { + type: "thinking_delta", + thinking: delta.reasoning_text, + }, + }) + + if (delta.reasoning_opaque && delta.reasoning_opaque.length > 0) { + events.push( + { + type: "content_block_delta", + index: state.contentBlockIndex, + delta: { + type: "signature_delta", + signature: delta.reasoning_opaque, + }, + }, + { + type: "content_block_stop", + index: state.contentBlockIndex, + }, + ) + state.contentBlockIndex++ + state.thinkingBlockOpen = false + } + } +} + +function 
closeThinkingBlockIfOpen( + state: AnthropicStreamState, + events: Array, +): void { + if (state.thinkingBlockOpen) { + events.push( + { + type: "content_block_delta", + index: state.contentBlockIndex, + delta: { + type: "signature_delta", + signature: "", + }, + }, + { + type: "content_block_stop", + index: state.contentBlockIndex, + }, + ) + state.contentBlockIndex++ + state.thinkingBlockOpen = false + } } export function translateErrorToAnthropicErrorEvent(): AnthropicStreamEventData { diff --git a/src/services/copilot/create-chat-completions.ts b/src/services/copilot/create-chat-completions.ts index 8534151d..e848b27a 100644 --- a/src/services/copilot/create-chat-completions.ts +++ b/src/services/copilot/create-chat-completions.ts @@ -69,7 +69,7 @@ export interface ChatCompletionChunk { } } -interface Delta { +export interface Delta { content?: string | null role?: "user" | "assistant" | "system" | "tool" tool_calls?: Array<{ @@ -81,9 +81,11 @@ interface Delta { arguments?: string } }> + reasoning_text?: string | null + reasoning_opaque?: string | null } -interface Choice { +export interface Choice { index: number delta: Delta finish_reason: "stop" | "length" | "tool_calls" | "content_filter" | null @@ -112,6 +114,8 @@ export interface ChatCompletionResponse { interface ResponseMessage { role: "assistant" content: string | null + reasoning_text?: string | null + reasoning_opaque?: string | null tool_calls?: Array } @@ -166,6 +170,8 @@ export interface Message { name?: string tool_calls?: Array tool_call_id?: string + reasoning_text?: string | null + reasoning_opaque?: string | null } export interface ToolCall { diff --git a/src/start.ts b/src/start.ts index 14abbbdf..171d4ac9 100644 --- a/src/start.ts +++ b/src/start.ts @@ -117,6 +117,9 @@ export async function runServer(options: RunServerOptions): Promise { serve({ fetch: server.fetch as ServerHandler, port: options.port, + bun: { + idleTimeout: 0, + }, }) } diff --git a/tests/anthropic-request.test.ts 
b/tests/anthropic-request.test.ts index 06c66377..baed2f6d 100644 --- a/tests/anthropic-request.test.ts +++ b/tests/anthropic-request.test.ts @@ -136,6 +136,7 @@ describe("Anthropic to OpenAI translation logic", () => { { type: "thinking", thinking: "Let me think about this simple math problem...", + signature: "abc123", }, { type: "text", text: "2+2 equals 4." }, ], @@ -150,7 +151,7 @@ describe("Anthropic to OpenAI translation logic", () => { const assistantMessage = openAIPayload.messages.find( (m) => m.role === "assistant", ) - expect(assistantMessage?.content).toContain( + expect(assistantMessage?.reasoning_text).toContain( "Let me think about this simple math problem...", ) expect(assistantMessage?.content).toContain("2+2 equals 4.") @@ -168,6 +169,7 @@ describe("Anthropic to OpenAI translation logic", () => { type: "thinking", thinking: "I need to call the weather API to get current weather information.", + signature: "def456", }, { type: "text", text: "I'll check the weather for you." 
}, { @@ -188,7 +190,7 @@ describe("Anthropic to OpenAI translation logic", () => { const assistantMessage = openAIPayload.messages.find( (m) => m.role === "assistant", ) - expect(assistantMessage?.content).toContain( + expect(assistantMessage?.reasoning_text).toContain( "I need to call the weather API", ) expect(assistantMessage?.content).toContain( diff --git a/tests/anthropic-response.test.ts b/tests/anthropic-response.test.ts index ecd71aac..e849a02a 100644 --- a/tests/anthropic-response.test.ts +++ b/tests/anthropic-response.test.ts @@ -252,6 +252,7 @@ describe("OpenAI to Anthropic Streaming Response Translation", () => { contentBlockIndex: 0, contentBlockOpen: false, toolCalls: {}, + thinkingBlockOpen: false, } const translatedStream = openAIStream.flatMap((chunk) => translateChunkToAnthropicEvents(chunk, streamState), @@ -352,6 +353,7 @@ describe("OpenAI to Anthropic Streaming Response Translation", () => { contentBlockIndex: 0, contentBlockOpen: false, toolCalls: {}, + thinkingBlockOpen: false, } const translatedStream = openAIStream.flatMap((chunk) => translateChunkToAnthropicEvents(chunk, streamState),