refactor: Update how we look for finish_reason #9046
Changes from all commits
```diff
@@ -402,15 +402,9 @@ def _handle_stream_response(self, chat_completion: Stream, callback: SyncStreami
         chunk_delta: StreamingChunk

         for chunk in chat_completion:  # pylint: disable=not-an-iterable
-            # choices is an empty array for usage_chunk when include_usage is set to True
-            if chunk.usage is not None:
-                chunk_delta = self._convert_usage_chunk_to_streaming_chunk(chunk)
-            else:
-                assert len(chunk.choices) == 1, "Streaming responses should have only one choice."
-                chunk_delta = self._convert_chat_completion_chunk_to_streaming_chunk(chunk)
+            assert len(chunk.choices) <= 1, "Streaming responses should have at most one choice."
+            chunk_delta = self._convert_chat_completion_chunk_to_streaming_chunk(chunk)
             chunks.append(chunk_delta)
             callback(chunk_delta)
         return [self._convert_streaming_chunks_to_chat_message(chunk, chunks)]
```
```diff
@@ -422,15 +416,9 @@ async def _handle_async_stream_response(
         chunk_delta: StreamingChunk

         async for chunk in chat_completion:  # pylint: disable=not-an-iterable
-            # choices is an empty array for usage_chunk when include_usage is set to True
-            if chunk.usage is not None:
-                chunk_delta = self._convert_usage_chunk_to_streaming_chunk(chunk)
-            else:
-                assert len(chunk.choices) == 1, "Streaming responses should have only one choice."
-                chunk_delta = self._convert_chat_completion_chunk_to_streaming_chunk(chunk)
+            assert len(chunk.choices) <= 1, "Streaming responses should have at most one choice."
+            chunk_delta = self._convert_chat_completion_chunk_to_streaming_chunk(chunk)
             chunks.append(chunk_delta)
             await callback(chunk_delta)
         return [self._convert_streaming_chunks_to_chat_message(chunk, chunks)]
```
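With the usage-specific branch gone, every chunk, including the usage-only chunk that arrives with an empty `choices` array when `include_usage` is enabled, flows through the same conversion call. Below is a minimal sketch of that unified loop shape, using made-up `FakeChunk`/`convert` stand-ins rather than the PR's actual types:

```python
from dataclasses import dataclass, field
from typing import List, Optional

@dataclass
class FakeChoice:
    delta_content: str
    finish_reason: Optional[str] = None

@dataclass
class FakeChunk:
    # Usage-only chunks (include_usage=True) arrive with choices == [] and usage set.
    choices: List[FakeChoice] = field(default_factory=list)
    usage: Optional[dict] = None

def convert(chunk: FakeChunk) -> dict:
    # One conversion path for both content chunks and the empty usage-only chunk.
    assert len(chunk.choices) <= 1, "Streaming responses should have at most one choice."
    if not chunk.choices:
        return {"content": "", "meta": {"usage": chunk.usage}}
    choice = chunk.choices[0]
    return {"content": choice.delta_content, "meta": {"finish_reason": choice.finish_reason}}

stream = [
    FakeChunk(choices=[FakeChoice("Hel")]),
    FakeChunk(choices=[FakeChoice("lo", finish_reason="stop")]),
    FakeChunk(usage={"total_tokens": 7}),  # trailing usage-only chunk
]
deltas = [convert(c) for c in stream]
print("".join(d["content"] for d in deltas))  # -> Hello
```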
```diff
@@ -450,12 +438,12 @@ def _check_finish_reason(self, meta: Dict[str, Any]) -> None:
         )

     def _convert_streaming_chunks_to_chat_message(
-        self, chunk: ChatCompletionChunk, chunks: List[StreamingChunk]
+        self, last_chunk: ChatCompletionChunk, chunks: List[StreamingChunk]
     ) -> ChatMessage:
         """
         Connects the streaming chunks into a single ChatMessage.

-        :param chunk: The last chunk returned by the OpenAI API.
+        :param last_chunk: The last chunk returned by the OpenAI API.
         :param chunks: The list of all `StreamingChunk` objects.

         :returns: The ChatMessage.
```
```diff
@@ -498,15 +486,18 @@ def _convert_streaming_chunks_to_chat_message(
                 _arguments=call_data["arguments"],
             )

-        # finish_reason is in the last chunk if usage is not included, and in the second last chunk if usage is included
-        finish_reason = (chunks[-2] if chunk.usage and len(chunks) >= 2 else chunks[-1]).meta.get("finish_reason")
+        # finish_reason can appear in different places so we look for the last one
+        finish_reasons = [
+            chunk.meta.get("finish_reason") for chunk in chunks if chunk.meta.get("finish_reason") is not None
+        ]
+        finish_reason = finish_reasons[-1] if finish_reasons else None

         meta = {
-            "model": chunk.model,
+            "model": last_chunk.model,
             "index": 0,
             "finish_reason": finish_reason,
             "completion_start_time": chunks[0].meta.get("received_at"),  # first chunk received
-            "usage": chunk.usage or {},
+            "usage": dict(last_chunk.usage or {}),  # last chunk has the final usage data if available
         }

         return ChatMessage.from_assistant(text=text or None, tool_calls=tool_calls, meta=meta)
```
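Isolated from the class, the new lookup is just a scan over the accumulated chunk metadata for the last non-`None` `finish_reason`; it no longer assumes the value sits at a fixed offset from the end of the stream. A standalone sketch with an invented `metas` list:

```python
from typing import Any, Dict, List, Optional

def last_finish_reason(metas: List[Dict[str, Any]]) -> Optional[str]:
    # Keep the last non-None finish_reason, wherever it appears in the stream.
    reasons = [m.get("finish_reason") for m in metas if m.get("finish_reason") is not None]
    return reasons[-1] if reasons else None

# With include_usage=True, a usage-only chunk can trail the chunk carrying finish_reason.
metas = [{"model": "gpt-4o-mini"}, {"finish_reason": "stop"}, {"usage": {"total_tokens": 42}}]
print(last_finish_reason(metas))  # -> stop
```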
Contributor (Author): I updated this to follow what we did in our normal chat completion processing, where we store the dict version of the usage; otherwise it's returned as an OpenAI Python type.
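On the `dict(...)` point above: the OpenAI client returns usage as a Pydantic model, and iterating a Pydantic model yields `(field, value)` pairs, so `dict(...)` produces a plain shallow dict. A rough stand-in (not the client's actual usage class) showing the effect:

```python
from pydantic import BaseModel

class FakeUsage(BaseModel):
    # Stand-in for the usage object returned by the OpenAI client.
    prompt_tokens: int
    completion_tokens: int
    total_tokens: int

usage = FakeUsage(prompt_tokens=10, completion_tokens=5, total_tokens=15)
print(dict(usage))  # -> {'prompt_tokens': 10, 'completion_tokens': 5, 'total_tokens': 15}
```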
```diff
@@ -558,6 +549,10 @@ def _convert_chat_completion_chunk_to_streaming_chunk(self, chunk: ChatCompletio
         :returns:
             The StreamingChunk.
         """
+        # if there are no choices, return an empty chunk
+        if len(chunk.choices) == 0:
+            return StreamingChunk(content="", meta={"model": chunk.model, "received_at": datetime.now().isoformat()})
+
         # we stream the content of the chunk if it's not a tool or function call
         choice: ChunkChoice = chunk.choices[0]
         content = choice.delta.content or ""
```
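The empty-choices guard matters because, when usage reporting is requested via `stream_options={"include_usage": True}`, the API's final chunk carries `usage` but an empty `choices` list. A sketch of observing this with the `openai` client (the model name and prompt are placeholders, and an `OPENAI_API_KEY` is assumed):

```python
from openai import OpenAI

client = OpenAI()  # assumes OPENAI_API_KEY is set in the environment
stream = client.chat.completions.create(
    model="gpt-4o-mini",  # placeholder model
    messages=[{"role": "user", "content": "Say hi"}],
    stream=True,
    stream_options={"include_usage": True},
)
for chunk in stream:
    if not chunk.choices:
        print("\nusage:", chunk.usage)  # final chunk: empty choices, aggregate token counts
    else:
        print(chunk.choices[0].delta.content or "", end="")
```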
```diff
@@ -574,18 +569,3 @@ def _convert_chat_completion_chunk_to_streaming_chunk(self, chunk: ChatCompletio
             }
         )
         return chunk_message
-
-    def _convert_usage_chunk_to_streaming_chunk(self, chunk: ChatCompletionChunk) -> StreamingChunk:
-        """
-        Converts the usage chunk received from the OpenAI API when `include_usage` is set to `True` to a StreamingChunk.
-
-        :param chunk: The usage chunk returned by the OpenAI API.
-
-        :returns:
-            The StreamingChunk.
-        """
-        chunk_message = StreamingChunk(content="")
-        chunk_message.meta.update(
-            {"model": chunk.model, "usage": chunk.usage, "received_at": datetime.now().isoformat()}
-        )
-        return chunk_message
```

Comment on lines -578 to -582:

Contributor (Author): I removed this function since we didn't actually use the processed usage data from this when constructing the final ChatMessage. See the updated

Contributor: Right, I'll run it locally for the original use case behind this PR to verify that this works.

Contributor (Author): I've also added a unit test and an integration test checking usage stats when streaming with `include_usage: True`, so I hope that covers it, but let me know if you have any issues.
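A unit test along the lines the author mentions might stub the converted deltas and assert that usage from the trailing chunk survives into the joined message. Everything below is a hypothetical pytest-style sketch, not the tests actually added in the PR:

```python
def join_stream(deltas):
    # Toy stand-in for _convert_streaming_chunks_to_chat_message.
    usage = next((d["meta"]["usage"] for d in reversed(deltas) if d["meta"].get("usage")), {})
    text = "".join(d["content"] for d in deltas)
    return {"text": text, "meta": {"usage": usage}}

def test_usage_from_trailing_chunk_survives():
    deltas = [
        {"content": "Hi", "meta": {"finish_reason": "stop"}},
        {"content": "", "meta": {"usage": {"total_tokens": 5}}},  # usage-only chunk
    ]
    msg = join_stream(deltas)
    assert msg["text"] == "Hi"
    assert msg["meta"]["usage"] == {"total_tokens": 5}
```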