diff --git a/src/google/adk/models/lite_llm.py b/src/google/adk/models/lite_llm.py
index 9e3698b190..ded6f12aee 100644
--- a/src/google/adk/models/lite_llm.py
+++ b/src/google/adk/models/lite_llm.py
@@ -87,6 +87,25 @@
 )
 
 
+def _set_finish_reason(
+    response: types.LlmResponse, finish_reason: Any
+) -> None:
+  """Sets the finish reason on the LlmResponse, mapping from string if necessary.
+
+  Args:
+    response: The LlmResponse object to update.
+    finish_reason: The finish reason value, either a FinishReason enum or a string
+      that needs to be mapped.
+  """
+  if isinstance(finish_reason, types.FinishReason):
+    response.finish_reason = finish_reason
+  else:
+    finish_reason_str = str(finish_reason).lower()
+    response.finish_reason = _FINISH_REASON_MAPPING.get(
+        finish_reason_str, types.FinishReason.OTHER
+    )
+
+
 def _decode_inline_text_data(raw_bytes: bytes) -> str:
   """Decodes inline file bytes that represent textual content."""
   try:
@@ -367,11 +386,21 @@ def _content_to_message_param(
     tool_messages = []
     for part in content.parts:
       if part.function_response:
+        # FIX: Check if the response is already a string before serializing.
+        # MCP tool responses come as JSON strings, but _safe_json_serialize was
+        # double-serializing them (json.dumps on already-JSON strings), causing
+        # triple-nested JSON like: '{"content": [{"type": "text", "text": "{\n \"type\"..."}]}'
+        # This prevented Claude/GPT from parsing tool results correctly.
+        response_content = (
+            part.function_response.response
+            if isinstance(part.function_response.response, str)
+            else _safe_json_serialize(part.function_response.response)
+        )
         tool_messages.append(
             ChatCompletionToolMessage(
                 role="tool",
                 tool_call_id=part.function_response.id,
-                content=_safe_json_serialize(part.function_response.response),
+                content=response_content,
             )
         )
     if tool_messages:
@@ -851,13 +880,7 @@ def _model_response_to_generate_content_response(
   if finish_reason:
     # If LiteLLM already provides a FinishReason enum (e.g., for Gemini), use
     # it directly. Otherwise, map the finish_reason string to the enum.
-    if isinstance(finish_reason, types.FinishReason):
-      llm_response.finish_reason = finish_reason
-    else:
-      finish_reason_str = str(finish_reason).lower()
-      llm_response.finish_reason = _FINISH_REASON_MAPPING.get(
-          finish_reason_str, types.FinishReason.OTHER
-      )
+    _set_finish_reason(llm_response, finish_reason)
   if response.get("usage", None):
     llm_response.usage_metadata = types.GenerateContentResponseUsageMetadata(
         prompt_token_count=response["usage"].get("prompt_tokens", 0),
@@ -1339,7 +1362,13 @@ async def generate_content_async(
               _message_to_generate_content_response(
                   ChatCompletionAssistantMessage(
                       role="assistant",
-                      content=text,
+                      # FIX: Set content=None for tool-only messages to avoid duplication
+                      # and follow OpenAI/LiteLLM conventions. Planning/reasoning text is
+                      # already streamed (lines 1288-1296) and preserved in thought_parts
+                      # (line 1357). Including it again in content causes duplication and
+                      # violates API specifications for tool-call messages.
+                      # See: https://github.com/google/adk-python/issues/3697
+                      content=None,
                       tool_calls=tool_calls,
                   ),
                   model_version=part.model,
...
                   else None,
               )
           )
+          # FIX: Map finish_reason to the FinishReason enum for streaming responses.
+          # Previously, streaming responses did not set finish_reason on aggregated
+          # LlmResponse objects, causing the ADK agent runner to not properly recognize
+          # completion states. This mirrors the logic from the non-streaming path
+          # (lines 776-784) to ensure consistent behavior across both streaming and
+          # non-streaming modes. Without this, Claude and other models via LiteLLM
+          # would hit stop conditions that the agent couldn't properly handle.
+          _set_finish_reason(aggregated_llm_response_with_tool_call, finish_reason)
           text = ""
           reasoning_parts = []
           function_calls.clear()
@@ -1362,6 +1399,14 @@ async def generate_content_async(
               if reasoning_parts
               else None,
           )
+          # FIX: Map finish_reason to the FinishReason enum for streaming text-only
+          # responses. Previously, streaming responses did not set finish_reason on
+          # aggregated LlmResponse objects, causing the ADK agent runner to not properly
+          # recognize completion states. This mirrors the logic from the non-streaming
+          # path (lines 776-784) to ensure consistent behavior across both streaming and
+          # non-streaming modes. Without this, Claude and other models via LiteLLM
+          # would hit stop conditions that the agent couldn't properly handle.
+          _set_finish_reason(aggregated_llm_response, finish_reason)
           text = ""
           reasoning_parts = []
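
For reference, a minimal standalone sketch of the serialization issue fixed in _content_to_message_param above. This is not ADK code: _safe_json_serialize is approximated here with a plain json.dumps wrapper, and the sample MCP payload is invented for illustration.

    import json

    def _safe_json_serialize(obj):
      """Simplified stand-in for the ADK helper: JSON-encode non-string objects."""
      try:
        return json.dumps(obj, ensure_ascii=False)
      except (TypeError, OverflowError):
        return str(obj)

    # An MCP tool already returns its result as a JSON string.
    mcp_response = '{"content": [{"type": "text", "text": "42"}]}'

    # Old behavior: re-serializing the string escapes every quote, so the model
    # receives a JSON-encoded string of a JSON string and must unwrap it twice.
    print(_safe_json_serialize(mcp_response))
    # "{\"content\": [{\"type\": \"text\", \"text\": \"42\"}]}"

    # Patched behavior: pass strings through untouched, serialize everything else,
    # mirroring the response_content expression introduced in the diff.
    response = mcp_response
    content = response if isinstance(response, str) else _safe_json_serialize(response)
    print(content)
    # {"content": [{"type": "text", "text": "42"}]}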