63 changes: 54 additions & 9 deletions src/google/adk/models/lite_llm.py
@@ -87,6 +87,25 @@
)


def _set_finish_reason(
response: types.LlmResponse, finish_reason: Any
) -> None:
"""Sets the finish reason on the LlmResponse, mapping from string if necessary.

Args:
response: The LlmResponse object to update.
finish_reason: The finish reason value, either a FinishReason enum or a string
that needs to be mapped.
"""
if isinstance(finish_reason, types.FinishReason):
response.finish_reason = finish_reason
else:
finish_reason_str = str(finish_reason).lower()
response.finish_reason = _FINISH_REASON_MAPPING.get(
finish_reason_str, types.FinishReason.OTHER
)
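
For context, a minimal sketch of how this helper behaves, with a stand-in mapping and response object (the real `_FINISH_REASON_MAPPING` entries live at module level in `lite_llm.py`; the values below are assumptions for illustration):

```python
from google.genai import types

# Stand-in for the module-level _FINISH_REASON_MAPPING (entries assumed).
FINISH_REASON_MAPPING = {
    "stop": types.FinishReason.STOP,
    "length": types.FinishReason.MAX_TOKENS,
}


def set_finish_reason(response, finish_reason):
  """Local mirror of _set_finish_reason, for illustration only."""
  if isinstance(finish_reason, types.FinishReason):
    response.finish_reason = finish_reason  # enum: use directly
  else:
    response.finish_reason = FINISH_REASON_MAPPING.get(
        str(finish_reason).lower(), types.FinishReason.OTHER
    )


class FakeLlmResponse:  # minimal stand-in for LlmResponse
  finish_reason = None


r = FakeLlmResponse()
set_finish_reason(r, "STOP")  # strings are lowercased, then mapped
assert r.finish_reason == types.FinishReason.STOP
set_finish_reason(r, "weird_reason")  # unknown strings fall back to OTHER
assert r.finish_reason == types.FinishReason.OTHER
set_finish_reason(r, types.FinishReason.MAX_TOKENS)  # enums pass through
assert r.finish_reason == types.FinishReason.MAX_TOKENS
```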


def _decode_inline_text_data(raw_bytes: bytes) -> str:
"""Decodes inline file bytes that represent textual content."""
try:
@@ -367,11 +386,21 @@ def _content_to_message_param(
tool_messages = []
for part in content.parts:
if part.function_response:
# FIX: Check whether the response is already a string before serializing.
# MCP tool responses arrive as JSON strings, but _safe_json_serialize was
# double-serializing them (json.dumps on already-JSON strings), producing
# triple-nested JSON like:
#   '{"content": [{"type": "text", "text": "{\n \"type\"..."}]}'
# which prevented Claude/GPT from parsing tool results correctly.
response_content = (
part.function_response.response
if isinstance(part.function_response.response, str)
else _safe_json_serialize(part.function_response.response)
)
tool_messages.append(
ChatCompletionToolMessage(
role="tool",
tool_call_id=part.function_response.id,
content=_safe_json_serialize(part.function_response.response),
content=response_content,
)
)
if tool_messages:
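
To make the failure mode concrete, a small repro of the double-serialization under a hypothetical MCP-style payload:

```python
import json

# An MCP tool result that is already a JSON string (hypothetical payload).
mcp_result = '{"content": [{"type": "text", "text": "42"}]}'

# Old behavior: json.dumps on an already-JSON string escapes it into a string
# literal, so the model receives '"{\"content\": ...}"' instead of an object.
double_encoded = json.dumps(mcp_result)
assert double_encoded.startswith('"{')

# New behavior: strings pass through untouched; only non-strings are dumped.
content = mcp_result if isinstance(mcp_result, str) else json.dumps(mcp_result)
assert content == mcp_result
```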
@@ -851,13 +880,7 @@ def _model_response_to_generate_content_response(
if finish_reason:
# If LiteLLM already provides a FinishReason enum (e.g., for Gemini), use
# it directly. Otherwise, map the finish_reason string to the enum.
if isinstance(finish_reason, types.FinishReason):
llm_response.finish_reason = finish_reason
else:
finish_reason_str = str(finish_reason).lower()
llm_response.finish_reason = _FINISH_REASON_MAPPING.get(
finish_reason_str, types.FinishReason.OTHER
)
_set_finish_reason(llm_response, finish_reason)
if response.get("usage", None):
llm_response.usage_metadata = types.GenerateContentResponseUsageMetadata(
prompt_token_count=response["usage"].get("prompt_tokens", 0),
@@ -1339,7 +1362,13 @@ async def generate_content_async(
_message_to_generate_content_response(
ChatCompletionAssistantMessage(
role="assistant",
content=text,
# FIX: Set content=None for tool-only messages, following OpenAI/LiteLLM
# conventions. Planning/reasoning text is already streamed (lines 1288-1296)
# and preserved in thought_parts (line 1357); repeating it in content
# duplicates output and violates the API spec for tool-call messages.
# See: https://github.com/google/adk-python/issues/3697
content=None,
tool_calls=tool_calls,
),
model_version=part.model,
@@ -1348,6 +1377,14 @@
else None,
)
)
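
For reference, the Chat Completions convention this aligns with: an assistant turn that issues tool calls carries no text content. The identifiers and arguments below are hypothetical:

```python
# Shape of an OpenAI-style assistant tool-call turn (values hypothetical).
assistant_tool_turn = {
    "role": "assistant",
    "content": None,  # no text payload on a tool-call turn
    "tool_calls": [
        {
            "id": "call_abc123",  # hypothetical call id
            "type": "function",
            "function": {
                "name": "get_weather",  # hypothetical tool
                "arguments": '{"city": "Paris"}',
            },
        }
    ],
}
```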
# FIX: Map finish_reason to the FinishReason enum for streaming responses.
# Previously, streaming responses never set finish_reason on the aggregated
# LlmResponse, so the ADK agent runner could not recognize completion
# states. This mirrors the non-streaming path (lines 776-784) for consistent
# behavior across both modes; without it, Claude and other models served
# via LiteLLM would hit stop conditions the agent couldn't handle.
_set_finish_reason(aggregated_llm_response_with_tool_call, finish_reason)
text = ""
reasoning_parts = []
function_calls.clear()
Expand All @@ -1362,6 +1399,14 @@ async def generate_content_async(
if reasoning_parts
else None,
)
# FIX: Same finish_reason mapping for the streaming text-only path; see the
# comment on the tool-call path above.
_set_finish_reason(aggregated_llm_response, finish_reason)
text = ""
reasoning_parts = []
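
A minimal sketch of the streaming aggregation these two call sites cover, with a chunk shape modeled loosely on LiteLLM's streaming deltas (field layout assumed): the finish reason arrives only on the final chunk and is now carried onto the aggregated response.

```python
# Hypothetical stream: text deltas first, finish_reason on the last chunk.
chunks = [
    {"choices": [{"delta": {"content": "Hel"}, "finish_reason": None}]},
    {"choices": [{"delta": {"content": "lo"}, "finish_reason": None}]},
    {"choices": [{"delta": {}, "finish_reason": "stop"}]},
]

text, finish = "", None
for chunk in chunks:
  choice = chunk["choices"][0]
  text += choice["delta"].get("content") or ""
  finish = choice["finish_reason"] or finish

# Before this fix the aggregated LlmResponse dropped `finish`; it is now
# mapped via _set_finish_reason so the runner sees FinishReason.STOP.
assert text == "Hello" and finish == "stop"
```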
