63 changes: 54 additions & 9 deletions src/google/adk/models/lite_llm.py
@@ -87,6 +87,25 @@
)


def _set_finish_reason(
response: types.LlmResponse, finish_reason: Any
) -> None:
"""Sets the finish reason on the LlmResponse, mapping from string if necessary.

Args:
response: The LlmResponse object to update.
finish_reason: The finish reason value, either a FinishReason enum or a string
that needs to be mapped.
"""
if isinstance(finish_reason, types.FinishReason):
response.finish_reason = finish_reason
else:
finish_reason_str = str(finish_reason).lower()
response.finish_reason = _FINISH_REASON_MAPPING.get(
finish_reason_str, types.FinishReason.OTHER
)
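
For context, a minimal sketch of how this helper behaves, with a stand-in mapping and response object (the real `_FINISH_REASON_MAPPING` entries live at module level in `lite_llm.py`; the values below are assumptions for illustration):

```python
from google.genai import types

# Stand-in for the module-level _FINISH_REASON_MAPPING (entries assumed).
FINISH_REASON_MAPPING = {
    "stop": types.FinishReason.STOP,
    "length": types.FinishReason.MAX_TOKENS,
}


def set_finish_reason(response, finish_reason):
  """Local mirror of _set_finish_reason, for illustration only."""
  if isinstance(finish_reason, types.FinishReason):
    response.finish_reason = finish_reason  # enum: use directly
  else:
    response.finish_reason = FINISH_REASON_MAPPING.get(
        str(finish_reason).lower(), types.FinishReason.OTHER
    )


class FakeLlmResponse:  # minimal stand-in for LlmResponse
  finish_reason = None


r = FakeLlmResponse()
set_finish_reason(r, "STOP")  # strings are lowercased, then mapped
assert r.finish_reason == types.FinishReason.STOP
set_finish_reason(r, "weird_reason")  # unknown strings fall back to OTHER
assert r.finish_reason == types.FinishReason.OTHER
set_finish_reason(r, types.FinishReason.MAX_TOKENS)  # enums pass through
assert r.finish_reason == types.FinishReason.MAX_TOKENS
```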


def _decode_inline_text_data(raw_bytes: bytes) -> str:
"""Decodes inline file bytes that represent textual content."""
try:
@@ -367,11 +386,21 @@ def _content_to_message_param(
tool_messages = []
for part in content.parts:
if part.function_response:
# FIX: Check whether the response is already a string before serializing.
# MCP tool responses arrive as JSON strings, but _safe_json_serialize was
# double-serializing them (json.dumps on already-JSON strings), producing
# triple-nested JSON like:
#   '{"content": [{"type": "text", "text": "{\n \"type\"..."}]}'
# which prevented Claude/GPT from parsing tool results correctly.
response_content = (
part.function_response.response
if isinstance(part.function_response.response, str)
else _safe_json_serialize(part.function_response.response)
)
tool_messages.append(
ChatCompletionToolMessage(
role="tool",
tool_call_id=part.function_response.id,
content=_safe_json_serialize(part.function_response.response),
content=response_content,
)
)
if tool_messages:
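
To make the failure mode concrete, a small repro of the double-serialization under a hypothetical MCP-style payload:

```python
import json

# An MCP tool result that is already a JSON string (hypothetical payload).
mcp_result = '{"content": [{"type": "text", "text": "42"}]}'

# Old behavior: json.dumps on an already-JSON string escapes it into a string
# literal, so the model receives '"{\"content\": ...}"' instead of an object.
double_encoded = json.dumps(mcp_result)
assert double_encoded.startswith('"{')

# New behavior: strings pass through untouched; only non-strings are dumped.
content = mcp_result if isinstance(mcp_result, str) else json.dumps(mcp_result)
assert content == mcp_result
```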
@@ -851,13 +880,7 @@ def _model_response_to_generate_content_response(
if finish_reason:
# If LiteLLM already provides a FinishReason enum (e.g., for Gemini), use
# it directly. Otherwise, map the finish_reason string to the enum.
if isinstance(finish_reason, types.FinishReason):
llm_response.finish_reason = finish_reason
else:
finish_reason_str = str(finish_reason).lower()
llm_response.finish_reason = _FINISH_REASON_MAPPING.get(
finish_reason_str, types.FinishReason.OTHER
)
_set_finish_reason(llm_response, finish_reason)
if response.get("usage", None):
llm_response.usage_metadata = types.GenerateContentResponseUsageMetadata(
prompt_token_count=response["usage"].get("prompt_tokens", 0),
@@ -1339,7 +1362,13 @@ async def generate_content_async(
_message_to_generate_content_response(
ChatCompletionAssistantMessage(
role="assistant",
content=text,
# FIX: Set content=None for tool-only messages, following OpenAI/LiteLLM
# conventions. Planning/reasoning text is already streamed (lines 1288-1296)
# and preserved in thought_parts (line 1357); repeating it in content
# duplicates output and violates the API spec for tool-call messages.
# See: https://github.com/google/adk-python/issues/3697
content=None,
tool_calls=tool_calls,
),
model_version=part.model,
@@ -1348,6 +1377,14 @@
else None,
)
)
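
For reference, the Chat Completions convention this aligns with: an assistant turn that issues tool calls carries no text content. The identifiers and arguments below are hypothetical:

```python
# Shape of an OpenAI-style assistant tool-call turn (values hypothetical).
assistant_tool_turn = {
    "role": "assistant",
    "content": None,  # no text payload on a tool-call turn
    "tool_calls": [
        {
            "id": "call_abc123",  # hypothetical call id
            "type": "function",
            "function": {
                "name": "get_weather",  # hypothetical tool
                "arguments": '{"city": "Paris"}',
            },
        }
    ],
}
```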
# FIX: Map finish_reason to the FinishReason enum for streaming responses.
# Previously, streaming responses never set finish_reason on the aggregated
# LlmResponse, so the ADK agent runner could not recognize completion
# states. This mirrors the non-streaming path (lines 776-784) for consistent
# behavior across both modes; without it, Claude and other models served
# via LiteLLM would hit stop conditions the agent couldn't handle.
_set_finish_reason(aggregated_llm_response_with_tool_call, finish_reason)
text = ""
reasoning_parts = []
function_calls.clear()
Expand All @@ -1362,6 +1399,14 @@ async def generate_content_async(
if reasoning_parts
else None,
)
# FIX: Same finish_reason mapping for the streaming text-only path; see the
# comment on the tool-call path above.
_set_finish_reason(aggregated_llm_response, finish_reason)
text = ""
reasoning_parts = []
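
A minimal sketch of the streaming aggregation these two call sites cover, with a chunk shape modeled loosely on LiteLLM's streaming deltas (field layout assumed): the finish reason arrives only on the final chunk and is now carried onto the aggregated response.

```python
# Hypothetical stream: text deltas first, finish_reason on the last chunk.
chunks = [
    {"choices": [{"delta": {"content": "Hel"}, "finish_reason": None}]},
    {"choices": [{"delta": {"content": "lo"}, "finish_reason": None}]},
    {"choices": [{"delta": {}, "finish_reason": "stop"}]},
]

text, finish = "", None
for chunk in chunks:
  choice = chunk["choices"][0]
  text += choice["delta"].get("content") or ""
  finish = choice["finish_reason"] or finish

# Before this fix the aggregated LlmResponse dropped `finish`; it is now
# mapped via _set_finish_reason so the runner sees FinishReason.STOP.
assert text == "Hello" and finish == "stop"
```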
