From cb92b70dac02a6e48ad0c21c9c6eabf14c44d2ce Mon Sep 17 00:00:00 2001 From: Andrew Grande Date: Wed, 8 Oct 2025 10:37:19 -0700 Subject: [PATCH 01/18] fix: propagate finish_reason from LiteLLM responses Fixes #3109 This change ensures that the finish_reason field from LiteLLM responses is properly propagated to LlmResponse objects, enabling callbacks to detect completion conditions like max_tokens truncation. Changes: - Extract finish_reason from LiteLLM response in lite_llm.py - Update tracing.py to handle both enum (Gemini) and string (LiteLLM) finish_reason values - Add comprehensive unit tests for finish_reason propagation The fix allows after_model_callback functions to properly detect: - "length": max_tokens limit reached - "stop": natural completion - "tool_calls": tool invocations - "content_filter": filtered content --- src/google/adk/models/lite_llm.py | 4 + src/google/adk/telemetry/tracing.py | 6 +- tests/unittests/models/test_litellm.py | 140 +++++++++++++++++++++++++ 3 files changed, 149 insertions(+), 1 deletion(-) diff --git a/src/google/adk/models/lite_llm.py b/src/google/adk/models/lite_llm.py index 94d9831c39..e1f3d0d70f 100644 --- a/src/google/adk/models/lite_llm.py +++ b/src/google/adk/models/lite_llm.py @@ -494,13 +494,17 @@ def _model_response_to_generate_content_response( """ message = None + finish_reason = None if response.get("choices", None): message = response["choices"][0].get("message", None) + finish_reason = response["choices"][0].get("finish_reason", None) if not message: raise ValueError("No message in response") llm_response = _message_to_generate_content_response(message) + if finish_reason: + llm_response.finish_reason = finish_reason if response.get("usage", None): llm_response.usage_metadata = types.GenerateContentResponseUsageMetadata( prompt_token_count=response["usage"].get("prompt_tokens", 0), diff --git a/src/google/adk/telemetry/tracing.py b/src/google/adk/telemetry/tracing.py index 021471a5aa..8d3f841a1f 100644 --- a/src/google/adk/telemetry/tracing.py +++ b/src/google/adk/telemetry/tracing.py @@ -303,9 +303,13 @@ def trace_call_llm( llm_response.usage_metadata.candidates_token_count, ) if llm_response.finish_reason: + if hasattr(llm_response.finish_reason, 'value'): + finish_reason_str = llm_response.finish_reason.value.lower() + else: + finish_reason_str = str(llm_response.finish_reason).lower() span.set_attribute( 'gen_ai.response.finish_reasons', - [llm_response.finish_reason.value.lower()], + [finish_reason_str], ) diff --git a/tests/unittests/models/test_litellm.py b/tests/unittests/models/test_litellm.py index 2fbacc0f1b..1ab2f13ee7 100644 --- a/tests/unittests/models/test_litellm.py +++ b/tests/unittests/models/test_litellm.py @@ -1903,3 +1903,143 @@ def test_non_gemini_litellm_no_warning(): # Test with non-Gemini model LiteLlm(model="openai/gpt-4o") assert len(w) == 0 + + +@pytest.mark.asyncio +async def test_finish_reason_propagation_non_streaming( + mock_acompletion, lite_llm_instance +): + """Test that finish_reason is properly propagated from LiteLLM response in non-streaming mode.""" + mock_response_with_finish_reason = ModelResponse( + choices=[ + Choices( + message=ChatCompletionAssistantMessage( + role="assistant", + content="Test response", + ), + finish_reason="length", + ) + ] + ) + mock_acompletion.return_value = mock_response_with_finish_reason + + llm_request = LlmRequest( + contents=[ + types.Content( + role="user", parts=[types.Part.from_text(text="Test prompt")] + ) + ], + ) + + async for response in 
lite_llm_instance.generate_content_async(llm_request): + assert response.content.role == "model" + assert response.content.parts[0].text == "Test response" + assert response.finish_reason == "length" + + mock_acompletion.assert_called_once() + + +@pytest.mark.asyncio +async def test_finish_reason_propagation_stop( + mock_acompletion, lite_llm_instance +): + """Test that finish_reason='stop' is properly propagated.""" + mock_response_with_finish_reason = ModelResponse( + choices=[ + Choices( + message=ChatCompletionAssistantMessage( + role="assistant", + content="Complete response", + ), + finish_reason="stop", + ) + ] + ) + mock_acompletion.return_value = mock_response_with_finish_reason + + llm_request = LlmRequest( + contents=[ + types.Content( + role="user", parts=[types.Part.from_text(text="Test prompt")] + ) + ], + ) + + async for response in lite_llm_instance.generate_content_async(llm_request): + assert response.finish_reason == "stop" + + mock_acompletion.assert_called_once() + + +@pytest.mark.asyncio +async def test_finish_reason_propagation_tool_calls( + mock_acompletion, lite_llm_instance +): + """Test that finish_reason='tool_calls' is properly propagated.""" + mock_response_with_finish_reason = ModelResponse( + choices=[ + Choices( + message=ChatCompletionAssistantMessage( + role="assistant", + content="", + tool_calls=[ + ChatCompletionMessageToolCall( + type="function", + id="test_id", + function=Function( + name="test_function", + arguments='{"arg": "value"}', + ), + ) + ], + ), + finish_reason="tool_calls", + ) + ] + ) + mock_acompletion.return_value = mock_response_with_finish_reason + + llm_request = LlmRequest( + contents=[ + types.Content( + role="user", parts=[types.Part.from_text(text="Test prompt")] + ) + ], + ) + + async for response in lite_llm_instance.generate_content_async(llm_request): + assert response.finish_reason == "tool_calls" + + mock_acompletion.assert_called_once() + + +@pytest.mark.asyncio +async def test_finish_reason_content_filter( + mock_acompletion, lite_llm_instance +): + """Test that finish_reason='content_filter' is properly propagated.""" + mock_response_with_content_filter = ModelResponse( + choices=[ + Choices( + message=ChatCompletionAssistantMessage( + role="assistant", + content="", + ), + finish_reason="content_filter", + ) + ] + ) + mock_acompletion.return_value = mock_response_with_content_filter + + llm_request = LlmRequest( + contents=[ + types.Content( + role="user", parts=[types.Part.from_text(text="Test prompt")] + ) + ], + ) + + async for response in lite_llm_instance.generate_content_async(llm_request): + assert response.finish_reason == "content_filter" + + mock_acompletion.assert_called_once() From 5c43ac2a97f208cf4ccabf5eba89576786ed11c0 Mon Sep 17 00:00:00 2001 From: Andrew Grande Date: Wed, 8 Oct 2025 10:45:32 -0700 Subject: [PATCH 02/18] refactor: address code review feedback - Use .name instead of .value for enum finish_reason (more robust for IntEnum) - Extract first choice using walrus operator for better readability - Consolidate tests using @pytest.mark.parametrize to reduce duplication - Strengthen test assertions to verify response content All 53 tests pass. 
--- src/google/adk/models/lite_llm.py | 8 +- src/google/adk/telemetry/tracing.py | 4 +- tests/unittests/models/test_litellm.py | 162 +++++++------------------ 3 files changed, 54 insertions(+), 120 deletions(-) diff --git a/src/google/adk/models/lite_llm.py b/src/google/adk/models/lite_llm.py index e1f3d0d70f..8d33cdd22c 100644 --- a/src/google/adk/models/lite_llm.py +++ b/src/google/adk/models/lite_llm.py @@ -495,9 +495,11 @@ def _model_response_to_generate_content_response( message = None finish_reason = None - if response.get("choices", None): - message = response["choices"][0].get("message", None) - finish_reason = response["choices"][0].get("finish_reason", None) + if response.get("choices", None) and ( + first_choice := response["choices"][0] + ): + message = first_choice.get("message", None) + finish_reason = first_choice.get("finish_reason", None) if not message: raise ValueError("No message in response") diff --git a/src/google/adk/telemetry/tracing.py b/src/google/adk/telemetry/tracing.py index 8d3f841a1f..42fa927e6c 100644 --- a/src/google/adk/telemetry/tracing.py +++ b/src/google/adk/telemetry/tracing.py @@ -303,8 +303,8 @@ def trace_call_llm( llm_response.usage_metadata.candidates_token_count, ) if llm_response.finish_reason: - if hasattr(llm_response.finish_reason, 'value'): - finish_reason_str = llm_response.finish_reason.value.lower() + if hasattr(llm_response.finish_reason, 'name'): + finish_reason_str = llm_response.finish_reason.name.lower() else: finish_reason_str = str(llm_response.finish_reason).lower() span.set_attribute( diff --git a/tests/unittests/models/test_litellm.py b/tests/unittests/models/test_litellm.py index 1ab2f13ee7..000717d3d5 100644 --- a/tests/unittests/models/test_litellm.py +++ b/tests/unittests/models/test_litellm.py @@ -1905,131 +1905,57 @@ def test_non_gemini_litellm_no_warning(): assert len(w) == 0 +@pytest.mark.parametrize( + "finish_reason,response_content,expected_content,has_tool_calls", + [ + ("length", "Test response", "Test response", False), + ("stop", "Complete response", "Complete response", False), + ( + "tool_calls", + "", + "", + True, + ), + ("content_filter", "", "", False), + ], + ids=["length", "stop", "tool_calls", "content_filter"], +) @pytest.mark.asyncio -async def test_finish_reason_propagation_non_streaming( - mock_acompletion, lite_llm_instance -): - """Test that finish_reason is properly propagated from LiteLLM response in non-streaming mode.""" - mock_response_with_finish_reason = ModelResponse( - choices=[ - Choices( - message=ChatCompletionAssistantMessage( - role="assistant", - content="Test response", - ), - finish_reason="length", - ) - ] - ) - mock_acompletion.return_value = mock_response_with_finish_reason - - llm_request = LlmRequest( - contents=[ - types.Content( - role="user", parts=[types.Part.from_text(text="Test prompt")] - ) - ], - ) - - async for response in lite_llm_instance.generate_content_async(llm_request): - assert response.content.role == "model" - assert response.content.parts[0].text == "Test response" - assert response.finish_reason == "length" - - mock_acompletion.assert_called_once() - - -@pytest.mark.asyncio -async def test_finish_reason_propagation_stop( - mock_acompletion, lite_llm_instance -): - """Test that finish_reason='stop' is properly propagated.""" - mock_response_with_finish_reason = ModelResponse( - choices=[ - Choices( - message=ChatCompletionAssistantMessage( - role="assistant", - content="Complete response", - ), - finish_reason="stop", - ) - ] - ) - 
mock_acompletion.return_value = mock_response_with_finish_reason - - llm_request = LlmRequest( - contents=[ - types.Content( - role="user", parts=[types.Part.from_text(text="Test prompt")] - ) - ], - ) - - async for response in lite_llm_instance.generate_content_async(llm_request): - assert response.finish_reason == "stop" - - mock_acompletion.assert_called_once() - - -@pytest.mark.asyncio -async def test_finish_reason_propagation_tool_calls( - mock_acompletion, lite_llm_instance +async def test_finish_reason_propagation( + mock_acompletion, + lite_llm_instance, + finish_reason, + response_content, + expected_content, + has_tool_calls, ): - """Test that finish_reason='tool_calls' is properly propagated.""" - mock_response_with_finish_reason = ModelResponse( - choices=[ - Choices( - message=ChatCompletionAssistantMessage( - role="assistant", - content="", - tool_calls=[ - ChatCompletionMessageToolCall( - type="function", - id="test_id", - function=Function( - name="test_function", - arguments='{"arg": "value"}', - ), - ) - ], - ), - finish_reason="tool_calls", - ) - ] - ) - mock_acompletion.return_value = mock_response_with_finish_reason - - llm_request = LlmRequest( - contents=[ - types.Content( - role="user", parts=[types.Part.from_text(text="Test prompt")] - ) - ], - ) - - async for response in lite_llm_instance.generate_content_async(llm_request): - assert response.finish_reason == "tool_calls" - - mock_acompletion.assert_called_once() - + """Test that finish_reason is properly propagated from LiteLLM response.""" + tool_calls = None + if has_tool_calls: + tool_calls = [ + ChatCompletionMessageToolCall( + type="function", + id="test_id", + function=Function( + name="test_function", + arguments='{"arg": "value"}', + ), + ) + ] -@pytest.mark.asyncio -async def test_finish_reason_content_filter( - mock_acompletion, lite_llm_instance -): - """Test that finish_reason='content_filter' is properly propagated.""" - mock_response_with_content_filter = ModelResponse( + mock_response = ModelResponse( choices=[ Choices( message=ChatCompletionAssistantMessage( role="assistant", - content="", + content=response_content, + tool_calls=tool_calls, ), - finish_reason="content_filter", + finish_reason=finish_reason, ) ] ) - mock_acompletion.return_value = mock_response_with_content_filter + mock_acompletion.return_value = mock_response llm_request = LlmRequest( contents=[ @@ -2040,6 +1966,12 @@ async def test_finish_reason_content_filter( ) async for response in lite_llm_instance.generate_content_async(llm_request): - assert response.finish_reason == "content_filter" + assert response.content.role == "model" + assert response.finish_reason == finish_reason + if expected_content: + assert response.content.parts[0].text == expected_content + if has_tool_calls: + assert len(response.content.parts) > 0 + assert response.content.parts[-1].function_call.name == "test_function" mock_acompletion.assert_called_once() From ac27a3aa6470c6bdbcf4260153d56aebbed321ce Mon Sep 17 00:00:00 2001 From: Andrew Grande Date: Wed, 8 Oct 2025 10:51:42 -0700 Subject: [PATCH 03/18] Update src/google/adk/models/lite_llm.py Addressing review comments Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- src/google/adk/models/lite_llm.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/google/adk/models/lite_llm.py b/src/google/adk/models/lite_llm.py index 8d33cdd22c..4cbf0bdafe 100644 --- a/src/google/adk/models/lite_llm.py +++ 
b/src/google/adk/models/lite_llm.py @@ -495,9 +495,8 @@ def _model_response_to_generate_content_response( message = None finish_reason = None - if response.get("choices", None) and ( - first_choice := response["choices"][0] - ): + if choices := response.get("choices"): + first_choice = choices[0] message = first_choice.get("message", None) finish_reason = first_choice.get("finish_reason", None) From 3924bcb2ba6b475df3bd5776e57bee9490e16f2e Mon Sep 17 00:00:00 2001 From: Andrew Grande Date: Wed, 8 Oct 2025 10:51:53 -0700 Subject: [PATCH 04/18] Update src/google/adk/telemetry/tracing.py Addressing review comments Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- src/google/adk/telemetry/tracing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/google/adk/telemetry/tracing.py b/src/google/adk/telemetry/tracing.py index 42fa927e6c..7f37106251 100644 --- a/src/google/adk/telemetry/tracing.py +++ b/src/google/adk/telemetry/tracing.py @@ -303,7 +303,7 @@ def trace_call_llm( llm_response.usage_metadata.candidates_token_count, ) if llm_response.finish_reason: - if hasattr(llm_response.finish_reason, 'name'): + if isinstance(llm_response.finish_reason, types.FinishReason): finish_reason_str = llm_response.finish_reason.name.lower() else: finish_reason_str = str(llm_response.finish_reason).lower() From ab6f5779e38a143b66a90e39c4710863dbff553f Mon Sep 17 00:00:00 2001 From: Andrew Grande Date: Wed, 8 Oct 2025 10:55:13 -0700 Subject: [PATCH 05/18] fix: update finish_reason type hint to support both enum and string Address type safety issue where finish_reason can be either: - types.FinishReason enum (from Gemini responses) - str (from LiteLLM responses) Updated LlmResponse.finish_reason type hint to: Optional[Union[types.FinishReason, str]] This ensures type checkers correctly validate the dual nature of this field across different model providers. All 53 tests pass. --- src/google/adk/models/llm_response.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/google/adk/models/llm_response.py b/src/google/adk/models/llm_response.py index 56eb6318c1..c0ab7f7b16 100644 --- a/src/google/adk/models/llm_response.py +++ b/src/google/adk/models/llm_response.py @@ -16,6 +16,7 @@ from typing import Any from typing import Optional +from typing import Union from google.genai import types from pydantic import alias_generators @@ -77,8 +78,11 @@ class LlmResponse(BaseModel): Only used for streaming mode. """ - finish_reason: Optional[types.FinishReason] = None - """The finish reason of the response.""" + finish_reason: Optional[Union[types.FinishReason, str]] = None + """The finish reason of the response. + + Can be either a types.FinishReason enum (from Gemini) or a string (from LiteLLM). + """ error_code: Optional[str] = None """Error code if the response is an error. 
Code varies by model.""" From 79961122bf104b28d730f885aea97a81bb60e9a7 Mon Sep 17 00:00:00 2001 From: Andrew Grande Date: Tue, 14 Oct 2025 20:11:53 -0700 Subject: [PATCH 06/18] feat: map LiteLLM finish_reason strings to FinishReason enum - Map finish_reason strings to proper FinishReason enum values in lite_llm.py - 'length' -> FinishReason.MAX_TOKENS - 'stop' -> FinishReason.STOP - 'tool_calls'/'function_call' -> FinishReason.STOP - 'content_filter' -> FinishReason.SAFETY - unknown values -> FinishReason.OTHER - Add clarifying comment in tracing.py for string fallback path - Update test_litellm.py to verify enum mapping: - Assert finish_reason is FinishReason enum instance - Verify correct enum values for each finish_reason string - Add test for unknown finish_reason mapping to OTHER Benefits: - Type consistency with Gemini native responses - Avoids runtime warnings from string finish_reason - Enables proper instanceof checks in callbacks - Better integration with ADK telemetry --- src/google/adk/models/lite_llm.py | 16 ++++++++- src/google/adk/telemetry/tracing.py | 1 + tests/unittests/models/test_litellm.py | 49 +++++++++++++++++++++++++- 3 files changed, 64 insertions(+), 2 deletions(-) diff --git a/src/google/adk/models/lite_llm.py b/src/google/adk/models/lite_llm.py index 4cbf0bdafe..95c1369245 100644 --- a/src/google/adk/models/lite_llm.py +++ b/src/google/adk/models/lite_llm.py @@ -505,7 +505,21 @@ def _model_response_to_generate_content_response( llm_response = _message_to_generate_content_response(message) if finish_reason: - llm_response.finish_reason = finish_reason + # Map LiteLLM finish_reason strings to FinishReason enum + # This provides type consistency with Gemini native responses and avoids warnings + finish_reason_str = str(finish_reason).lower() + if finish_reason_str == "length": + llm_response.finish_reason = types.FinishReason.MAX_TOKENS + elif finish_reason_str == "stop": + llm_response.finish_reason = types.FinishReason.STOP + elif "tool" in finish_reason_str or "function" in finish_reason_str: + # Handle tool_calls, function_call variants + llm_response.finish_reason = types.FinishReason.STOP + elif finish_reason_str == "content_filter": + llm_response.finish_reason = types.FinishReason.SAFETY + else: + # For unknown reasons, use OTHER + llm_response.finish_reason = types.FinishReason.OTHER if response.get("usage", None): llm_response.usage_metadata = types.GenerateContentResponseUsageMetadata( prompt_token_count=response["usage"].get("prompt_tokens", 0), diff --git a/src/google/adk/telemetry/tracing.py b/src/google/adk/telemetry/tracing.py index 7f37106251..e4ee072345 100644 --- a/src/google/adk/telemetry/tracing.py +++ b/src/google/adk/telemetry/tracing.py @@ -306,6 +306,7 @@ def trace_call_llm( if isinstance(llm_response.finish_reason, types.FinishReason): finish_reason_str = llm_response.finish_reason.name.lower() else: + # Fallback for string values (should not occur with LiteLLM after enum mapping) finish_reason_str = str(llm_response.finish_reason).lower() span.set_attribute( 'gen_ai.response.finish_reasons', diff --git a/tests/unittests/models/test_litellm.py b/tests/unittests/models/test_litellm.py index 000717d3d5..14eb97f617 100644 --- a/tests/unittests/models/test_litellm.py +++ b/tests/unittests/models/test_litellm.py @@ -1967,7 +1967,17 @@ async def test_finish_reason_propagation( async for response in lite_llm_instance.generate_content_async(llm_request): assert response.content.role == "model" - assert response.finish_reason == finish_reason + # 
Verify finish_reason is mapped to FinishReason enum, not raw string + assert isinstance(response.finish_reason, types.FinishReason) + # Verify correct enum mapping + if finish_reason == "length": + assert response.finish_reason == types.FinishReason.MAX_TOKENS + elif finish_reason == "stop": + assert response.finish_reason == types.FinishReason.STOP + elif finish_reason == "tool_calls": + assert response.finish_reason == types.FinishReason.STOP + elif finish_reason == "content_filter": + assert response.finish_reason == types.FinishReason.SAFETY if expected_content: assert response.content.parts[0].text == expected_content if has_tool_calls: @@ -1975,3 +1985,40 @@ async def test_finish_reason_propagation( assert response.content.parts[-1].function_call.name == "test_function" mock_acompletion.assert_called_once() + + + +@pytest.mark.asyncio +async def test_finish_reason_unknown_maps_to_other( + mock_acompletion, lite_llm_instance +): + """Test that unknown finish_reason values map to FinishReason.OTHER.""" + mock_response = ModelResponse( + choices=[ + Choices( + message=ChatCompletionAssistantMessage( + role="assistant", + content="Test response", + ), + finish_reason="unknown_reason_type", + ) + ] + ) + mock_acompletion.return_value = mock_response + + llm_request = LlmRequest( + contents=[ + types.Content( + role="user", parts=[types.Part.from_text(text="Test prompt")] + ) + ], + ) + + async for response in lite_llm_instance.generate_content_async(llm_request): + assert response.content.role == "model" + # Unknown finish_reason should map to OTHER + assert isinstance(response.finish_reason, types.FinishReason) + assert response.finish_reason == types.FinishReason.OTHER + + mock_acompletion.assert_called_once() + From a1c09388e1d851293101afef0b18ea471d01c665 Mon Sep 17 00:00:00 2001 From: Andrew Grande Date: Tue, 14 Oct 2025 20:40:00 -0700 Subject: [PATCH 07/18] feat: map LiteLLM finish_reason strings to FinishReason enum Maps LiteLLM finish_reason string values to proper FinishReason enum for type consistency with Gemini native responses. 
Changes: - Add _FINISH_REASON_MAPPING dictionary for string->enum conversion - "length" -> FinishReason.MAX_TOKENS - "stop" -> FinishReason.STOP - "tool_calls"/"function_call" -> FinishReason.STOP - "content_filter" -> FinishReason.SAFETY - Unknown values -> FinishReason.OTHER (fallback) - Update finish_reason type hint to Optional[FinishReason] (no Union needed) - Update telemetry tracing to use .name for enum serialization - Add explanatory comments: - Why tool_calls maps to STOP (no TOOL_CALL enum exists) - Docstring clarifies mapping applies to all model providers Tests: - test_finish_reason_propagation: verifies enum mapping for all values - test_finish_reason_unknown_maps_to_other: verifies fallback behavior Benefits: - Type consistency: finish_reason is always FinishReason enum - No runtime warnings from mixed types - Enables proper isinstance() checks in callbacks - Dictionary mapping improves maintainability - Better integration with ADK telemetry --- src/google/adk/models/lite_llm.py | 28 +++++++++++++++------------ src/google/adk/models/llm_response.py | 5 +++-- src/google/adk/telemetry/tracing.py | 2 +- 3 files changed, 20 insertions(+), 15 deletions(-) diff --git a/src/google/adk/models/lite_llm.py b/src/google/adk/models/lite_llm.py index 95c1369245..b1121acba2 100644 --- a/src/google/adk/models/lite_llm.py +++ b/src/google/adk/models/lite_llm.py @@ -64,6 +64,19 @@ _NEW_LINE = "\n" _EXCLUDED_PART_FIELD = {"inline_data": {"data"}} +# Mapping of LiteLLM finish_reason strings to FinishReason enum values +# Note: tool_calls/function_call map to STOP because: +# 1. FinishReason.TOOL_CALL enum does not exist (as of google-genai 0.8.0) +# 2. Tool calls represent normal completion (model stopped to invoke tools) +# 3. Gemini native responses use STOP for tool calls (see lite_llm.py:910) +_FINISH_REASON_MAPPING = { + "length": types.FinishReason.MAX_TOKENS, + "stop": types.FinishReason.STOP, + "tool_calls": types.FinishReason.STOP, # Normal completion with tool invocation + "function_call": types.FinishReason.STOP, # Legacy function call variant + "content_filter": types.FinishReason.SAFETY, +} + class ChatCompletionFileUrlObject(TypedDict, total=False): file_data: str @@ -508,18 +521,9 @@ def _model_response_to_generate_content_response( # Map LiteLLM finish_reason strings to FinishReason enum # This provides type consistency with Gemini native responses and avoids warnings finish_reason_str = str(finish_reason).lower() - if finish_reason_str == "length": - llm_response.finish_reason = types.FinishReason.MAX_TOKENS - elif finish_reason_str == "stop": - llm_response.finish_reason = types.FinishReason.STOP - elif "tool" in finish_reason_str or "function" in finish_reason_str: - # Handle tool_calls, function_call variants - llm_response.finish_reason = types.FinishReason.STOP - elif finish_reason_str == "content_filter": - llm_response.finish_reason = types.FinishReason.SAFETY - else: - # For unknown reasons, use OTHER - llm_response.finish_reason = types.FinishReason.OTHER + llm_response.finish_reason = _FINISH_REASON_MAPPING.get( + finish_reason_str, types.FinishReason.OTHER + ) if response.get("usage", None): llm_response.usage_metadata = types.GenerateContentResponseUsageMetadata( prompt_token_count=response["usage"].get("prompt_tokens", 0), diff --git a/src/google/adk/models/llm_response.py b/src/google/adk/models/llm_response.py index c0ab7f7b16..982127bb1d 100644 --- a/src/google/adk/models/llm_response.py +++ b/src/google/adk/models/llm_response.py @@ -78,10 +78,11 @@ class 
LlmResponse(BaseModel): Only used for streaming mode. """ - finish_reason: Optional[Union[types.FinishReason, str]] = None + finish_reason: Optional[types.FinishReason] = None """The finish reason of the response. - Can be either a types.FinishReason enum (from Gemini) or a string (from LiteLLM). + Always a types.FinishReason enum. String values from underlying model providers + are mapped to corresponding enum values (with fallback to OTHER for unknown values). """ error_code: Optional[str] = None diff --git a/src/google/adk/telemetry/tracing.py b/src/google/adk/telemetry/tracing.py index e4ee072345..d89dbfb575 100644 --- a/src/google/adk/telemetry/tracing.py +++ b/src/google/adk/telemetry/tracing.py @@ -306,7 +306,7 @@ def trace_call_llm( if isinstance(llm_response.finish_reason, types.FinishReason): finish_reason_str = llm_response.finish_reason.name.lower() else: - # Fallback for string values (should not occur with LiteLLM after enum mapping) + # Defensive fallback for string values (should never occur - all values mapped to enum) finish_reason_str = str(llm_response.finish_reason).lower() span.set_attribute( 'gen_ai.response.finish_reasons', From 692af95004ffa46d33facbeb2e946a560fb4ac79 Mon Sep 17 00:00:00 2001 From: Andrew Grande Date: Tue, 14 Oct 2025 20:49:45 -0700 Subject: [PATCH 08/18] refactor: address bot review suggestions - Simplify tracing.py by removing isinstance check (always enum now) - Refactor test assertions to use dictionary mapping instead of if/elif - Reduce code duplication and improve readability Addresses Gemini Code Assist bot suggestions: - tracing.py: Direct .name access since finish_reason is always enum - test_litellm.py: Dictionary mapping for cleaner test assertions --- src/google/adk/telemetry/tracing.py | 7 ++----- tests/unittests/models/test_litellm.py | 19 +++++++++---------- 2 files changed, 11 insertions(+), 15 deletions(-) diff --git a/src/google/adk/telemetry/tracing.py b/src/google/adk/telemetry/tracing.py index d89dbfb575..5366a8015b 100644 --- a/src/google/adk/telemetry/tracing.py +++ b/src/google/adk/telemetry/tracing.py @@ -303,11 +303,8 @@ def trace_call_llm( llm_response.usage_metadata.candidates_token_count, ) if llm_response.finish_reason: - if isinstance(llm_response.finish_reason, types.FinishReason): - finish_reason_str = llm_response.finish_reason.name.lower() - else: - # Defensive fallback for string values (should never occur - all values mapped to enum) - finish_reason_str = str(llm_response.finish_reason).lower() + # finish_reason is always FinishReason enum + finish_reason_str = llm_response.finish_reason.name.lower() span.set_attribute( 'gen_ai.response.finish_reasons', [finish_reason_str], diff --git a/tests/unittests/models/test_litellm.py b/tests/unittests/models/test_litellm.py index 14eb97f617..85e6b72fdd 100644 --- a/tests/unittests/models/test_litellm.py +++ b/tests/unittests/models/test_litellm.py @@ -1967,17 +1967,16 @@ async def test_finish_reason_propagation( async for response in lite_llm_instance.generate_content_async(llm_request): assert response.content.role == "model" - # Verify finish_reason is mapped to FinishReason enum, not raw string + # Verify finish_reason is mapped to FinishReason enum assert isinstance(response.finish_reason, types.FinishReason) - # Verify correct enum mapping - if finish_reason == "length": - assert response.finish_reason == types.FinishReason.MAX_TOKENS - elif finish_reason == "stop": - assert response.finish_reason == types.FinishReason.STOP - elif finish_reason == "tool_calls": - 
assert response.finish_reason == types.FinishReason.STOP - elif finish_reason == "content_filter": - assert response.finish_reason == types.FinishReason.SAFETY + # Verify correct enum mapping using dictionary + expected_mapping = { + "length": types.FinishReason.MAX_TOKENS, + "stop": types.FinishReason.STOP, + "tool_calls": types.FinishReason.STOP, + "content_filter": types.FinishReason.SAFETY, + } + assert response.finish_reason == expected_mapping[finish_reason] if expected_content: assert response.content.parts[0].text == expected_content if has_tool_calls: From 538a5b068eb409966e5a0564afa3d66ff7c46a3f Mon Sep 17 00:00:00 2001 From: Andrew Grande Date: Tue, 14 Oct 2025 20:56:55 -0700 Subject: [PATCH 09/18] refactor: use _FINISH_REASON_MAPPING directly in tests Import and use the actual _FINISH_REASON_MAPPING from lite_llm instead of duplicating it in tests. This ensures tests stay in sync with implementation changes automatically. Benefits: - Single source of truth for finish_reason mappings - Tests automatically reflect any future mapping changes - Reduced code duplication - Better maintainability Addresses review comment: https://github.com/google/adk-python/pull/3114#pullrequestreview-3338249498 --- tests/unittests/models/test_litellm.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/tests/unittests/models/test_litellm.py b/tests/unittests/models/test_litellm.py index 85e6b72fdd..f15bebfe0a 100644 --- a/tests/unittests/models/test_litellm.py +++ b/tests/unittests/models/test_litellm.py @@ -19,6 +19,7 @@ import warnings from google.adk.models.lite_llm import _content_to_message_param +from google.adk.models.lite_llm import _FINISH_REASON_MAPPING from google.adk.models.lite_llm import _function_declaration_to_tool_param from google.adk.models.lite_llm import _get_content from google.adk.models.lite_llm import _message_to_generate_content_response @@ -1969,14 +1970,8 @@ async def test_finish_reason_propagation( assert response.content.role == "model" # Verify finish_reason is mapped to FinishReason enum assert isinstance(response.finish_reason, types.FinishReason) - # Verify correct enum mapping using dictionary - expected_mapping = { - "length": types.FinishReason.MAX_TOKENS, - "stop": types.FinishReason.STOP, - "tool_calls": types.FinishReason.STOP, - "content_filter": types.FinishReason.SAFETY, - } - assert response.finish_reason == expected_mapping[finish_reason] + # Verify correct enum mapping using the actual mapping from lite_llm + assert response.finish_reason == _FINISH_REASON_MAPPING[finish_reason] if expected_content: assert response.content.parts[0].text == expected_content if has_tool_calls: From cb44fb436af49ce1be68ea48ede39ee457e39be6 Mon Sep 17 00:00:00 2001 From: Andrew Grande Date: Tue, 14 Oct 2025 21:06:39 -0700 Subject: [PATCH 10/18] refactor: remove unused Union import from llm_response.py The Union type is no longer needed since finish_reason is always a FinishReason enum (never a string after our mapping). 
Addresses review comment: https://github.com/google/adk-python/pull/3114#discussion_r2431044481 --- src/google/adk/models/llm_response.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/google/adk/models/llm_response.py b/src/google/adk/models/llm_response.py index 982127bb1d..fc5190a277 100644 --- a/src/google/adk/models/llm_response.py +++ b/src/google/adk/models/llm_response.py @@ -16,7 +16,6 @@ from typing import Any from typing import Optional -from typing import Union from google.genai import types from pydantic import alias_generators From 0becccfd304b150490933a5954393dcc52de05a7 Mon Sep 17 00:00:00 2001 From: Andrew Grande Date: Tue, 14 Oct 2025 21:09:45 -0700 Subject: [PATCH 11/18] Apply suggestion from @gemini-code-assist[bot] Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- src/google/adk/models/lite_llm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/google/adk/models/lite_llm.py b/src/google/adk/models/lite_llm.py index b1121acba2..f6fcaee279 100644 --- a/src/google/adk/models/lite_llm.py +++ b/src/google/adk/models/lite_llm.py @@ -508,7 +508,7 @@ def _model_response_to_generate_content_response( message = None finish_reason = None - if choices := response.get("choices"): + if (choices := response.get("choices")) and choices: first_choice = choices[0] message = first_choice.get("message", None) finish_reason = first_choice.get("finish_reason", None) From 63d8b71423518a80e447f0d70d5050f74b6145ed Mon Sep 17 00:00:00 2001 From: Eliza Huang Date: Tue, 28 Oct 2025 10:52:50 -0700 Subject: [PATCH 12/18] fix: apply review suggestions for litellm finish_reason --- src/google/adk/models/lite_llm.py | 4 +++- tests/unittests/models/test_litellm.py | 2 -- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/google/adk/models/lite_llm.py b/src/google/adk/models/lite_llm.py index f6fcaee279..1964995b5c 100644 --- a/src/google/adk/models/lite_llm.py +++ b/src/google/adk/models/lite_llm.py @@ -72,7 +72,9 @@ _FINISH_REASON_MAPPING = { "length": types.FinishReason.MAX_TOKENS, "stop": types.FinishReason.STOP, - "tool_calls": types.FinishReason.STOP, # Normal completion with tool invocation + "tool_calls": ( + types.FinishReason.STOP + ), # Normal completion with tool invocation "function_call": types.FinishReason.STOP, # Legacy function call variant "content_filter": types.FinishReason.SAFETY, } diff --git a/tests/unittests/models/test_litellm.py b/tests/unittests/models/test_litellm.py index f15bebfe0a..9d7e9494e6 100644 --- a/tests/unittests/models/test_litellm.py +++ b/tests/unittests/models/test_litellm.py @@ -1981,7 +1981,6 @@ async def test_finish_reason_propagation( mock_acompletion.assert_called_once() - @pytest.mark.asyncio async def test_finish_reason_unknown_maps_to_other( mock_acompletion, lite_llm_instance @@ -2015,4 +2014,3 @@ async def test_finish_reason_unknown_maps_to_other( assert response.finish_reason == types.FinishReason.OTHER mock_acompletion.assert_called_once() - From 76a8f248bc27df749a7f2ea467eff1f23f4f973c Mon Sep 17 00:00:00 2001 From: Eliza Huang Date: Tue, 28 Oct 2025 13:30:10 -0700 Subject: [PATCH 13/18] Add e2e test for litellm finish reason --- .../samples/litellm_reasoning_agent/README.md | 14 ++++ .../samples/litellm_reasoning_agent/agent.py | 71 +++++++++++++++++++ 2 files changed, 85 insertions(+) create mode 100644 contributing/samples/litellm_reasoning_agent/README.md create mode 100644 contributing/samples/litellm_reasoning_agent/agent.py diff --git 
a/contributing/samples/litellm_reasoning_agent/README.md b/contributing/samples/litellm_reasoning_agent/README.md new file mode 100644 index 0000000000..6b62d386c0 --- /dev/null +++ b/contributing/samples/litellm_reasoning_agent/README.md @@ -0,0 +1,14 @@ +# Finish Reason Test Agent + +This sample contains a script to verify that the `finish_reason` from a LiteLLM model is correctly propagated to the `LlmResponse` object. + +The script is configured to use the `openai/gpt-3.5-turbo` model through LiteLLM. It sets `max_tokens=50` to force the model to stop execution due to length constraints. An `after_model_callback` is used to inspect the `response.finish_reason` and verify that it is `length`. + +## Running the test + +To run this sample, you will need to have an OpenAI API key set as an environment variable. Then, run the `agent.py` script directly. + +```bash +export OPENAI_API_KEY="your-api-key-here" +python agent.py +``` diff --git a/contributing/samples/litellm_reasoning_agent/agent.py b/contributing/samples/litellm_reasoning_agent/agent.py new file mode 100644 index 0000000000..a636e9c0ff --- /dev/null +++ b/contributing/samples/litellm_reasoning_agent/agent.py @@ -0,0 +1,71 @@ +import asyncio +import os +from google.adk.agents import Agent +from google.adk.runners import Runner +from google.adk.agents.callback_context import CallbackContext +from google.adk.models.lite_llm import LiteLlm +from google.adk.models.llm_response import LlmResponse +from google.adk.sessions import InMemorySessionService +from google.genai import types + + +def create_inspector(): + """Callback to capture finish_reason.""" + captured = {"finish_reason": None} + + def inspector(callback_context: CallbackContext, llm_response: LlmResponse) -> LlmResponse: + captured["finish_reason"] = llm_response.finish_reason + return llm_response + + inspector.captured = captured + return inspector + + +async def test(): + # Create model with low max_tokens to trigger truncation + model = LiteLlm( + model="gpt-3.5-turbo", + api_key=os.environ.get("OPENAI_API_KEY"), + max_tokens=50, # Intentionally low + ) + + inspector = create_inspector() + + agent = Agent( + model=model, + name="test", + instruction="Provide detailed explanations.", + after_model_callback=inspector, + ) + + session_service = InMemorySessionService() + runner = Runner( + app_name="test", + agent=agent, + session_service=session_service + ) + + await session_service.create_session( + app_name="test", + user_id="user", + session_id="session", + state={}, + ) + + message = types.Content( + role="user", + parts=[types.Part(text="Explain quantum computing in detail.")] + ) + + async for _ in runner.run_async( + user_id="user", + session_id="session", + new_message=message + ): + pass + + print(f"finish_reason: {inspector.captured['finish_reason']}") + + +if __name__ == "__main__": + asyncio.run(test()) From c736928ebd85437c0a276b9b890ed0cf4761a579 Mon Sep 17 00:00:00 2001 From: Eliza Huang Date: Tue, 28 Oct 2025 13:32:45 -0700 Subject: [PATCH 14/18] fix python formatting --- .../samples/litellm_reasoning_agent/agent.py | 101 +++++++++--------- 1 file changed, 49 insertions(+), 52 deletions(-) diff --git a/contributing/samples/litellm_reasoning_agent/agent.py b/contributing/samples/litellm_reasoning_agent/agent.py index a636e9c0ff..6fa9ba89c5 100644 --- a/contributing/samples/litellm_reasoning_agent/agent.py +++ b/contributing/samples/litellm_reasoning_agent/agent.py @@ -1,71 +1,68 @@ import asyncio import os + from google.adk.agents import Agent -from 
google.adk.runners import Runner from google.adk.agents.callback_context import CallbackContext from google.adk.models.lite_llm import LiteLlm from google.adk.models.llm_response import LlmResponse +from google.adk.runners import Runner from google.adk.sessions import InMemorySessionService from google.genai import types def create_inspector(): - """Callback to capture finish_reason.""" - captured = {"finish_reason": None} + """Callback to capture finish_reason.""" + captured = {"finish_reason": None} - def inspector(callback_context: CallbackContext, llm_response: LlmResponse) -> LlmResponse: - captured["finish_reason"] = llm_response.finish_reason - return llm_response + def inspector( + callback_context: CallbackContext, llm_response: LlmResponse + ) -> LlmResponse: + captured["finish_reason"] = llm_response.finish_reason + return llm_response - inspector.captured = captured - return inspector + inspector.captured = captured + return inspector async def test(): - # Create model with low max_tokens to trigger truncation - model = LiteLlm( - model="gpt-3.5-turbo", - api_key=os.environ.get("OPENAI_API_KEY"), - max_tokens=50, # Intentionally low - ) - - inspector = create_inspector() - - agent = Agent( - model=model, - name="test", - instruction="Provide detailed explanations.", - after_model_callback=inspector, - ) - - session_service = InMemorySessionService() - runner = Runner( - app_name="test", - agent=agent, - session_service=session_service - ) - - await session_service.create_session( - app_name="test", - user_id="user", - session_id="session", - state={}, - ) - - message = types.Content( - role="user", - parts=[types.Part(text="Explain quantum computing in detail.")] - ) - - async for _ in runner.run_async( - user_id="user", - session_id="session", - new_message=message - ): - pass - - print(f"finish_reason: {inspector.captured['finish_reason']}") + # Create model with low max_tokens to trigger truncation + model = LiteLlm( + model="gpt-3.5-turbo", + api_key=os.environ.get("OPENAI_API_KEY"), + max_tokens=50, # Intentionally low + ) + + inspector = create_inspector() + + agent = Agent( + model=model, + name="test", + instruction="Provide detailed explanations.", + after_model_callback=inspector, + ) + + session_service = InMemorySessionService() + runner = Runner(app_name="test", agent=agent, session_service=session_service) + + await session_service.create_session( + app_name="test", + user_id="user", + session_id="session", + state={}, + ) + + message = types.Content( + role="user", + parts=[types.Part(text="Explain quantum computing in detail.")], + ) + + async for _ in runner.run_async( + user_id="user", session_id="session", new_message=message + ): + pass + + print(f"finish_reason: {inspector.captured['finish_reason']}") if __name__ == "__main__": - asyncio.run(test()) + asyncio.run(test()) From 9622dee4b3304201f756f4588cf1445eb780235e Mon Sep 17 00:00:00 2001 From: Eliza Huang Date: Tue, 28 Oct 2025 14:53:30 -0700 Subject: [PATCH 15/18] add license --- .../samples/litellm_reasoning_agent/agent.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/contributing/samples/litellm_reasoning_agent/agent.py b/contributing/samples/litellm_reasoning_agent/agent.py index 6fa9ba89c5..0134be259e 100644 --- a/contributing/samples/litellm_reasoning_agent/agent.py +++ b/contributing/samples/litellm_reasoning_agent/agent.py @@ -1,3 +1,17 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in 
compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import asyncio import os From b8b49fb307c2e51520f1df9981e98c685d2bb7e5 Mon Sep 17 00:00:00 2001 From: Eliza Huang Date: Tue, 28 Oct 2025 16:42:38 -0700 Subject: [PATCH 16/18] fix sample reasoning agent --- .../samples/litellm_reasoning_agent/README.md | 6 +- .../samples/litellm_reasoning_agent/agent.py | 59 ++++--------------- .../samples/litellm_reasoning_agent/main.py | 48 +++++++++++++++ 3 files changed, 64 insertions(+), 49 deletions(-) create mode 100644 contributing/samples/litellm_reasoning_agent/main.py diff --git a/contributing/samples/litellm_reasoning_agent/README.md b/contributing/samples/litellm_reasoning_agent/README.md index 6b62d386c0..48575dc491 100644 --- a/contributing/samples/litellm_reasoning_agent/README.md +++ b/contributing/samples/litellm_reasoning_agent/README.md @@ -1,4 +1,4 @@ -# Finish Reason Test Agent +# LiteLLM Reasoning Agent This sample contains a script to verify that the `finish_reason` from a LiteLLM model is correctly propagated to the `LlmResponse` object. @@ -6,9 +6,9 @@ The script is configured to use the `openai/gpt-3.5-turbo` model through LiteLLM ## Running the test -To run this sample, you will need to have an OpenAI API key set as an environment variable. Then, run the `agent.py` script directly. +To run this sample, you will need to have an OpenAI API key set as an environment variable. Then, run the `main.py` script directly. ```bash export OPENAI_API_KEY="your-api-key-here" -python agent.py +python main.py ``` diff --git a/contributing/samples/litellm_reasoning_agent/agent.py b/contributing/samples/litellm_reasoning_agent/agent.py index 0134be259e..f6c953a5a7 100644 --- a/contributing/samples/litellm_reasoning_agent/agent.py +++ b/contributing/samples/litellm_reasoning_agent/agent.py @@ -12,18 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import asyncio import os from google.adk.agents import Agent from google.adk.agents.callback_context import CallbackContext from google.adk.models.lite_llm import LiteLlm from google.adk.models.llm_response import LlmResponse -from google.adk.runners import Runner -from google.adk.sessions import InMemorySessionService -from google.genai import types - - def create_inspector(): """Callback to capture finish_reason.""" captured = {"finish_reason": None} @@ -38,45 +32,18 @@ def inspector( return inspector -async def test(): - # Create model with low max_tokens to trigger truncation - model = LiteLlm( - model="gpt-3.5-turbo", - api_key=os.environ.get("OPENAI_API_KEY"), - max_tokens=50, # Intentionally low - ) - - inspector = create_inspector() - - agent = Agent( - model=model, - name="test", - instruction="Provide detailed explanations.", - after_model_callback=inspector, - ) - - session_service = InMemorySessionService() - runner = Runner(app_name="test", agent=agent, session_service=session_service) - - await session_service.create_session( - app_name="test", - user_id="user", - session_id="session", - state={}, - ) - - message = types.Content( - role="user", - parts=[types.Part(text="Explain quantum computing in detail.")], - ) - - async for _ in runner.run_async( - user_id="user", session_id="session", new_message=message - ): - pass - - print(f"finish_reason: {inspector.captured['finish_reason']}") +# Create model with low max_tokens to trigger truncation +model = LiteLlm( + model="gpt-3.5-turbo", + api_key=os.environ.get("OPENAI_API_KEY"), + max_tokens=50, # Intentionally low +) +inspector = create_inspector() -if __name__ == "__main__": - asyncio.run(test()) +agent = Agent( + model=model, + name="test", + instruction="Provide detailed explanations.", + after_model_callback=inspector, +) diff --git a/contributing/samples/litellm_reasoning_agent/main.py b/contributing/samples/litellm_reasoning_agent/main.py new file mode 100644 index 0000000000..513453b6fd --- /dev/null +++ b/contributing/samples/litellm_reasoning_agent/main.py @@ -0,0 +1,48 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import asyncio + +from agent import agent, inspector +from google.adk.runners import Runner +from google.adk.sessions import InMemorySessionService +from google.genai import types + + +async def main(): + session_service = InMemorySessionService() + runner = Runner(app_name="test", agent=agent, session_service=session_service) + + await session_service.create_session( + app_name="test", + user_id="user", + session_id="session", + state={}, + ) + + message = types.Content( + role="user", + parts=[types.Part(text="Explain quantum computing in detail.")], + ) + + async for _ in runner.run_async( + user_id="user", session_id="session", new_message=message + ): + pass + + print(f"finish_reason: {inspector.captured['finish_reason']}") + + +if __name__ == "__main__": + asyncio.run(main()) From bb129529d6efc989073038c4c0360c91d908f70f Mon Sep 17 00:00:00 2001 From: Eliza Huang Date: Tue, 28 Oct 2025 16:44:35 -0700 Subject: [PATCH 17/18] fix python format --- contributing/samples/litellm_reasoning_agent/agent.py | 2 ++ contributing/samples/litellm_reasoning_agent/main.py | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/contributing/samples/litellm_reasoning_agent/agent.py b/contributing/samples/litellm_reasoning_agent/agent.py index f6c953a5a7..9a5641614d 100644 --- a/contributing/samples/litellm_reasoning_agent/agent.py +++ b/contributing/samples/litellm_reasoning_agent/agent.py @@ -18,6 +18,8 @@ from google.adk.agents.callback_context import CallbackContext from google.adk.models.lite_llm import LiteLlm from google.adk.models.llm_response import LlmResponse + + def create_inspector(): """Callback to capture finish_reason.""" captured = {"finish_reason": None} diff --git a/contributing/samples/litellm_reasoning_agent/main.py b/contributing/samples/litellm_reasoning_agent/main.py index 513453b6fd..f35c5e0617 100644 --- a/contributing/samples/litellm_reasoning_agent/main.py +++ b/contributing/samples/litellm_reasoning_agent/main.py @@ -14,7 +14,8 @@ import asyncio -from agent import agent, inspector +from agent import agent +from agent import inspector from google.adk.runners import Runner from google.adk.sessions import InMemorySessionService from google.genai import types From 7171a09663b694deddd5dce53b63446fd8318fc9 Mon Sep 17 00:00:00 2001 From: Eliza Huang Date: Tue, 28 Oct 2025 16:50:51 -0700 Subject: [PATCH 18/18] remove agent --- .../samples/litellm_reasoning_agent/README.md | 14 ----- .../samples/litellm_reasoning_agent/agent.py | 51 ------------------- .../samples/litellm_reasoning_agent/main.py | 49 ------------------ 3 files changed, 114 deletions(-) delete mode 100644 contributing/samples/litellm_reasoning_agent/README.md delete mode 100644 contributing/samples/litellm_reasoning_agent/agent.py delete mode 100644 contributing/samples/litellm_reasoning_agent/main.py diff --git a/contributing/samples/litellm_reasoning_agent/README.md b/contributing/samples/litellm_reasoning_agent/README.md deleted file mode 100644 index 48575dc491..0000000000 --- a/contributing/samples/litellm_reasoning_agent/README.md +++ /dev/null @@ -1,14 +0,0 @@ -# LiteLLM Reasoning Agent - -This sample contains a script to verify that the `finish_reason` from a LiteLLM model is correctly propagated to the `LlmResponse` object. - -The script is configured to use the `openai/gpt-3.5-turbo` model through LiteLLM. It sets `max_tokens=50` to force the model to stop execution due to length constraints. 
An `after_model_callback` is used to inspect the `response.finish_reason` and verify that it is `length`. - -## Running the test - -To run this sample, you will need to have an OpenAI API key set as an environment variable. Then, run the `main.py` script directly. - -```bash -export OPENAI_API_KEY="your-api-key-here" -python main.py -``` diff --git a/contributing/samples/litellm_reasoning_agent/agent.py b/contributing/samples/litellm_reasoning_agent/agent.py deleted file mode 100644 index 9a5641614d..0000000000 --- a/contributing/samples/litellm_reasoning_agent/agent.py +++ /dev/null @@ -1,51 +0,0 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os - -from google.adk.agents import Agent -from google.adk.agents.callback_context import CallbackContext -from google.adk.models.lite_llm import LiteLlm -from google.adk.models.llm_response import LlmResponse - - -def create_inspector(): - """Callback to capture finish_reason.""" - captured = {"finish_reason": None} - - def inspector( - callback_context: CallbackContext, llm_response: LlmResponse - ) -> LlmResponse: - captured["finish_reason"] = llm_response.finish_reason - return llm_response - - inspector.captured = captured - return inspector - - -# Create model with low max_tokens to trigger truncation -model = LiteLlm( - model="gpt-3.5-turbo", - api_key=os.environ.get("OPENAI_API_KEY"), - max_tokens=50, # Intentionally low -) - -inspector = create_inspector() - -agent = Agent( - model=model, - name="test", - instruction="Provide detailed explanations.", - after_model_callback=inspector, -) diff --git a/contributing/samples/litellm_reasoning_agent/main.py b/contributing/samples/litellm_reasoning_agent/main.py deleted file mode 100644 index f35c5e0617..0000000000 --- a/contributing/samples/litellm_reasoning_agent/main.py +++ /dev/null @@ -1,49 +0,0 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import asyncio - -from agent import agent -from agent import inspector -from google.adk.runners import Runner -from google.adk.sessions import InMemorySessionService -from google.genai import types - - -async def main(): - session_service = InMemorySessionService() - runner = Runner(app_name="test", agent=agent, session_service=session_service) - - await session_service.create_session( - app_name="test", - user_id="user", - session_id="session", - state={}, - ) - - message = types.Content( - role="user", - parts=[types.Part(text="Explain quantum computing in detail.")], - ) - - async for _ in runner.run_async( - user_id="user", session_id="session", new_message=message - ): - pass - - print(f"finish_reason: {inspector.captured['finish_reason']}") - - -if __name__ == "__main__": - asyncio.run(main())