From cb92b70dac02a6e48ad0c21c9c6eabf14c44d2ce Mon Sep 17 00:00:00 2001 From: Andrew Grande Date: Wed, 8 Oct 2025 10:37:19 -0700 Subject: [PATCH 01/18] fix: propagate finish_reason from LiteLLM responses Fixes #3109 This change ensures that the finish_reason field from LiteLLM responses is properly propagated to LlmResponse objects, enabling callbacks to detect completion conditions like max_tokens truncation. Changes: - Extract finish_reason from LiteLLM response in lite_llm.py - Update tracing.py to handle both enum (Gemini) and string (LiteLLM) finish_reason values - Add comprehensive unit tests for finish_reason propagation The fix allows after_model_callback functions to properly detect: - "length": max_tokens limit reached - "stop": natural completion - "tool_calls": tool invocations - "content_filter": filtered content --- src/google/adk/models/lite_llm.py | 4 + src/google/adk/telemetry/tracing.py | 6 +- tests/unittests/models/test_litellm.py | 140 +++++++++++++++++++++++++ 3 files changed, 149 insertions(+), 1 deletion(-) diff --git a/src/google/adk/models/lite_llm.py b/src/google/adk/models/lite_llm.py index 94d9831c39..e1f3d0d70f 100644 --- a/src/google/adk/models/lite_llm.py +++ b/src/google/adk/models/lite_llm.py @@ -494,13 +494,17 @@ def _model_response_to_generate_content_response( """ message = None + finish_reason = None if response.get("choices", None): message = response["choices"][0].get("message", None) + finish_reason = response["choices"][0].get("finish_reason", None) if not message: raise ValueError("No message in response") llm_response = _message_to_generate_content_response(message) + if finish_reason: + llm_response.finish_reason = finish_reason if response.get("usage", None): llm_response.usage_metadata = types.GenerateContentResponseUsageMetadata( prompt_token_count=response["usage"].get("prompt_tokens", 0), diff --git a/src/google/adk/telemetry/tracing.py b/src/google/adk/telemetry/tracing.py index 021471a5aa..8d3f841a1f 100644 --- a/src/google/adk/telemetry/tracing.py +++ b/src/google/adk/telemetry/tracing.py @@ -303,9 +303,13 @@ def trace_call_llm( llm_response.usage_metadata.candidates_token_count, ) if llm_response.finish_reason: + if hasattr(llm_response.finish_reason, 'value'): + finish_reason_str = llm_response.finish_reason.value.lower() + else: + finish_reason_str = str(llm_response.finish_reason).lower() span.set_attribute( 'gen_ai.response.finish_reasons', - [llm_response.finish_reason.value.lower()], + [finish_reason_str], ) diff --git a/tests/unittests/models/test_litellm.py b/tests/unittests/models/test_litellm.py index 2fbacc0f1b..1ab2f13ee7 100644 --- a/tests/unittests/models/test_litellm.py +++ b/tests/unittests/models/test_litellm.py @@ -1903,3 +1903,143 @@ def test_non_gemini_litellm_no_warning(): # Test with non-Gemini model LiteLlm(model="openai/gpt-4o") assert len(w) == 0 + + +@pytest.mark.asyncio +async def test_finish_reason_propagation_non_streaming( + mock_acompletion, lite_llm_instance +): + """Test that finish_reason is properly propagated from LiteLLM response in non-streaming mode.""" + mock_response_with_finish_reason = ModelResponse( + choices=[ + Choices( + message=ChatCompletionAssistantMessage( + role="assistant", + content="Test response", + ), + finish_reason="length", + ) + ] + ) + mock_acompletion.return_value = mock_response_with_finish_reason + + llm_request = LlmRequest( + contents=[ + types.Content( + role="user", parts=[types.Part.from_text(text="Test prompt")] + ) + ], + ) + + async for response in 
lite_llm_instance.generate_content_async(llm_request): + assert response.content.role == "model" + assert response.content.parts[0].text == "Test response" + assert response.finish_reason == "length" + + mock_acompletion.assert_called_once() + + +@pytest.mark.asyncio +async def test_finish_reason_propagation_stop( + mock_acompletion, lite_llm_instance +): + """Test that finish_reason='stop' is properly propagated.""" + mock_response_with_finish_reason = ModelResponse( + choices=[ + Choices( + message=ChatCompletionAssistantMessage( + role="assistant", + content="Complete response", + ), + finish_reason="stop", + ) + ] + ) + mock_acompletion.return_value = mock_response_with_finish_reason + + llm_request = LlmRequest( + contents=[ + types.Content( + role="user", parts=[types.Part.from_text(text="Test prompt")] + ) + ], + ) + + async for response in lite_llm_instance.generate_content_async(llm_request): + assert response.finish_reason == "stop" + + mock_acompletion.assert_called_once() + + +@pytest.mark.asyncio +async def test_finish_reason_propagation_tool_calls( + mock_acompletion, lite_llm_instance +): + """Test that finish_reason='tool_calls' is properly propagated.""" + mock_response_with_finish_reason = ModelResponse( + choices=[ + Choices( + message=ChatCompletionAssistantMessage( + role="assistant", + content="", + tool_calls=[ + ChatCompletionMessageToolCall( + type="function", + id="test_id", + function=Function( + name="test_function", + arguments='{"arg": "value"}', + ), + ) + ], + ), + finish_reason="tool_calls", + ) + ] + ) + mock_acompletion.return_value = mock_response_with_finish_reason + + llm_request = LlmRequest( + contents=[ + types.Content( + role="user", parts=[types.Part.from_text(text="Test prompt")] + ) + ], + ) + + async for response in lite_llm_instance.generate_content_async(llm_request): + assert response.finish_reason == "tool_calls" + + mock_acompletion.assert_called_once() + + +@pytest.mark.asyncio +async def test_finish_reason_content_filter( + mock_acompletion, lite_llm_instance +): + """Test that finish_reason='content_filter' is properly propagated.""" + mock_response_with_content_filter = ModelResponse( + choices=[ + Choices( + message=ChatCompletionAssistantMessage( + role="assistant", + content="", + ), + finish_reason="content_filter", + ) + ] + ) + mock_acompletion.return_value = mock_response_with_content_filter + + llm_request = LlmRequest( + contents=[ + types.Content( + role="user", parts=[types.Part.from_text(text="Test prompt")] + ) + ], + ) + + async for response in lite_llm_instance.generate_content_async(llm_request): + assert response.finish_reason == "content_filter" + + mock_acompletion.assert_called_once() From 5c43ac2a97f208cf4ccabf5eba89576786ed11c0 Mon Sep 17 00:00:00 2001 From: Andrew Grande Date: Wed, 8 Oct 2025 10:45:32 -0700 Subject: [PATCH 02/18] refactor: address code review feedback - Use .name instead of .value for enum finish_reason (more robust for IntEnum) - Extract first choice using walrus operator for better readability - Consolidate tests using @pytest.mark.parametrize to reduce duplication - Strengthen test assertions to verify response content All 53 tests pass. 
--- src/google/adk/models/lite_llm.py | 8 +- src/google/adk/telemetry/tracing.py | 4 +- tests/unittests/models/test_litellm.py | 162 +++++++------------------ 3 files changed, 54 insertions(+), 120 deletions(-) diff --git a/src/google/adk/models/lite_llm.py b/src/google/adk/models/lite_llm.py index e1f3d0d70f..8d33cdd22c 100644 --- a/src/google/adk/models/lite_llm.py +++ b/src/google/adk/models/lite_llm.py @@ -495,9 +495,11 @@ def _model_response_to_generate_content_response( message = None finish_reason = None - if response.get("choices", None): - message = response["choices"][0].get("message", None) - finish_reason = response["choices"][0].get("finish_reason", None) + if response.get("choices", None) and ( + first_choice := response["choices"][0] + ): + message = first_choice.get("message", None) + finish_reason = first_choice.get("finish_reason", None) if not message: raise ValueError("No message in response") diff --git a/src/google/adk/telemetry/tracing.py b/src/google/adk/telemetry/tracing.py index 8d3f841a1f..42fa927e6c 100644 --- a/src/google/adk/telemetry/tracing.py +++ b/src/google/adk/telemetry/tracing.py @@ -303,8 +303,8 @@ def trace_call_llm( llm_response.usage_metadata.candidates_token_count, ) if llm_response.finish_reason: - if hasattr(llm_response.finish_reason, 'value'): - finish_reason_str = llm_response.finish_reason.value.lower() + if hasattr(llm_response.finish_reason, 'name'): + finish_reason_str = llm_response.finish_reason.name.lower() else: finish_reason_str = str(llm_response.finish_reason).lower() span.set_attribute( diff --git a/tests/unittests/models/test_litellm.py b/tests/unittests/models/test_litellm.py index 1ab2f13ee7..000717d3d5 100644 --- a/tests/unittests/models/test_litellm.py +++ b/tests/unittests/models/test_litellm.py @@ -1905,131 +1905,57 @@ def test_non_gemini_litellm_no_warning(): assert len(w) == 0 +@pytest.mark.parametrize( + "finish_reason,response_content,expected_content,has_tool_calls", + [ + ("length", "Test response", "Test response", False), + ("stop", "Complete response", "Complete response", False), + ( + "tool_calls", + "", + "", + True, + ), + ("content_filter", "", "", False), + ], + ids=["length", "stop", "tool_calls", "content_filter"], +) @pytest.mark.asyncio -async def test_finish_reason_propagation_non_streaming( - mock_acompletion, lite_llm_instance -): - """Test that finish_reason is properly propagated from LiteLLM response in non-streaming mode.""" - mock_response_with_finish_reason = ModelResponse( - choices=[ - Choices( - message=ChatCompletionAssistantMessage( - role="assistant", - content="Test response", - ), - finish_reason="length", - ) - ] - ) - mock_acompletion.return_value = mock_response_with_finish_reason - - llm_request = LlmRequest( - contents=[ - types.Content( - role="user", parts=[types.Part.from_text(text="Test prompt")] - ) - ], - ) - - async for response in lite_llm_instance.generate_content_async(llm_request): - assert response.content.role == "model" - assert response.content.parts[0].text == "Test response" - assert response.finish_reason == "length" - - mock_acompletion.assert_called_once() - - -@pytest.mark.asyncio -async def test_finish_reason_propagation_stop( - mock_acompletion, lite_llm_instance -): - """Test that finish_reason='stop' is properly propagated.""" - mock_response_with_finish_reason = ModelResponse( - choices=[ - Choices( - message=ChatCompletionAssistantMessage( - role="assistant", - content="Complete response", - ), - finish_reason="stop", - ) - ] - ) - 
mock_acompletion.return_value = mock_response_with_finish_reason - - llm_request = LlmRequest( - contents=[ - types.Content( - role="user", parts=[types.Part.from_text(text="Test prompt")] - ) - ], - ) - - async for response in lite_llm_instance.generate_content_async(llm_request): - assert response.finish_reason == "stop" - - mock_acompletion.assert_called_once() - - -@pytest.mark.asyncio -async def test_finish_reason_propagation_tool_calls( - mock_acompletion, lite_llm_instance +async def test_finish_reason_propagation( + mock_acompletion, + lite_llm_instance, + finish_reason, + response_content, + expected_content, + has_tool_calls, ): - """Test that finish_reason='tool_calls' is properly propagated.""" - mock_response_with_finish_reason = ModelResponse( - choices=[ - Choices( - message=ChatCompletionAssistantMessage( - role="assistant", - content="", - tool_calls=[ - ChatCompletionMessageToolCall( - type="function", - id="test_id", - function=Function( - name="test_function", - arguments='{"arg": "value"}', - ), - ) - ], - ), - finish_reason="tool_calls", - ) - ] - ) - mock_acompletion.return_value = mock_response_with_finish_reason - - llm_request = LlmRequest( - contents=[ - types.Content( - role="user", parts=[types.Part.from_text(text="Test prompt")] - ) - ], - ) - - async for response in lite_llm_instance.generate_content_async(llm_request): - assert response.finish_reason == "tool_calls" - - mock_acompletion.assert_called_once() - + """Test that finish_reason is properly propagated from LiteLLM response.""" + tool_calls = None + if has_tool_calls: + tool_calls = [ + ChatCompletionMessageToolCall( + type="function", + id="test_id", + function=Function( + name="test_function", + arguments='{"arg": "value"}', + ), + ) + ] -@pytest.mark.asyncio -async def test_finish_reason_content_filter( - mock_acompletion, lite_llm_instance -): - """Test that finish_reason='content_filter' is properly propagated.""" - mock_response_with_content_filter = ModelResponse( + mock_response = ModelResponse( choices=[ Choices( message=ChatCompletionAssistantMessage( role="assistant", - content="", + content=response_content, + tool_calls=tool_calls, ), - finish_reason="content_filter", + finish_reason=finish_reason, ) ] ) - mock_acompletion.return_value = mock_response_with_content_filter + mock_acompletion.return_value = mock_response llm_request = LlmRequest( contents=[ @@ -2040,6 +1966,12 @@ async def test_finish_reason_content_filter( ) async for response in lite_llm_instance.generate_content_async(llm_request): - assert response.finish_reason == "content_filter" + assert response.content.role == "model" + assert response.finish_reason == finish_reason + if expected_content: + assert response.content.parts[0].text == expected_content + if has_tool_calls: + assert len(response.content.parts) > 0 + assert response.content.parts[-1].function_call.name == "test_function" mock_acompletion.assert_called_once() From ac27a3aa6470c6bdbcf4260153d56aebbed321ce Mon Sep 17 00:00:00 2001 From: Andrew Grande Date: Wed, 8 Oct 2025 10:51:42 -0700 Subject: [PATCH 03/18] Update src/google/adk/models/lite_llm.py Addressing review comments Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- src/google/adk/models/lite_llm.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/google/adk/models/lite_llm.py b/src/google/adk/models/lite_llm.py index 8d33cdd22c..4cbf0bdafe 100644 --- a/src/google/adk/models/lite_llm.py +++ 
b/src/google/adk/models/lite_llm.py @@ -495,9 +495,8 @@ def _model_response_to_generate_content_response( message = None finish_reason = None - if response.get("choices", None) and ( - first_choice := response["choices"][0] - ): + if choices := response.get("choices"): + first_choice = choices[0] message = first_choice.get("message", None) finish_reason = first_choice.get("finish_reason", None) From 3924bcb2ba6b475df3bd5776e57bee9490e16f2e Mon Sep 17 00:00:00 2001 From: Andrew Grande Date: Wed, 8 Oct 2025 10:51:53 -0700 Subject: [PATCH 04/18] Update src/google/adk/telemetry/tracing.py Addressing review comments Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- src/google/adk/telemetry/tracing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/google/adk/telemetry/tracing.py b/src/google/adk/telemetry/tracing.py index 42fa927e6c..7f37106251 100644 --- a/src/google/adk/telemetry/tracing.py +++ b/src/google/adk/telemetry/tracing.py @@ -303,7 +303,7 @@ def trace_call_llm( llm_response.usage_metadata.candidates_token_count, ) if llm_response.finish_reason: - if hasattr(llm_response.finish_reason, 'name'): + if isinstance(llm_response.finish_reason, types.FinishReason): finish_reason_str = llm_response.finish_reason.name.lower() else: finish_reason_str = str(llm_response.finish_reason).lower() From ab6f5779e38a143b66a90e39c4710863dbff553f Mon Sep 17 00:00:00 2001 From: Andrew Grande Date: Wed, 8 Oct 2025 10:55:13 -0700 Subject: [PATCH 05/18] fix: update finish_reason type hint to support both enum and string Address type safety issue where finish_reason can be either: - types.FinishReason enum (from Gemini responses) - str (from LiteLLM responses) Updated LlmResponse.finish_reason type hint to: Optional[Union[types.FinishReason, str]] This ensures type checkers correctly validate the dual nature of this field across different model providers. All 53 tests pass. --- src/google/adk/models/llm_response.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/google/adk/models/llm_response.py b/src/google/adk/models/llm_response.py index 56eb6318c1..c0ab7f7b16 100644 --- a/src/google/adk/models/llm_response.py +++ b/src/google/adk/models/llm_response.py @@ -16,6 +16,7 @@ from typing import Any from typing import Optional +from typing import Union from google.genai import types from pydantic import alias_generators @@ -77,8 +78,11 @@ class LlmResponse(BaseModel): Only used for streaming mode. """ - finish_reason: Optional[types.FinishReason] = None - """The finish reason of the response.""" + finish_reason: Optional[Union[types.FinishReason, str]] = None + """The finish reason of the response. + + Can be either a types.FinishReason enum (from Gemini) or a string (from LiteLLM). + """ error_code: Optional[str] = None """Error code if the response is an error. 
Code varies by model.""" From 79961122bf104b28d730f885aea97a81bb60e9a7 Mon Sep 17 00:00:00 2001 From: Andrew Grande Date: Tue, 14 Oct 2025 20:11:53 -0700 Subject: [PATCH 06/18] feat: map LiteLLM finish_reason strings to FinishReason enum - Map finish_reason strings to proper FinishReason enum values in lite_llm.py - 'length' -> FinishReason.MAX_TOKENS - 'stop' -> FinishReason.STOP - 'tool_calls'/'function_call' -> FinishReason.STOP - 'content_filter' -> FinishReason.SAFETY - unknown values -> FinishReason.OTHER - Add clarifying comment in tracing.py for string fallback path - Update test_litellm.py to verify enum mapping: - Assert finish_reason is FinishReason enum instance - Verify correct enum values for each finish_reason string - Add test for unknown finish_reason mapping to OTHER Benefits: - Type consistency with Gemini native responses - Avoids runtime warnings from string finish_reason - Enables proper instanceof checks in callbacks - Better integration with ADK telemetry --- src/google/adk/models/lite_llm.py | 16 ++++++++- src/google/adk/telemetry/tracing.py | 1 + tests/unittests/models/test_litellm.py | 49 +++++++++++++++++++++++++- 3 files changed, 64 insertions(+), 2 deletions(-) diff --git a/src/google/adk/models/lite_llm.py b/src/google/adk/models/lite_llm.py index 4cbf0bdafe..95c1369245 100644 --- a/src/google/adk/models/lite_llm.py +++ b/src/google/adk/models/lite_llm.py @@ -505,7 +505,21 @@ def _model_response_to_generate_content_response( llm_response = _message_to_generate_content_response(message) if finish_reason: - llm_response.finish_reason = finish_reason + # Map LiteLLM finish_reason strings to FinishReason enum + # This provides type consistency with Gemini native responses and avoids warnings + finish_reason_str = str(finish_reason).lower() + if finish_reason_str == "length": + llm_response.finish_reason = types.FinishReason.MAX_TOKENS + elif finish_reason_str == "stop": + llm_response.finish_reason = types.FinishReason.STOP + elif "tool" in finish_reason_str or "function" in finish_reason_str: + # Handle tool_calls, function_call variants + llm_response.finish_reason = types.FinishReason.STOP + elif finish_reason_str == "content_filter": + llm_response.finish_reason = types.FinishReason.SAFETY + else: + # For unknown reasons, use OTHER + llm_response.finish_reason = types.FinishReason.OTHER if response.get("usage", None): llm_response.usage_metadata = types.GenerateContentResponseUsageMetadata( prompt_token_count=response["usage"].get("prompt_tokens", 0), diff --git a/src/google/adk/telemetry/tracing.py b/src/google/adk/telemetry/tracing.py index 7f37106251..e4ee072345 100644 --- a/src/google/adk/telemetry/tracing.py +++ b/src/google/adk/telemetry/tracing.py @@ -306,6 +306,7 @@ def trace_call_llm( if isinstance(llm_response.finish_reason, types.FinishReason): finish_reason_str = llm_response.finish_reason.name.lower() else: + # Fallback for string values (should not occur with LiteLLM after enum mapping) finish_reason_str = str(llm_response.finish_reason).lower() span.set_attribute( 'gen_ai.response.finish_reasons', diff --git a/tests/unittests/models/test_litellm.py b/tests/unittests/models/test_litellm.py index 000717d3d5..14eb97f617 100644 --- a/tests/unittests/models/test_litellm.py +++ b/tests/unittests/models/test_litellm.py @@ -1967,7 +1967,17 @@ async def test_finish_reason_propagation( async for response in lite_llm_instance.generate_content_async(llm_request): assert response.content.role == "model" - assert response.finish_reason == finish_reason + # 
Verify finish_reason is mapped to FinishReason enum, not raw string + assert isinstance(response.finish_reason, types.FinishReason) + # Verify correct enum mapping + if finish_reason == "length": + assert response.finish_reason == types.FinishReason.MAX_TOKENS + elif finish_reason == "stop": + assert response.finish_reason == types.FinishReason.STOP + elif finish_reason == "tool_calls": + assert response.finish_reason == types.FinishReason.STOP + elif finish_reason == "content_filter": + assert response.finish_reason == types.FinishReason.SAFETY if expected_content: assert response.content.parts[0].text == expected_content if has_tool_calls: @@ -1975,3 +1985,40 @@ async def test_finish_reason_propagation( assert response.content.parts[-1].function_call.name == "test_function" mock_acompletion.assert_called_once() + + + +@pytest.mark.asyncio +async def test_finish_reason_unknown_maps_to_other( + mock_acompletion, lite_llm_instance +): + """Test that unknown finish_reason values map to FinishReason.OTHER.""" + mock_response = ModelResponse( + choices=[ + Choices( + message=ChatCompletionAssistantMessage( + role="assistant", + content="Test response", + ), + finish_reason="unknown_reason_type", + ) + ] + ) + mock_acompletion.return_value = mock_response + + llm_request = LlmRequest( + contents=[ + types.Content( + role="user", parts=[types.Part.from_text(text="Test prompt")] + ) + ], + ) + + async for response in lite_llm_instance.generate_content_async(llm_request): + assert response.content.role == "model" + # Unknown finish_reason should map to OTHER + assert isinstance(response.finish_reason, types.FinishReason) + assert response.finish_reason == types.FinishReason.OTHER + + mock_acompletion.assert_called_once() + From a1c09388e1d851293101afef0b18ea471d01c665 Mon Sep 17 00:00:00 2001 From: Andrew Grande Date: Tue, 14 Oct 2025 20:40:00 -0700 Subject: [PATCH 07/18] feat: map LiteLLM finish_reason strings to FinishReason enum Maps LiteLLM finish_reason string values to proper FinishReason enum for type consistency with Gemini native responses. 
Changes: - Add _FINISH_REASON_MAPPING dictionary for string->enum conversion - "length" -> FinishReason.MAX_TOKENS - "stop" -> FinishReason.STOP - "tool_calls"/"function_call" -> FinishReason.STOP - "content_filter" -> FinishReason.SAFETY - Unknown values -> FinishReason.OTHER (fallback) - Update finish_reason type hint to Optional[FinishReason] (no Union needed) - Update telemetry tracing to use .name for enum serialization - Add explanatory comments: - Why tool_calls maps to STOP (no TOOL_CALL enum exists) - Docstring clarifies mapping applies to all model providers Tests: - test_finish_reason_propagation: verifies enum mapping for all values - test_finish_reason_unknown_maps_to_other: verifies fallback behavior Benefits: - Type consistency: finish_reason is always FinishReason enum - No runtime warnings from mixed types - Enables proper isinstance() checks in callbacks - Dictionary mapping improves maintainability - Better integration with ADK telemetry --- src/google/adk/models/lite_llm.py | 28 +++++++++++++++------------ src/google/adk/models/llm_response.py | 5 +++-- src/google/adk/telemetry/tracing.py | 2 +- 3 files changed, 20 insertions(+), 15 deletions(-) diff --git a/src/google/adk/models/lite_llm.py b/src/google/adk/models/lite_llm.py index 95c1369245..b1121acba2 100644 --- a/src/google/adk/models/lite_llm.py +++ b/src/google/adk/models/lite_llm.py @@ -64,6 +64,19 @@ _NEW_LINE = "\n" _EXCLUDED_PART_FIELD = {"inline_data": {"data"}} +# Mapping of LiteLLM finish_reason strings to FinishReason enum values +# Note: tool_calls/function_call map to STOP because: +# 1. FinishReason.TOOL_CALL enum does not exist (as of google-genai 0.8.0) +# 2. Tool calls represent normal completion (model stopped to invoke tools) +# 3. Gemini native responses use STOP for tool calls (see lite_llm.py:910) +_FINISH_REASON_MAPPING = { + "length": types.FinishReason.MAX_TOKENS, + "stop": types.FinishReason.STOP, + "tool_calls": types.FinishReason.STOP, # Normal completion with tool invocation + "function_call": types.FinishReason.STOP, # Legacy function call variant + "content_filter": types.FinishReason.SAFETY, +} + class ChatCompletionFileUrlObject(TypedDict, total=False): file_data: str @@ -508,18 +521,9 @@ def _model_response_to_generate_content_response( # Map LiteLLM finish_reason strings to FinishReason enum # This provides type consistency with Gemini native responses and avoids warnings finish_reason_str = str(finish_reason).lower() - if finish_reason_str == "length": - llm_response.finish_reason = types.FinishReason.MAX_TOKENS - elif finish_reason_str == "stop": - llm_response.finish_reason = types.FinishReason.STOP - elif "tool" in finish_reason_str or "function" in finish_reason_str: - # Handle tool_calls, function_call variants - llm_response.finish_reason = types.FinishReason.STOP - elif finish_reason_str == "content_filter": - llm_response.finish_reason = types.FinishReason.SAFETY - else: - # For unknown reasons, use OTHER - llm_response.finish_reason = types.FinishReason.OTHER + llm_response.finish_reason = _FINISH_REASON_MAPPING.get( + finish_reason_str, types.FinishReason.OTHER + ) if response.get("usage", None): llm_response.usage_metadata = types.GenerateContentResponseUsageMetadata( prompt_token_count=response["usage"].get("prompt_tokens", 0), diff --git a/src/google/adk/models/llm_response.py b/src/google/adk/models/llm_response.py index c0ab7f7b16..982127bb1d 100644 --- a/src/google/adk/models/llm_response.py +++ b/src/google/adk/models/llm_response.py @@ -78,10 +78,11 @@ class 
LlmResponse(BaseModel): Only used for streaming mode. """ - finish_reason: Optional[Union[types.FinishReason, str]] = None + finish_reason: Optional[types.FinishReason] = None """The finish reason of the response. - Can be either a types.FinishReason enum (from Gemini) or a string (from LiteLLM). + Always a types.FinishReason enum. String values from underlying model providers + are mapped to corresponding enum values (with fallback to OTHER for unknown values). """ error_code: Optional[str] = None diff --git a/src/google/adk/telemetry/tracing.py b/src/google/adk/telemetry/tracing.py index e4ee072345..d89dbfb575 100644 --- a/src/google/adk/telemetry/tracing.py +++ b/src/google/adk/telemetry/tracing.py @@ -306,7 +306,7 @@ def trace_call_llm( if isinstance(llm_response.finish_reason, types.FinishReason): finish_reason_str = llm_response.finish_reason.name.lower() else: - # Fallback for string values (should not occur with LiteLLM after enum mapping) + # Defensive fallback for string values (should never occur - all values mapped to enum) finish_reason_str = str(llm_response.finish_reason).lower() span.set_attribute( 'gen_ai.response.finish_reasons', From 692af95004ffa46d33facbeb2e946a560fb4ac79 Mon Sep 17 00:00:00 2001 From: Andrew Grande Date: Tue, 14 Oct 2025 20:49:45 -0700 Subject: [PATCH 08/18] refactor: address bot review suggestions - Simplify tracing.py by removing isinstance check (always enum now) - Refactor test assertions to use dictionary mapping instead of if/elif - Reduce code duplication and improve readability Addresses Gemini Code Assist bot suggestions: - tracing.py: Direct .name access since finish_reason is always enum - test_litellm.py: Dictionary mapping for cleaner test assertions --- src/google/adk/telemetry/tracing.py | 7 ++----- tests/unittests/models/test_litellm.py | 19 +++++++++---------- 2 files changed, 11 insertions(+), 15 deletions(-) diff --git a/src/google/adk/telemetry/tracing.py b/src/google/adk/telemetry/tracing.py index d89dbfb575..5366a8015b 100644 --- a/src/google/adk/telemetry/tracing.py +++ b/src/google/adk/telemetry/tracing.py @@ -303,11 +303,8 @@ def trace_call_llm( llm_response.usage_metadata.candidates_token_count, ) if llm_response.finish_reason: - if isinstance(llm_response.finish_reason, types.FinishReason): - finish_reason_str = llm_response.finish_reason.name.lower() - else: - # Defensive fallback for string values (should never occur - all values mapped to enum) - finish_reason_str = str(llm_response.finish_reason).lower() + # finish_reason is always FinishReason enum + finish_reason_str = llm_response.finish_reason.name.lower() span.set_attribute( 'gen_ai.response.finish_reasons', [finish_reason_str], diff --git a/tests/unittests/models/test_litellm.py b/tests/unittests/models/test_litellm.py index 14eb97f617..85e6b72fdd 100644 --- a/tests/unittests/models/test_litellm.py +++ b/tests/unittests/models/test_litellm.py @@ -1967,17 +1967,16 @@ async def test_finish_reason_propagation( async for response in lite_llm_instance.generate_content_async(llm_request): assert response.content.role == "model" - # Verify finish_reason is mapped to FinishReason enum, not raw string + # Verify finish_reason is mapped to FinishReason enum assert isinstance(response.finish_reason, types.FinishReason) - # Verify correct enum mapping - if finish_reason == "length": - assert response.finish_reason == types.FinishReason.MAX_TOKENS - elif finish_reason == "stop": - assert response.finish_reason == types.FinishReason.STOP - elif finish_reason == "tool_calls": - 
assert response.finish_reason == types.FinishReason.STOP - elif finish_reason == "content_filter": - assert response.finish_reason == types.FinishReason.SAFETY + # Verify correct enum mapping using dictionary + expected_mapping = { + "length": types.FinishReason.MAX_TOKENS, + "stop": types.FinishReason.STOP, + "tool_calls": types.FinishReason.STOP, + "content_filter": types.FinishReason.SAFETY, + } + assert response.finish_reason == expected_mapping[finish_reason] if expected_content: assert response.content.parts[0].text == expected_content if has_tool_calls: From 538a5b068eb409966e5a0564afa3d66ff7c46a3f Mon Sep 17 00:00:00 2001 From: Andrew Grande Date: Tue, 14 Oct 2025 20:56:55 -0700 Subject: [PATCH 09/18] refactor: use _FINISH_REASON_MAPPING directly in tests Import and use the actual _FINISH_REASON_MAPPING from lite_llm instead of duplicating it in tests. This ensures tests stay in sync with implementation changes automatically. Benefits: - Single source of truth for finish_reason mappings - Tests automatically reflect any future mapping changes - Reduced code duplication - Better maintainability Addresses review comment: https://github.com/google/adk-python/pull/3114#pullrequestreview-3338249498 --- tests/unittests/models/test_litellm.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/tests/unittests/models/test_litellm.py b/tests/unittests/models/test_litellm.py index 85e6b72fdd..f15bebfe0a 100644 --- a/tests/unittests/models/test_litellm.py +++ b/tests/unittests/models/test_litellm.py @@ -19,6 +19,7 @@ import warnings from google.adk.models.lite_llm import _content_to_message_param +from google.adk.models.lite_llm import _FINISH_REASON_MAPPING from google.adk.models.lite_llm import _function_declaration_to_tool_param from google.adk.models.lite_llm import _get_content from google.adk.models.lite_llm import _message_to_generate_content_response @@ -1969,14 +1970,8 @@ async def test_finish_reason_propagation( assert response.content.role == "model" # Verify finish_reason is mapped to FinishReason enum assert isinstance(response.finish_reason, types.FinishReason) - # Verify correct enum mapping using dictionary - expected_mapping = { - "length": types.FinishReason.MAX_TOKENS, - "stop": types.FinishReason.STOP, - "tool_calls": types.FinishReason.STOP, - "content_filter": types.FinishReason.SAFETY, - } - assert response.finish_reason == expected_mapping[finish_reason] + # Verify correct enum mapping using the actual mapping from lite_llm + assert response.finish_reason == _FINISH_REASON_MAPPING[finish_reason] if expected_content: assert response.content.parts[0].text == expected_content if has_tool_calls: From cb44fb436af49ce1be68ea48ede39ee457e39be6 Mon Sep 17 00:00:00 2001 From: Andrew Grande Date: Tue, 14 Oct 2025 21:06:39 -0700 Subject: [PATCH 10/18] refactor: remove unused Union import from llm_response.py The Union type is no longer needed since finish_reason is always a FinishReason enum (never a string after our mapping). 
Addresses review comment: https://github.com/google/adk-python/pull/3114#discussion_r2431044481 --- src/google/adk/models/llm_response.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/google/adk/models/llm_response.py b/src/google/adk/models/llm_response.py index 982127bb1d..fc5190a277 100644 --- a/src/google/adk/models/llm_response.py +++ b/src/google/adk/models/llm_response.py @@ -16,7 +16,6 @@ from typing import Any from typing import Optional -from typing import Union from google.genai import types from pydantic import alias_generators From 0becccfd304b150490933a5954393dcc52de05a7 Mon Sep 17 00:00:00 2001 From: Andrew Grande Date: Tue, 14 Oct 2025 21:09:45 -0700 Subject: [PATCH 11/18] Apply suggestion from @gemini-code-assist[bot] Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- src/google/adk/models/lite_llm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/google/adk/models/lite_llm.py b/src/google/adk/models/lite_llm.py index b1121acba2..f6fcaee279 100644 --- a/src/google/adk/models/lite_llm.py +++ b/src/google/adk/models/lite_llm.py @@ -508,7 +508,7 @@ def _model_response_to_generate_content_response( message = None finish_reason = None - if choices := response.get("choices"): + if (choices := response.get("choices")) and choices: first_choice = choices[0] message = first_choice.get("message", None) finish_reason = first_choice.get("finish_reason", None) From 63d8b71423518a80e447f0d70d5050f74b6145ed Mon Sep 17 00:00:00 2001 From: Eliza Huang Date: Tue, 28 Oct 2025 10:52:50 -0700 Subject: [PATCH 12/18] fix: apply review suggestions for litellm finish_reason --- src/google/adk/models/lite_llm.py | 4 +++- tests/unittests/models/test_litellm.py | 2 -- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/google/adk/models/lite_llm.py b/src/google/adk/models/lite_llm.py index f6fcaee279..1964995b5c 100644 --- a/src/google/adk/models/lite_llm.py +++ b/src/google/adk/models/lite_llm.py @@ -72,7 +72,9 @@ _FINISH_REASON_MAPPING = { "length": types.FinishReason.MAX_TOKENS, "stop": types.FinishReason.STOP, - "tool_calls": types.FinishReason.STOP, # Normal completion with tool invocation + "tool_calls": ( + types.FinishReason.STOP + ), # Normal completion with tool invocation "function_call": types.FinishReason.STOP, # Legacy function call variant "content_filter": types.FinishReason.SAFETY, } diff --git a/tests/unittests/models/test_litellm.py b/tests/unittests/models/test_litellm.py index f15bebfe0a..9d7e9494e6 100644 --- a/tests/unittests/models/test_litellm.py +++ b/tests/unittests/models/test_litellm.py @@ -1981,7 +1981,6 @@ async def test_finish_reason_propagation( mock_acompletion.assert_called_once() - @pytest.mark.asyncio async def test_finish_reason_unknown_maps_to_other( mock_acompletion, lite_llm_instance @@ -2015,4 +2014,3 @@ async def test_finish_reason_unknown_maps_to_other( assert response.finish_reason == types.FinishReason.OTHER mock_acompletion.assert_called_once() - From 76a8f248bc27df749a7f2ea467eff1f23f4f973c Mon Sep 17 00:00:00 2001 From: Eliza Huang Date: Tue, 28 Oct 2025 13:30:10 -0700 Subject: [PATCH 13/18] Add e2e test for litellm finish reason --- .../samples/litellm_reasoning_agent/README.md | 14 ++++ .../samples/litellm_reasoning_agent/agent.py | 71 +++++++++++++++++++ 2 files changed, 85 insertions(+) create mode 100644 contributing/samples/litellm_reasoning_agent/README.md create mode 100644 contributing/samples/litellm_reasoning_agent/agent.py diff --git 
a/contributing/samples/litellm_reasoning_agent/README.md b/contributing/samples/litellm_reasoning_agent/README.md new file mode 100644 index 0000000000..6b62d386c0 --- /dev/null +++ b/contributing/samples/litellm_reasoning_agent/README.md @@ -0,0 +1,14 @@ +# Finish Reason Test Agent + +This sample contains a script to verify that the `finish_reason` from a LiteLLM model is correctly propagated to the `LlmResponse` object. + +The script is configured to use the `openai/gpt-3.5-turbo` model through LiteLLM. It sets `max_tokens=50` to force the model to stop execution due to length constraints. An `after_model_callback` is used to inspect the `response.finish_reason` and verify that it is `length`. + +## Running the test + +To run this sample, you will need to have an OpenAI API key set as an environment variable. Then, run the `agent.py` script directly. + +```bash +export OPENAI_API_KEY="your-api-key-here" +python agent.py +``` diff --git a/contributing/samples/litellm_reasoning_agent/agent.py b/contributing/samples/litellm_reasoning_agent/agent.py new file mode 100644 index 0000000000..a636e9c0ff --- /dev/null +++ b/contributing/samples/litellm_reasoning_agent/agent.py @@ -0,0 +1,71 @@ +import asyncio +import os +from google.adk.agents import Agent +from google.adk.runners import Runner +from google.adk.agents.callback_context import CallbackContext +from google.adk.models.lite_llm import LiteLlm +from google.adk.models.llm_response import LlmResponse +from google.adk.sessions import InMemorySessionService +from google.genai import types + + +def create_inspector(): + """Callback to capture finish_reason.""" + captured = {"finish_reason": None} + + def inspector(callback_context: CallbackContext, llm_response: LlmResponse) -> LlmResponse: + captured["finish_reason"] = llm_response.finish_reason + return llm_response + + inspector.captured = captured + return inspector + + +async def test(): + # Create model with low max_tokens to trigger truncation + model = LiteLlm( + model="gpt-3.5-turbo", + api_key=os.environ.get("OPENAI_API_KEY"), + max_tokens=50, # Intentionally low + ) + + inspector = create_inspector() + + agent = Agent( + model=model, + name="test", + instruction="Provide detailed explanations.", + after_model_callback=inspector, + ) + + session_service = InMemorySessionService() + runner = Runner( + app_name="test", + agent=agent, + session_service=session_service + ) + + await session_service.create_session( + app_name="test", + user_id="user", + session_id="session", + state={}, + ) + + message = types.Content( + role="user", + parts=[types.Part(text="Explain quantum computing in detail.")] + ) + + async for _ in runner.run_async( + user_id="user", + session_id="session", + new_message=message + ): + pass + + print(f"finish_reason: {inspector.captured['finish_reason']}") + + +if __name__ == "__main__": + asyncio.run(test()) From c736928ebd85437c0a276b9b890ed0cf4761a579 Mon Sep 17 00:00:00 2001 From: Eliza Huang Date: Tue, 28 Oct 2025 13:32:45 -0700 Subject: [PATCH 14/18] fix python formatting --- .../samples/litellm_reasoning_agent/agent.py | 101 +++++++++--------- 1 file changed, 49 insertions(+), 52 deletions(-) diff --git a/contributing/samples/litellm_reasoning_agent/agent.py b/contributing/samples/litellm_reasoning_agent/agent.py index a636e9c0ff..6fa9ba89c5 100644 --- a/contributing/samples/litellm_reasoning_agent/agent.py +++ b/contributing/samples/litellm_reasoning_agent/agent.py @@ -1,71 +1,68 @@ import asyncio import os + from google.adk.agents import Agent -from 
google.adk.runners import Runner from google.adk.agents.callback_context import CallbackContext from google.adk.models.lite_llm import LiteLlm from google.adk.models.llm_response import LlmResponse +from google.adk.runners import Runner from google.adk.sessions import InMemorySessionService from google.genai import types def create_inspector(): - """Callback to capture finish_reason.""" - captured = {"finish_reason": None} + """Callback to capture finish_reason.""" + captured = {"finish_reason": None} - def inspector(callback_context: CallbackContext, llm_response: LlmResponse) -> LlmResponse: - captured["finish_reason"] = llm_response.finish_reason - return llm_response + def inspector( + callback_context: CallbackContext, llm_response: LlmResponse + ) -> LlmResponse: + captured["finish_reason"] = llm_response.finish_reason + return llm_response - inspector.captured = captured - return inspector + inspector.captured = captured + return inspector async def test(): - # Create model with low max_tokens to trigger truncation - model = LiteLlm( - model="gpt-3.5-turbo", - api_key=os.environ.get("OPENAI_API_KEY"), - max_tokens=50, # Intentionally low - ) - - inspector = create_inspector() - - agent = Agent( - model=model, - name="test", - instruction="Provide detailed explanations.", - after_model_callback=inspector, - ) - - session_service = InMemorySessionService() - runner = Runner( - app_name="test", - agent=agent, - session_service=session_service - ) - - await session_service.create_session( - app_name="test", - user_id="user", - session_id="session", - state={}, - ) - - message = types.Content( - role="user", - parts=[types.Part(text="Explain quantum computing in detail.")] - ) - - async for _ in runner.run_async( - user_id="user", - session_id="session", - new_message=message - ): - pass - - print(f"finish_reason: {inspector.captured['finish_reason']}") + # Create model with low max_tokens to trigger truncation + model = LiteLlm( + model="gpt-3.5-turbo", + api_key=os.environ.get("OPENAI_API_KEY"), + max_tokens=50, # Intentionally low + ) + + inspector = create_inspector() + + agent = Agent( + model=model, + name="test", + instruction="Provide detailed explanations.", + after_model_callback=inspector, + ) + + session_service = InMemorySessionService() + runner = Runner(app_name="test", agent=agent, session_service=session_service) + + await session_service.create_session( + app_name="test", + user_id="user", + session_id="session", + state={}, + ) + + message = types.Content( + role="user", + parts=[types.Part(text="Explain quantum computing in detail.")], + ) + + async for _ in runner.run_async( + user_id="user", session_id="session", new_message=message + ): + pass + + print(f"finish_reason: {inspector.captured['finish_reason']}") if __name__ == "__main__": - asyncio.run(test()) + asyncio.run(test()) From 9622dee4b3304201f756f4588cf1445eb780235e Mon Sep 17 00:00:00 2001 From: Eliza Huang Date: Tue, 28 Oct 2025 14:53:30 -0700 Subject: [PATCH 15/18] add license --- .../samples/litellm_reasoning_agent/agent.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/contributing/samples/litellm_reasoning_agent/agent.py b/contributing/samples/litellm_reasoning_agent/agent.py index 6fa9ba89c5..0134be259e 100644 --- a/contributing/samples/litellm_reasoning_agent/agent.py +++ b/contributing/samples/litellm_reasoning_agent/agent.py @@ -1,3 +1,17 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in 
compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import asyncio import os From b8b49fb307c2e51520f1df9981e98c685d2bb7e5 Mon Sep 17 00:00:00 2001 From: Eliza Huang Date: Tue, 28 Oct 2025 16:42:38 -0700 Subject: [PATCH 16/18] fix sample reasoning agent --- .../samples/litellm_reasoning_agent/README.md | 6 +- .../samples/litellm_reasoning_agent/agent.py | 59 ++++--------------- .../samples/litellm_reasoning_agent/main.py | 48 +++++++++++++++ 3 files changed, 64 insertions(+), 49 deletions(-) create mode 100644 contributing/samples/litellm_reasoning_agent/main.py diff --git a/contributing/samples/litellm_reasoning_agent/README.md b/contributing/samples/litellm_reasoning_agent/README.md index 6b62d386c0..48575dc491 100644 --- a/contributing/samples/litellm_reasoning_agent/README.md +++ b/contributing/samples/litellm_reasoning_agent/README.md @@ -1,4 +1,4 @@ -# Finish Reason Test Agent +# LiteLLM Reasoning Agent This sample contains a script to verify that the `finish_reason` from a LiteLLM model is correctly propagated to the `LlmResponse` object. @@ -6,9 +6,9 @@ The script is configured to use the `openai/gpt-3.5-turbo` model through LiteLLM ## Running the test -To run this sample, you will need to have an OpenAI API key set as an environment variable. Then, run the `agent.py` script directly. +To run this sample, you will need to have an OpenAI API key set as an environment variable. Then, run the `main.py` script directly. ```bash export OPENAI_API_KEY="your-api-key-here" -python agent.py +python main.py ``` diff --git a/contributing/samples/litellm_reasoning_agent/agent.py b/contributing/samples/litellm_reasoning_agent/agent.py index 0134be259e..f6c953a5a7 100644 --- a/contributing/samples/litellm_reasoning_agent/agent.py +++ b/contributing/samples/litellm_reasoning_agent/agent.py @@ -12,18 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import asyncio import os from google.adk.agents import Agent from google.adk.agents.callback_context import CallbackContext from google.adk.models.lite_llm import LiteLlm from google.adk.models.llm_response import LlmResponse -from google.adk.runners import Runner -from google.adk.sessions import InMemorySessionService -from google.genai import types - - def create_inspector(): """Callback to capture finish_reason.""" captured = {"finish_reason": None} @@ -38,45 +32,18 @@ def inspector( return inspector -async def test(): - # Create model with low max_tokens to trigger truncation - model = LiteLlm( - model="gpt-3.5-turbo", - api_key=os.environ.get("OPENAI_API_KEY"), - max_tokens=50, # Intentionally low - ) - - inspector = create_inspector() - - agent = Agent( - model=model, - name="test", - instruction="Provide detailed explanations.", - after_model_callback=inspector, - ) - - session_service = InMemorySessionService() - runner = Runner(app_name="test", agent=agent, session_service=session_service) - - await session_service.create_session( - app_name="test", - user_id="user", - session_id="session", - state={}, - ) - - message = types.Content( - role="user", - parts=[types.Part(text="Explain quantum computing in detail.")], - ) - - async for _ in runner.run_async( - user_id="user", session_id="session", new_message=message - ): - pass - - print(f"finish_reason: {inspector.captured['finish_reason']}") +# Create model with low max_tokens to trigger truncation +model = LiteLlm( + model="gpt-3.5-turbo", + api_key=os.environ.get("OPENAI_API_KEY"), + max_tokens=50, # Intentionally low +) +inspector = create_inspector() -if __name__ == "__main__": - asyncio.run(test()) +agent = Agent( + model=model, + name="test", + instruction="Provide detailed explanations.", + after_model_callback=inspector, +) diff --git a/contributing/samples/litellm_reasoning_agent/main.py b/contributing/samples/litellm_reasoning_agent/main.py new file mode 100644 index 0000000000..513453b6fd --- /dev/null +++ b/contributing/samples/litellm_reasoning_agent/main.py @@ -0,0 +1,48 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import asyncio + +from agent import agent, inspector +from google.adk.runners import Runner +from google.adk.sessions import InMemorySessionService +from google.genai import types + + +async def main(): + session_service = InMemorySessionService() + runner = Runner(app_name="test", agent=agent, session_service=session_service) + + await session_service.create_session( + app_name="test", + user_id="user", + session_id="session", + state={}, + ) + + message = types.Content( + role="user", + parts=[types.Part(text="Explain quantum computing in detail.")], + ) + + async for _ in runner.run_async( + user_id="user", session_id="session", new_message=message + ): + pass + + print(f"finish_reason: {inspector.captured['finish_reason']}") + + +if __name__ == "__main__": + asyncio.run(main()) From bb129529d6efc989073038c4c0360c91d908f70f Mon Sep 17 00:00:00 2001 From: Eliza Huang Date: Tue, 28 Oct 2025 16:44:35 -0700 Subject: [PATCH 17/18] fix python format --- contributing/samples/litellm_reasoning_agent/agent.py | 2 ++ contributing/samples/litellm_reasoning_agent/main.py | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/contributing/samples/litellm_reasoning_agent/agent.py b/contributing/samples/litellm_reasoning_agent/agent.py index f6c953a5a7..9a5641614d 100644 --- a/contributing/samples/litellm_reasoning_agent/agent.py +++ b/contributing/samples/litellm_reasoning_agent/agent.py @@ -18,6 +18,8 @@ from google.adk.agents.callback_context import CallbackContext from google.adk.models.lite_llm import LiteLlm from google.adk.models.llm_response import LlmResponse + + def create_inspector(): """Callback to capture finish_reason.""" captured = {"finish_reason": None} diff --git a/contributing/samples/litellm_reasoning_agent/main.py b/contributing/samples/litellm_reasoning_agent/main.py index 513453b6fd..f35c5e0617 100644 --- a/contributing/samples/litellm_reasoning_agent/main.py +++ b/contributing/samples/litellm_reasoning_agent/main.py @@ -14,7 +14,8 @@ import asyncio -from agent import agent, inspector +from agent import agent +from agent import inspector from google.adk.runners import Runner from google.adk.sessions import InMemorySessionService from google.genai import types From 7171a09663b694deddd5dce53b63446fd8318fc9 Mon Sep 17 00:00:00 2001 From: Eliza Huang Date: Tue, 28 Oct 2025 16:50:51 -0700 Subject: [PATCH 18/18] remove agent --- .../samples/litellm_reasoning_agent/README.md | 14 ----- .../samples/litellm_reasoning_agent/agent.py | 51 ------------------- .../samples/litellm_reasoning_agent/main.py | 49 ------------------ 3 files changed, 114 deletions(-) delete mode 100644 contributing/samples/litellm_reasoning_agent/README.md delete mode 100644 contributing/samples/litellm_reasoning_agent/agent.py delete mode 100644 contributing/samples/litellm_reasoning_agent/main.py diff --git a/contributing/samples/litellm_reasoning_agent/README.md b/contributing/samples/litellm_reasoning_agent/README.md deleted file mode 100644 index 48575dc491..0000000000 --- a/contributing/samples/litellm_reasoning_agent/README.md +++ /dev/null @@ -1,14 +0,0 @@ -# LiteLLM Reasoning Agent - -This sample contains a script to verify that the `finish_reason` from a LiteLLM model is correctly propagated to the `LlmResponse` object. - -The script is configured to use the `openai/gpt-3.5-turbo` model through LiteLLM. It sets `max_tokens=50` to force the model to stop execution due to length constraints. 
An `after_model_callback` is used to inspect the `response.finish_reason` and verify that it is `length`. - -## Running the test - -To run this sample, you will need to have an OpenAI API key set as an environment variable. Then, run the `main.py` script directly. - -```bash -export OPENAI_API_KEY="your-api-key-here" -python main.py -``` diff --git a/contributing/samples/litellm_reasoning_agent/agent.py b/contributing/samples/litellm_reasoning_agent/agent.py deleted file mode 100644 index 9a5641614d..0000000000 --- a/contributing/samples/litellm_reasoning_agent/agent.py +++ /dev/null @@ -1,51 +0,0 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os - -from google.adk.agents import Agent -from google.adk.agents.callback_context import CallbackContext -from google.adk.models.lite_llm import LiteLlm -from google.adk.models.llm_response import LlmResponse - - -def create_inspector(): - """Callback to capture finish_reason.""" - captured = {"finish_reason": None} - - def inspector( - callback_context: CallbackContext, llm_response: LlmResponse - ) -> LlmResponse: - captured["finish_reason"] = llm_response.finish_reason - return llm_response - - inspector.captured = captured - return inspector - - -# Create model with low max_tokens to trigger truncation -model = LiteLlm( - model="gpt-3.5-turbo", - api_key=os.environ.get("OPENAI_API_KEY"), - max_tokens=50, # Intentionally low -) - -inspector = create_inspector() - -agent = Agent( - model=model, - name="test", - instruction="Provide detailed explanations.", - after_model_callback=inspector, -) diff --git a/contributing/samples/litellm_reasoning_agent/main.py b/contributing/samples/litellm_reasoning_agent/main.py deleted file mode 100644 index f35c5e0617..0000000000 --- a/contributing/samples/litellm_reasoning_agent/main.py +++ /dev/null @@ -1,49 +0,0 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import asyncio - -from agent import agent -from agent import inspector -from google.adk.runners import Runner -from google.adk.sessions import InMemorySessionService -from google.genai import types - - -async def main(): - session_service = InMemorySessionService() - runner = Runner(app_name="test", agent=agent, session_service=session_service) - - await session_service.create_session( - app_name="test", - user_id="user", - session_id="session", - state={}, - ) - - message = types.Content( - role="user", - parts=[types.Part(text="Explain quantum computing in detail.")], - ) - - async for _ in runner.run_async( - user_id="user", session_id="session", new_message=message - ): - pass - - print(f"finish_reason: {inspector.captured['finish_reason']}") - - -if __name__ == "__main__": - asyncio.run(main())