Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions src/google/adk/runners.py
Original file line number Diff line number Diff line change
Expand Up @@ -1104,12 +1104,12 @@ def _new_invocation_context_for_live(
live_request_queue: Optional[LiveRequestQueue] = None,
run_config: Optional[RunConfig] = None,
) -> InvocationContext:
"""Creates a new invocation context for live multi-agent."""
"""Creates a new invocation context for live single and multi-agent scenarios."""
run_config = run_config or RunConfig()

# For live multi-agent, we need model's text transcription as context for
# next agent.
if self.agent.sub_agents and live_request_queue:
# next agent. For single-agent, we need general transcription support.
if live_request_queue:
if not run_config.response_modalities:
# default
run_config.response_modalities = ['AUDIO']
Expand All @@ -1123,7 +1123,8 @@ def _new_invocation_context_for_live(
types.AudioTranscriptionConfig()
)
if not run_config.input_audio_transcription:
# need this input transcription for agent transferring in live mode.
# need this input transcription for agent transferring in multi-agent live
# mode and for general transcription support in single agent live mode.
run_config.input_audio_transcription = types.AudioTranscriptionConfig()
return self._new_invocation_context(
session,
Expand Down
100 changes: 100 additions & 0 deletions tests/unittests/streaming/test_live_streaming_configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -642,3 +642,103 @@ def test_streaming_with_context_window_compression_config():
llm_request_sent_to_mock.live_connect_config.context_window_compression.sliding_window.target_tokens
== 500
)


def test_single_agent_live_streaming_with_transcription():
  """Single-agent live run should get transcription configs by default.

  No run config is handed to ``run_live`` here, so whatever transcription
  settings reach the mock model must have been filled in on the way (the
  live invocation-context setup is expected to add them).
  """
  model = testing_utils.MockModel.create([LlmResponse(turn_complete=True)])
  agent = Agent(name='single_agent', model=model, tools=[])
  runner = testing_utils.InMemoryRunner(root_agent=agent)

  # Push a single tiny PCM audio blob into the live queue before running.
  queue = LiveRequestQueue()
  audio_blob = types.Blob(data=b'\x00\xFF', mime_type='audio/pcm')
  queue.send_realtime(blob=audio_blob)

  # Deliberately called without a run config to exercise the defaults.
  events = runner.run_live(queue)

  assert events is not None, 'Expected a list of events, got None.'
  assert (
      len(events) > 0
  ), 'Expected at least one response, but got an empty list.'

  # Exactly one request must have been captured by the mock model.
  captured_requests = model.requests
  assert len(captured_requests) == 1

  # Both transcription directions must be present on the live config.
  live_config = captured_requests[0].live_connect_config
  assert live_config is not None
  assert live_config.output_audio_transcription is not None
  assert live_config.input_audio_transcription is not None


def test_single_agent_live_streaming_respects_explicit_transcription():
  """Explicit transcription configs must reach the model unchanged.

  A single agent (no sub-agents) is run live with a run config that already
  carries input and output transcription settings; the request captured by
  the mock model must reference those very same objects.
  """
  model = testing_utils.MockModel.create([LlmResponse(turn_complete=True)])
  agent = Agent(name='single_agent', model=model, tools=[])
  runner = testing_utils.InMemoryRunner(root_agent=agent)

  # Caller-supplied transcription settings, kept in locals so identity can be
  # checked against what the model receives.
  explicit_output_config = types.AudioTranscriptionConfig()
  explicit_input_config = types.AudioTranscriptionConfig()
  config = RunConfig(
      output_audio_transcription=explicit_output_config,
      input_audio_transcription=explicit_input_config,
  )

  # Push a single tiny PCM audio blob into the live queue before running.
  queue = LiveRequestQueue()
  audio_blob = types.Blob(data=b'\x00\xFF', mime_type='audio/pcm')
  queue.send_realtime(blob=audio_blob)

  events = runner.run_live(queue, config)

  assert events is not None, 'Expected a list of events, got None.'
  assert (
      len(events) > 0
  ), 'Expected at least one response, but got an empty list.'

  # Exactly one request must have been captured by the mock model.
  captured_requests = model.requests
  assert len(captured_requests) == 1

  # The live config must carry the exact objects supplied above.
  live_config = captured_requests[0].live_connect_config
  assert live_config is not None
  assert live_config.output_audio_transcription is explicit_output_config
  assert live_config.input_audio_transcription is explicit_input_config