Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
fea8506
Fix truncation tests to handle actual behavior of _init_history()
openhands-agent Mar 11, 2025
4a6e463
Fix truncation state handling to preserve start_id
openhands-agent Mar 11, 2025
71b352f
Improve fix: Store original_start_id earlier and simplify variable names
openhands-agent Mar 11, 2025
3f360c0
Fix issue where start_id gets set to truncation_id after _init_history()
openhands-agent Mar 11, 2025
57d0d34
Fix recursion issue in test_truncation.py
openhands-agent Mar 11, 2025
5318a01
Remove unused debugging code in test_truncation.py
openhands-agent Mar 11, 2025
a677f46
Fix test_truncation_state_persistence_with_real_stores to use correct…
openhands-agent Mar 11, 2025
63261ec
Remove redundant test_truncation_state_persistence_with_mocks test
openhands-agent Mar 11, 2025
913150d
Remove redundant test_history_restoration_after_truncation test
openhands-agent Mar 11, 2025
3f4d571
Make truncation_id assertion more precise in test_truncation_state_pe…
openhands-agent Mar 11, 2025
441ed90
Make truncation_id assertion exact in test_truncation_state_persisten…
openhands-agent Mar 11, 2025
5f413a2
Fix event ID explanation in truncation test
openhands-agent Mar 11, 2025
a04d5f2
Fix test_truncation_state_persistence_with_real_stores to avoid event…
openhands-agent Mar 11, 2025
74d32f7
Fix test_truncation_state_persistence_with_real_stores to use same ev…
openhands-agent Mar 11, 2025
8e3dfe1
Improve test_truncation_state_persistence_with_real_stores to initial…
openhands-agent Mar 11, 2025
6f21810
Fix test_truncation_state_persistence_with_real_stores test to proper…
openhands-agent Mar 11, 2025
e2b1831
Fix test_truncation_state_persistence_with_real_stores to use the sam…
openhands-agent Mar 11, 2025
f34a47b
Improve test_truncation.py
openhands-agent Mar 12, 2025
ed6d15a
Fix formatting in test_truncation.py
openhands-agent Mar 12, 2025
38b072e
Update comment to say only 'Restore the original first message id'
openhands-agent Mar 13, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions openhands/controller/agent_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -902,6 +902,9 @@ def _init_history(self) -> None:
else self.event_stream.get_latest_event_id()
)

# Store the original start_id to preserve it
original_start_id = self.state.start_id

# sanity check
if start_id > end_id + 1:
self.log(
Expand Down Expand Up @@ -998,8 +1001,8 @@ def _init_history(self) -> None:
else:
self.state.history = events

# make sure history is in sync
self.state.start_id = start_id
# Restore the original first message id
self.state.start_id = original_start_id

def _handle_long_context_error(self) -> None:
# When context window is exceeded, keep roughly half of agent interactions
Expand Down
269 changes: 231 additions & 38 deletions tests/unit/test_truncation.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ def mock_agent():
agent = MagicMock()
agent.llm = MagicMock()
agent.llm.config = MagicMock()
# Set max_message_chars to a specific value to avoid comparison issues
agent.llm.config.max_message_chars = 1000
agent.llm.metrics = MagicMock()
return agent


Expand Down Expand Up @@ -178,61 +181,251 @@ def test_context_window_exceeded_handling(self, mock_event_stream, mock_agent):
assert controller.state.truncation_id is not None
assert controller.state.truncation_id > controller.state.start_id

def test_history_restoration_after_truncation(self, mock_event_stream, mock_agent):
controller = AgentController(
agent=mock_agent,
event_stream=mock_event_stream,
max_iterations=10,
sid='test_truncation',
confirmation_mode=False,
headless_mode=True,
)
def test_truncation_state_persistence_with_real_stores(self, mock_agent):
"""
Test that truncation state is properly saved and restored across sessions using real in-memory stores.

This test verifies the complete lifecycle of truncation:
1. Creating a session with many events
2. Forcing truncation
3. Saving state to persistent storage
4. Restoring state in a new session
5. Verifying all IDs and events are preserved correctly
6. Adding more events and forcing truncation again
7. Verifying the system maintains correct state across multiple truncations

The test uses real in-memory stores instead of mocks to ensure the actual behavior
is tested end-to-end.
"""
# Use an in-memory file store for state persistence
from openhands.storage.memory import InMemoryFileStore

file_store = InMemoryFileStore()

# Create events with IDs
# Use a real event stream with in-memory storage
from openhands.events import EventStream

# Create a real event stream with the in-memory file store
event_stream = EventStream(sid='test_persistence', file_store=file_store)

# PHASE 1: Create initial session with events

# Create first user message and add it to the event stream
first_msg = MessageAction(content='Start task', wait_for_response=False)
first_msg._source = EventSource.USER
first_msg._id = 1
event_stream.add_event(first_msg, EventSource.USER)
first_msg_id = first_msg.id

# Create a large number of events (100 pairs of commands and observations)
events = [first_msg]
for i in range(5):
for i in range(100):
cmd = CmdRunAction(command=f'cmd{i}')
cmd._id = i + 2
event_stream.add_event(cmd, EventSource.AGENT)

obs = CmdOutputObservation(
command=f'cmd{i}', content=f'output{i}', command_id=cmd._id
command=f'cmd{i}', content=f'output{i}', command_id=cmd.id
)
obs._cause = cmd._id
event_stream.add_event(obs, EventSource.ENVIRONMENT)

events.extend([cmd, obs])

# Set up initial history
controller.state.history = events.copy()
total_events_count = len(events)

# Create the first controller with all events in the stream
controller1 = AgentController(
agent=mock_agent,
event_stream=event_stream,
max_iterations=10,
sid='test_persistence',
confirmation_mode=False,
headless_mode=True,
)

# Verify initial state before truncation
assert controller1.state.start_id == first_msg_id
# The truncation_id might be initialized to -1 in the State class, so check for that
assert (
controller1.state.truncation_id == -1
or controller1.state.truncation_id is None
)
assert len(controller1.state.history) == total_events_count

# PHASE 2: Force truncation and verify immediate effects

# Force truncation
controller.state.history = controller._apply_conversation_window(
controller.state.history
controller1._handle_long_context_error()

# Calculate expected truncation ID based on the algorithm in _apply_conversation_window
# The midpoint index is approximately len(events) // 2
# For 201 events (1 initial + 100 pairs), midpoint is around 100
# Since the truncation algorithm preserves action-observation pairs,
# the truncation_id should be set to the ID of an event near the midpoint
expected_truncation_id = 100 # This is the ID of the event at the midpoint

# Verify truncation occurred with correct truncation_id
assert controller1.state.truncation_id is not None
assert controller1.state.truncation_id == expected_truncation_id

# Verify start_id is still preserved after truncation
assert controller1.state.start_id == first_msg_id

# Verify history was cut approximately in half
truncated_history_len = len(controller1.state.history)
assert truncated_history_len < total_events_count
assert (
truncated_history_len >= total_events_count * 0.4
) # Should be roughly half
assert (
truncated_history_len <= total_events_count * 0.6
) # Allow some flexibility

# Verify first message is still in history after truncation
assert first_msg in controller1.state.history

# Verify all events in truncated history are either the first message or at or after truncation_id
for event in controller1.state.history:
if event.id != first_msg_id:
assert event.id >= controller1.state.truncation_id

# PHASE 3: Save state and create a new session

# Save state to persistent storage
controller1.state.save_to_session('test_persistence', file_store)

# Store values for comparison after restoration
truncation_id = controller1.state.truncation_id
start_id = controller1.state.start_id

# Close the first controller
asyncio.run(controller1.close())

# PHASE 4: Create new controller and restore state

# Create a new controller with the same event stream and session ID
controller2 = AgentController(
agent=mock_agent,
event_stream=event_stream,
max_iterations=10,
sid='test_persistence',
confirmation_mode=False,
headless_mode=True,
)

# Save state
saved_start_id = controller.state.start_id
saved_truncation_id = controller.state.truncation_id
saved_history_len = len(controller.state.history)
# Restore state from persistent storage
from openhands.controller.state.state import State

# Set up mock event stream for new controller
mock_event_stream.get_events.return_value = controller.state.history
controller2.state = State.restore_from_session('test_persistence', file_store)

# Verify state was restored correctly before initializing history
assert controller2.state.truncation_id == truncation_id
assert controller2.state.start_id == start_id

# Initialize history from the event stream
controller2._init_history()

# PHASE 5: Verify restored state and history

# Verify IDs are still preserved after _init_history
assert controller2.state.truncation_id == truncation_id
assert controller2.state.start_id == start_id

# Calculate expected minimum length: events from truncation_id onwards + first message
events_from_truncation_id = [e for e in events if e.id >= truncation_id]
expected_min_length = 1 + len(events_from_truncation_id) # 1 for first_msg

# Verify the history was loaded correctly
assert len(controller2.state.history) >= expected_min_length
assert first_msg in controller2.state.history

# Verify the truncated history contains the right events
for event in controller2.state.history:
if event.id != first_msg_id: # Skip first message which is always included
assert (
event.id >= controller2.state.truncation_id
), f'Event {event.id} is before truncation_id {controller2.state.truncation_id}'

# PHASE 6: Add more events and force another truncation

# Add 50 more pairs of events
for i in range(100, 150):
cmd = CmdRunAction(command=f'cmd{i}')
event_stream.add_event(cmd, EventSource.AGENT)

obs = CmdOutputObservation(
command=f'cmd{i}', content=f'output{i}', command_id=cmd.id
)
event_stream.add_event(obs, EventSource.ENVIRONMENT)

# Add to controller's history directly to simulate normal operation
controller2.state.history.extend([cmd, obs])

# Create new controller with saved state
new_controller = AgentController(
# Store history length before second truncation
history_len_before_second_truncation = len(controller2.state.history)

# Force another truncation
controller2._handle_long_context_error()

# Verify second truncation occurred
assert len(controller2.state.history) < history_len_before_second_truncation

# Verify start_id is still preserved after second truncation
assert controller2.state.start_id == first_msg_id

# Verify truncation_id has been updated to a higher value
assert controller2.state.truncation_id > truncation_id

# Verify first message is still in history after second truncation
assert first_msg in controller2.state.history

# PHASE 7: Save state again and create a third session

# Save state after second truncation
controller2.state.save_to_session('test_persistence', file_store)

# Store values for comparison after second restoration
second_truncation_id = controller2.state.truncation_id

# Close the second controller
asyncio.run(controller2.close())

# Create a third controller
controller3 = AgentController(
agent=mock_agent,
event_stream=mock_event_stream,
event_stream=event_stream,
max_iterations=10,
sid='test_truncation',
sid='test_persistence',
confirmation_mode=False,
headless_mode=True,
)
new_controller.state.start_id = saved_start_id
new_controller.state.truncation_id = saved_truncation_id
new_controller.state.history = mock_event_stream.get_events()

# Verify restoration
assert len(new_controller.state.history) == saved_history_len
assert new_controller.state.history[0] == first_msg
assert new_controller.state.start_id == saved_start_id

# Restore state from persistent storage
controller3.state = State.restore_from_session('test_persistence', file_store)

# Verify state was restored correctly before initializing history
assert controller3.state.truncation_id == second_truncation_id
assert controller3.state.start_id == first_msg_id

# Initialize history from the event stream
controller3._init_history()

# PHASE 8: Verify final state after multiple truncations

# Verify IDs are still preserved after multiple truncations
assert controller3.state.truncation_id == second_truncation_id
assert controller3.state.start_id == first_msg_id

# Verify first message is still in history
assert first_msg in controller3.state.history

# Verify all events in final history are either the first message or at or after the second truncation_id
for event in controller3.state.history:
if event.id != first_msg_id:
assert event.id >= controller3.state.truncation_id

# Clean up resources
asyncio.run(controller3.close())
event_stream.close()

# Note: There may still be a RuntimeWarning about 'coroutine AgentController._on_event was never awaited'
# This is expected and doesn't affect the test results. The warning occurs because the event handling
# in AgentController.on_event uses asyncio.get_event_loop().run_until_complete() which can leave
# some coroutines unresolved when the test ends.