enyst · enyst · Mar 11, 2025 · Mar 11, 2025 · Mar 11, 2025 · Mar 11, 2025
diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py
@@ -902,6 +902,9 @@ def _init_history(self) -> None:
             else self.event_stream.get_latest_event_id()
         )
 
+        # Store the original start_id to preserve it
+        original_start_id = self.state.start_id
+
         # sanity check
         if start_id > end_id + 1:
             self.log(
@@ -998,8 +1001,8 @@ def _init_history(self) -> None:
         else:
             self.state.history = events
 
-        # make sure history is in sync
-        self.state.start_id = start_id
+        # Restore the original first message id
+        self.state.start_id = original_start_id
 
     def _handle_long_context_error(self) -> None:
         # When context window is exceeded, keep roughly half of agent interactions

diff --git a/tests/unit/test_truncation.py b/tests/unit/test_truncation.py
@@ -24,6 +24,9 @@ def mock_agent():
     agent = MagicMock()
     agent.llm = MagicMock()
     agent.llm.config = MagicMock()
+    # Set max_message_chars to a specific value to avoid comparison issues
+    agent.llm.config.max_message_chars = 1000
+    agent.llm.metrics = MagicMock()
     return agent
 
 
@@ -178,61 +181,251 @@ def test_context_window_exceeded_handling(self, mock_event_stream, mock_agent):
         assert controller.state.truncation_id is not None
         assert controller.state.truncation_id > controller.state.start_id
 
-    def test_history_restoration_after_truncation(self, mock_event_stream, mock_agent):
-        controller = AgentController(
-            agent=mock_agent,
-            event_stream=mock_event_stream,
-            max_iterations=10,
-            sid='test_truncation',
-            confirmation_mode=False,
-            headless_mode=True,
-        )
+    def test_truncation_state_persistence_with_real_stores(self, mock_agent):
+        """
+        Test that truncation state is properly saved and restored across sessions using real in-memory stores.
+
+        This test verifies the complete lifecycle of truncation:
+        1. Creating a session with many events
+        2. Forcing truncation
+        3. Saving state to persistent storage
+        4. Restoring state in a new session
+        5. Verifying all IDs and events are preserved correctly
+        6. Adding more events and forcing truncation again
+        7. Verifying the system maintains correct state across multiple truncations
+
+        The test uses real in-memory stores instead of mocks to ensure the actual behavior
+        is tested end-to-end.
+        """
+        # Use an in-memory file store for state persistence
+        from openhands.storage.memory import InMemoryFileStore
+
+        file_store = InMemoryFileStore()
 
-        # Create events with IDs
+        # Use a real event stream with in-memory storage
+        from openhands.events import EventStream
+
+        # Create a real event stream with the in-memory file store
+        event_stream = EventStream(sid='test_persistence', file_store=file_store)
+
+        # PHASE 1: Create initial session with events
+
+        # Create first user message and add it to the event stream
         first_msg = MessageAction(content='Start task', wait_for_response=False)
-        first_msg._source = EventSource.USER
-        first_msg._id = 1
+        event_stream.add_event(first_msg, EventSource.USER)
+        first_msg_id = first_msg.id
 
+        # Create a large number of events (100 pairs of commands and observations)
         events = [first_msg]
-        for i in range(5):
+        for i in range(100):
             cmd = CmdRunAction(command=f'cmd{i}')
-            cmd._id = i + 2
+            event_stream.add_event(cmd, EventSource.AGENT)
+
             obs = CmdOutputObservation(
-                command=f'cmd{i}', content=f'output{i}', command_id=cmd._id
+                command=f'cmd{i}', content=f'output{i}', command_id=cmd.id
             )
-            obs._cause = cmd._id
+            event_stream.add_event(obs, EventSource.ENVIRONMENT)
+
             events.extend([cmd, obs])
 
-        # Set up initial history
-        controller.state.history = events.copy()
+        total_events_count = len(events)
+
+        # Create the first controller with all events in the stream
+        controller1 = AgentController(
+            agent=mock_agent,
+            event_stream=event_stream,
+            max_iterations=10,
+            sid='test_persistence',
+            confirmation_mode=False,
+            headless_mode=True,
+        )
+
+        # Verify initial state before truncation
+        assert controller1.state.start_id == first_msg_id
+        # The truncation_id might be initialized to -1 in the State class, so check for that
+        assert (
+            controller1.state.truncation_id == -1
+            or controller1.state.truncation_id is None
+        )
+        assert len(controller1.state.history) == total_events_count
+
+        # PHASE 2: Force truncation and verify immediate effects
 
         # Force truncation
-        controller.state.history = controller._apply_conversation_window(
-            controller.state.history
+        controller1._handle_long_context_error()
+
+        # Calculate expected truncation ID based on the algorithm in _apply_conversation_window
+        # The midpoint index is approximately len(events) // 2
+        # For 201 events (1 initial + 100 pairs), midpoint is around 100
+        # Since the truncation algorithm preserves action-observation pairs,
+        # the truncation_id should be set to the ID of an event near the midpoint
+        expected_truncation_id = 100  # This is the ID of the event at the midpoint
+
+        # Verify truncation occurred with correct truncation_id
+        assert controller1.state.truncation_id is not None
+        assert controller1.state.truncation_id == expected_truncation_id
+
+        # Verify start_id is still preserved after truncation
+        assert controller1.state.start_id == first_msg_id
+
+        # Verify history was cut approximately in half
+        truncated_history_len = len(controller1.state.history)
+        assert truncated_history_len < total_events_count
+        assert (
+            truncated_history_len >= total_events_count * 0.4
+        )  # Should be roughly half
+        assert (
+            truncated_history_len <= total_events_count * 0.6
+        )  # Allow some flexibility
+
+        # Verify first message is still in history after truncation
+        assert first_msg in controller1.state.history
+
+        # Verify all events in truncated history are either the first message or after truncation_id
+        for event in controller1.state.history:
+            if event.id != first_msg_id:
+                assert event.id >= controller1.state.truncation_id
+
+        # PHASE 3: Save state and create a new session
+
+        # Save state to persistent storage
+        controller1.state.save_to_session('test_persistence', file_store)
+
+        # Store values for comparison after restoration
+        truncation_id = controller1.state.truncation_id
+        start_id = controller1.state.start_id
+
+        # Close the first controller
+        asyncio.run(controller1.close())
+
+        # PHASE 4: Create new controller and restore state
+
+        # Create a new controller with the same event stream and session ID
+        controller2 = AgentController(
+            agent=mock_agent,
+            event_stream=event_stream,
+            max_iterations=10,
+            sid='test_persistence',
+            confirmation_mode=False,
+            headless_mode=True,
         )
 
-        # Save state
-        saved_start_id = controller.state.start_id
-        saved_truncation_id = controller.state.truncation_id
-        saved_history_len = len(controller.state.history)
+        # Restore state from persistent storage
+        from openhands.controller.state.state import State
 
-        # Set up mock event stream for new controller
-        mock_event_stream.get_events.return_value = controller.state.history
+        controller2.state = State.restore_from_session('test_persistence', file_store)
+
+        # Verify state was restored correctly before initializing history
+        assert controller2.state.truncation_id == truncation_id
+        assert controller2.state.start_id == start_id
+
+        # Initialize history from the event stream
+        controller2._init_history()
+
+        # PHASE 5: Verify restored state and history
+
+        # Verify IDs are still preserved after _init_history
+        assert controller2.state.truncation_id == truncation_id
+        assert controller2.state.start_id == start_id
+
+        # Calculate expected minimum length: events from truncation_id onwards + first message
+        events_from_truncation_id = [e for e in events if e.id >= truncation_id]
+        expected_min_length = 1 + len(events_from_truncation_id)  # 1 for first_msg
+
+        # Verify the history was loaded correctly
+        assert len(controller2.state.history) >= expected_min_length
+        assert first_msg in controller2.state.history
+
+        # Verify the truncated history contains the right events
+        for event in controller2.state.history:
+            if event.id != first_msg_id:  # Skip first message which is always included
+                assert (
+                    event.id >= controller2.state.truncation_id
+                ), f'Event {event.id} is before truncation_id {controller2.state.truncation_id}'
+
+        # PHASE 6: Add more events and force another truncation
+
+        # Add 50 more pairs of events
+        for i in range(100, 150):
+            cmd = CmdRunAction(command=f'cmd{i}')
+            event_stream.add_event(cmd, EventSource.AGENT)
+
+            obs = CmdOutputObservation(
+                command=f'cmd{i}', content=f'output{i}', command_id=cmd.id
+            )
+            event_stream.add_event(obs, EventSource.ENVIRONMENT)
+
+            # Add to controller's history directly to simulate normal operation
+            controller2.state.history.extend([cmd, obs])
 
-        # Create new controller with saved state
-        new_controller = AgentController(
+        # Store history length before second truncation
+        history_len_before_second_truncation = len(controller2.state.history)
+
+        # Force another truncation
+        controller2._handle_long_context_error()
+
+        # Verify second truncation occurred
+        assert len(controller2.state.history) < history_len_before_second_truncation
+
+        # Verify start_id is still preserved after second truncation
+        assert controller2.state.start_id == first_msg_id
+
+        # Verify truncation_id has been updated to a higher value
+        assert controller2.state.truncation_id > truncation_id
+
+        # Verify first message is still in history after second truncation
+        assert first_msg in controller2.state.history
+
+        # PHASE 7: Save state again and create a third session
+
+        # Save state after second truncation
+        controller2.state.save_to_session('test_persistence', file_store)
+
+        # Store values for comparison after second restoration
+        second_truncation_id = controller2.state.truncation_id
+
+        # Close the second controller
+        asyncio.run(controller2.close())
+
+        # Create a third controller
+        controller3 = AgentController(
             agent=mock_agent,
-            event_stream=mock_event_stream,
+            event_stream=event_stream,
             max_iterations=10,
-            sid='test_truncation',
+            sid='test_persistence',
             confirmation_mode=False,
             headless_mode=True,
         )
-        new_controller.state.start_id = saved_start_id
-        new_controller.state.truncation_id = saved_truncation_id
-        new_controller.state.history = mock_event_stream.get_events()
-
-        # Verify restoration
-        assert len(new_controller.state.history) == saved_history_len
-        assert new_controller.state.history[0] == first_msg
-        assert new_controller.state.start_id == saved_start_id
+
+        # Restore state from persistent storage
+        controller3.state = State.restore_from_session('test_persistence', file_store)
+
+        # Verify state was restored correctly before initializing history
+        assert controller3.state.truncation_id == second_truncation_id
+        assert controller3.state.start_id == first_msg_id
+
+        # Initialize history from the event stream
+        controller3._init_history()
+
+        # PHASE 8: Verify final state after multiple truncations
+
+        # Verify IDs are still preserved after multiple truncations
+        assert controller3.state.truncation_id == second_truncation_id
+        assert controller3.state.start_id == first_msg_id
+
+        # Verify first message is still in history
+        assert first_msg in controller3.state.history
+
+        # Verify all events in final history are either the first message or after second truncation_id
+        for event in controller3.state.history:
+            if event.id != first_msg_id:
+                assert event.id >= controller3.state.truncation_id
+
+        # Clean up resources
+        asyncio.run(controller3.close())
+        event_stream.close()
+
+        # Note: There may still be a RuntimeWarning about 'coroutine AgentController._on_event was never awaited'
+        # This is expected and doesn't affect the test results. The warning occurs because the event handling
+        # in AgentController.on_event uses asyncio.get_event_loop().run_until_complete() which can leave
+        # some coroutines unresolved when the test ends.