From 0e68e9b7f4dd64d1b4b28feb4050e4b4fd85fb9d Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Thu, 4 Apr 2024 17:15:35 +0100 Subject: [PATCH] Fix bug in calculating state for non-gappy syncs (#16942) Unfortunately, the optimisation we applied here for non-gappy syncs is not actually valid. Fixes https://github.com/element-hq/synapse/issues/16941. ~~Based on https://github.com/element-hq/synapse/pull/16930.~~ Requires https://github.com/matrix-org/sytest/pull/1374. --- changelog.d/16930.bugfix | 2 +- changelog.d/16932.bugfix | 2 +- changelog.d/16942.bugfix | 1 + synapse/handlers/sync.py | 91 +++++++++++++------------------ tests/handlers/test_sync.py | 105 ++++++++++++++++++++++++++++++++++++ 5 files changed, 145 insertions(+), 56 deletions(-) create mode 100644 changelog.d/16942.bugfix diff --git a/changelog.d/16930.bugfix b/changelog.d/16930.bugfix index 21f964ef97..99ed435d75 100644 --- a/changelog.d/16930.bugfix +++ b/changelog.d/16930.bugfix @@ -1 +1 @@ -Fix a long-standing bug which could cause state to be omitted from `/sync` responses when certain events are filtered out of the timeline. +Fix various long-standing bugs which could cause incorrect state to be returned from `/sync` in certain situations. diff --git a/changelog.d/16932.bugfix b/changelog.d/16932.bugfix index 624388ea8e..99ed435d75 100644 --- a/changelog.d/16932.bugfix +++ b/changelog.d/16932.bugfix @@ -1 +1 @@ -Fix a long-standing bug which could cause incorrect state to be returned from `/sync` for rooms where the user has left. \ No newline at end of file +Fix various long-standing bugs which could cause incorrect state to be returned from `/sync` in certain situations. diff --git a/changelog.d/16942.bugfix b/changelog.d/16942.bugfix new file mode 100644 index 0000000000..99ed435d75 --- /dev/null +++ b/changelog.d/16942.bugfix @@ -0,0 +1 @@ +Fix various long-standing bugs which could cause incorrect state to be returned from `/sync` in certain situations. diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 773e291aa8..554c820f79 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -1195,6 +1195,8 @@ async def _compute_state_delta_for_full_sync( ) if batch: + # Strictly speaking, this returns the state *after* the first event in the + # timeline, but that is good enough here. state_at_timeline_start = ( await self._state_storage_controller.get_state_ids_for_event( batch.events[0].event_id, @@ -1257,25 +1259,25 @@ async def _compute_state_delta_for_incremental_sync( await_full_state = True lazy_load_members = False - if batch.limited: - if batch: - state_at_timeline_start = ( - await self._state_storage_controller.get_state_ids_for_event( - batch.events[0].event_id, - state_filter=state_filter, - await_full_state=await_full_state, - ) - ) - else: - # We can get here if the user has ignored the senders of all - # the recent events. - state_at_timeline_start = await self.get_state_at( - room_id, - stream_position=end_token, + if batch: + state_at_timeline_start = ( + await self._state_storage_controller.get_state_ids_for_event( + batch.events[0].event_id, state_filter=state_filter, await_full_state=await_full_state, ) + ) + else: + # We can get here if the user has ignored the senders of all + # the recent events. + state_at_timeline_start = await self.get_state_at( + room_id, + stream_position=end_token, + state_filter=state_filter, + await_full_state=await_full_state, + ) + if batch.limited: # for now, we disable LL for gappy syncs - see # https://github.com/vector-im/riot-web/issues/7211#issuecomment-419976346 # N.B. this slows down incr syncs as we are now processing way @@ -1290,47 +1292,28 @@ async def _compute_state_delta_for_incremental_sync( # about them). state_filter = StateFilter.all() - state_at_previous_sync = await self.get_state_at( - room_id, - stream_position=since_token, - state_filter=state_filter, - await_full_state=await_full_state, - ) + state_at_previous_sync = await self.get_state_at( + room_id, + stream_position=since_token, + state_filter=state_filter, + await_full_state=await_full_state, + ) - state_at_timeline_end = await self.get_state_at( - room_id, - stream_position=end_token, - state_filter=state_filter, - await_full_state=await_full_state, - ) + state_at_timeline_end = await self.get_state_at( + room_id, + stream_position=end_token, + state_filter=state_filter, + await_full_state=await_full_state, + ) + + state_ids = _calculate_state( + timeline_contains=timeline_state, + timeline_start=state_at_timeline_start, + timeline_end=state_at_timeline_end, + previous_timeline_end=state_at_previous_sync, + lazy_load_members=lazy_load_members, + ) - state_ids = _calculate_state( - timeline_contains=timeline_state, - timeline_start=state_at_timeline_start, - timeline_end=state_at_timeline_end, - previous_timeline_end=state_at_previous_sync, - lazy_load_members=lazy_load_members, - ) - else: - state_ids = {} - if lazy_load_members: - if members_to_fetch and batch.events: - # We're returning an incremental sync, with no - # "gap" since the previous sync, so normally there would be - # no state to return. - # But we're lazy-loading, so the client might need some more - # member events to understand the events in this timeline. - # So we fish out all the member events corresponding to the - # timeline here. The caller will then dedupe any redundant ones. - - state_ids = await self._state_storage_controller.get_state_ids_for_event( - batch.events[0].event_id, - # we only want members! - state_filter=StateFilter.from_types( - (EventTypes.Member, member) for member in members_to_fetch - ), - await_full_state=False, - ) return state_ids async def _find_missing_partial_state_memberships( diff --git a/tests/handlers/test_sync.py b/tests/handlers/test_sync.py index 5d8e886541..57e14d79ca 100644 --- a/tests/handlers/test_sync.py +++ b/tests/handlers/test_sync.py @@ -435,6 +435,111 @@ def test_state_includes_changes_on_forks_when_events_excluded(self) -> None: [s2_event], ) + def test_state_includes_changes_on_ungappy_syncs(self) -> None: + """Test `state` where the sync is not gappy. + + We start with a DAG like this: + + E1 + ↗ ↖ + | S2 + | + --|--- + | + E3 + + ... and initialsync with `limit=1`, represented by the horizontal dashed line. + At this point, we do not expect S2 to appear in the response at all (since + it is excluded from the timeline by the `limit`, and the state is based on the + state after the most recent event before the sync token (E3), which doesn't + include S2. + + Now more events arrive, and we do an incremental sync: + + E1 + ↗ ↖ + | S2 + | ↑ + E3 | + ↑ | + --|------|---- + | | + E4 | + ↖ / + E5 + + This is the last chance for us to tell the client about S2, so it *must* be + included in the response. + """ + alice = self.register_user("alice", "password") + alice_tok = self.login(alice, "password") + alice_requester = create_requester(alice) + room_id = self.helper.create_room_as(alice, is_public=True, tok=alice_tok) + + # Do an initial sync to get a known starting point. + initial_sync_result = self.get_success( + self.sync_handler.wait_for_sync_for_user( + alice_requester, generate_sync_config(alice) + ) + ) + last_room_creation_event_id = ( + initial_sync_result.joined[0].timeline.events[-1].event_id + ) + + # Send a state event, and a regular event, both using the same prev ID + with self._patch_get_latest_events([last_room_creation_event_id]): + s2_event = self.helper.send_state(room_id, "s2", {}, tok=alice_tok)[ + "event_id" + ] + e3_event = self.helper.send(room_id, "e3", tok=alice_tok)["event_id"] + + # Another initial sync, with limit=1 + initial_sync_result = self.get_success( + self.sync_handler.wait_for_sync_for_user( + alice_requester, + generate_sync_config( + alice, + filter_collection=FilterCollection( + self.hs, {"room": {"timeline": {"limit": 1}}} + ), + ), + ) + ) + room_sync = initial_sync_result.joined[0] + self.assertEqual(room_sync.room_id, room_id) + self.assertEqual( + [e.event_id for e in room_sync.timeline.events], + [e3_event], + ) + self.assertNotIn(s2_event, [e.event_id for e in room_sync.state.values()]) + + # More events, E4 and E5 + with self._patch_get_latest_events([e3_event]): + e4_event = self.helper.send(room_id, "e4", tok=alice_tok)["event_id"] + e5_event = self.helper.send(room_id, "e5", tok=alice_tok)["event_id"] + + # Now incremental sync + incremental_sync = self.get_success( + self.sync_handler.wait_for_sync_for_user( + alice_requester, + generate_sync_config(alice), + since_token=initial_sync_result.next_batch, + ) + ) + + # The state event should appear in the 'state' section of the response. + room_sync = incremental_sync.joined[0] + self.assertEqual(room_sync.room_id, room_id) + self.assertFalse(room_sync.timeline.limited) + self.assertEqual( + [e.event_id for e in room_sync.timeline.events], + [e4_event, e5_event], + ) + self.assertEqual( + [e.event_id for e in room_sync.state.values()], + [s2_event], + ) + @parameterized.expand( [ (False, False),