From a83a337c4dd908db82ed555e8340ee3b52f34e9e Mon Sep 17 00:00:00 2001 From: reivilibre Date: Mon, 8 Jan 2024 17:24:20 +0000 Subject: [PATCH] Filter out rooms from the room directory being served to other homeservers when those rooms block that homeserver by their Access Control Lists. (#16759) The idea here being that the directory server shouldn't advertise rooms to a requesting server is the requesting server would not be allowed to join or participate in the room. Base: `develop` Original commit schedule, with full messages:
  1. Pass `from_federation_origin` down into room list retrieval code
  2. Don't cache /publicRooms response for inbound federated requests
  3. fixup! Don't cache /publicRooms response for inbound federated requests
  4. Cap the number of /publicRooms entries to 100
  5. Simplify code now that you can't request unlimited rooms
  6. Filter out rooms from federated requests that don't have the correct ACL
  7. Request a handful more when filtering ACLs so that we can try to avoid shortchanging the requester
--------- Signed-off-by: Olivier Wilkinson (reivilibre) --- changelog.d/16759.feature | 1 + .../federation/transport/server/__init__.py | 7 +- synapse/handlers/room_list.py | 177 +++++++++++++----- tests/handlers/test_room_list.py | 88 +++++++++ 4 files changed, 221 insertions(+), 52 deletions(-) create mode 100644 changelog.d/16759.feature create mode 100644 tests/handlers/test_room_list.py diff --git a/changelog.d/16759.feature b/changelog.d/16759.feature new file mode 100644 index 0000000000..5846e5a9f0 --- /dev/null +++ b/changelog.d/16759.feature @@ -0,0 +1 @@ +Filter out rooms from the room directory being served to other homeservers when those rooms block that homeserver by their Access Control Lists. \ No newline at end of file diff --git a/synapse/federation/transport/server/__init__.py b/synapse/federation/transport/server/__init__.py index 839092e4d3..74391e3cb2 100644 --- a/synapse/federation/transport/server/__init__.py +++ b/synapse/federation/transport/server/__init__.py @@ -154,7 +154,10 @@ async def on_GET( limit = None data = await self.handler.get_local_public_room_list( - limit, since_token, network_tuple=network_tuple, from_federation=True + limit, + since_token, + network_tuple=network_tuple, + from_federation_origin=origin, ) return 200, data @@ -195,7 +198,7 @@ async def on_POST( since_token=since_token, search_filter=search_filter, network_tuple=network_tuple, - from_federation=True, + from_federation_origin=origin, ) return 200, data diff --git a/synapse/handlers/room_list.py b/synapse/handlers/room_list.py index bfd205db49..5bad133f6b 100644 --- a/synapse/handlers/room_list.py +++ b/synapse/handlers/room_list.py @@ -19,7 +19,7 @@ # import logging -from typing import TYPE_CHECKING, Any, Optional, Tuple +from typing import TYPE_CHECKING, Any, List, Optional, Tuple import attr import msgpack @@ -54,6 +54,9 @@ # This is used to indicate we should only return rooms published to the main list. EMPTY_THIRD_PARTY_ID = ThirdPartyInstanceID(None, None) +# Maximum number of local public rooms returned over the CS or SS API +MAX_PUBLIC_ROOMS_IN_RESPONSE = 100 + class RoomListHandler: def __init__(self, hs: "HomeServer"): @@ -74,7 +77,7 @@ async def get_local_public_room_list( since_token: Optional[str] = None, search_filter: Optional[dict] = None, network_tuple: Optional[ThirdPartyInstanceID] = EMPTY_THIRD_PARTY_ID, - from_federation: bool = False, + from_federation_origin: Optional[str] = None, ) -> JsonDict: """Generate a local public room list. @@ -89,7 +92,8 @@ async def get_local_public_room_list( This can be (None, None) to indicate the main list, or a particular appservice and network id to use an appservice specific one. Setting to None returns all public rooms across all lists. - from_federation: true iff the request comes from the federation API + from_federation_origin: the server name of the requester, or None + if the request is not from federation. """ if not self.enable_room_list_search: return {"chunk": [], "total_room_count_estimate": 0} @@ -102,36 +106,43 @@ async def get_local_public_room_list( network_tuple, ) - if search_filter: + capped_limit: int = ( + MAX_PUBLIC_ROOMS_IN_RESPONSE + if limit is None or limit > MAX_PUBLIC_ROOMS_IN_RESPONSE + else limit + ) + + if search_filter or from_federation_origin is not None: # We explicitly don't bother caching searches or requests for # appservice specific lists. - logger.info("Bypassing cache as search request.") + # We also don't bother caching requests from federated homeservers. + logger.debug("Bypassing cache as search or federation request.") return await self._get_public_room_list( - limit, + capped_limit, since_token, search_filter, network_tuple=network_tuple, - from_federation=from_federation, + from_federation_origin=from_federation_origin, ) - key = (limit, since_token, network_tuple) + key = (capped_limit, since_token, network_tuple) return await self.response_cache.wrap( key, self._get_public_room_list, - limit, + capped_limit, since_token, network_tuple=network_tuple, - from_federation=from_federation, + from_federation_origin=from_federation_origin, ) async def _get_public_room_list( self, - limit: Optional[int] = None, + limit: int, since_token: Optional[str] = None, search_filter: Optional[dict] = None, network_tuple: Optional[ThirdPartyInstanceID] = EMPTY_THIRD_PARTY_ID, - from_federation: bool = False, + from_federation_origin: Optional[str] = None, ) -> JsonDict: """Generate a public room list. Args: @@ -142,8 +153,8 @@ async def _get_public_room_list( This can be (None, None) to indicate the main list, or a particular appservice and network id to use an appservice specific one. Setting to None returns all public rooms across all lists. - from_federation: Whether this request originated from a - federating server or a client. Used for room filtering. + from_federation_origin: the server name of the requester, or None + if the request is not from federation. """ # Pagination tokens work by storing the room ID sent in the last batch, @@ -165,8 +176,16 @@ async def _get_public_room_list( forwards = True has_batch_token = False - # we request one more than wanted to see if there are more pages to come - probing_limit = limit + 1 if limit is not None else None + if from_federation_origin is None: + # Client-Server API: + # we request one more than wanted to see if there are more pages to come + probing_limit = limit + 1 + else: + # Federation API: + # we request a handful more in case any get filtered out by ACLs + # as a best easy effort attempt to return the full number of entries + # specified by `limit`. + probing_limit = limit + 10 results = await self.store.get_largest_public_rooms( network_tuple, @@ -174,7 +193,7 @@ async def _get_public_room_list( probing_limit, bounds=bounds, forwards=forwards, - ignore_non_federatable=from_federation, + ignore_non_federatable=from_federation_origin is not None, ) def build_room_entry(room: LargestRoomStats) -> JsonDict: @@ -195,59 +214,117 @@ def build_room_entry(room: LargestRoomStats) -> JsonDict: # Filter out Nones – rather omit the field altogether return {k: v for k, v in entry.items() if v is not None} - response: JsonDict = {} + # Build a list of up to `limit` entries. + room_entries: List[JsonDict] = [] + rooms_iterator = results if forwards else reversed(results) + + # Track the first and last 'considered' rooms so that we can provide correct + # next_batch/prev_batch tokens. + # This is needed because the loop might finish early when + # `len(room_entries) >= limit` and we might be left with rooms we didn't + # 'consider' (iterate over) and we should save those rooms for the next + # batch. + first_considered_room: Optional[LargestRoomStats] = None + last_considered_room: Optional[LargestRoomStats] = None + cut_off_due_to_limit: bool = False + + for room_result in rooms_iterator: + if len(room_entries) >= limit: + cut_off_due_to_limit = True + break + + if first_considered_room is None: + first_considered_room = room_result + last_considered_room = room_result + + if from_federation_origin is not None: + # If this is a federated request, apply server ACLs if the room has any set + acl_evaluator = ( + await self._storage_controllers.state.get_server_acl_for_room( + room_result.room_id + ) + ) + + if acl_evaluator is not None: + if not acl_evaluator.server_matches_acl_event( + from_federation_origin + ): + # the requesting server is ACL blocked by the room, + # don't show in directory + continue + + room_entries.append(build_room_entry(room_result)) + + if not forwards: + # If we are paginating backwards, we still return the chunk in + # biggest-first order, so reverse again. + room_entries.reverse() + # Swap the order of first/last considered rooms. + first_considered_room, last_considered_room = ( + last_considered_room, + first_considered_room, + ) + + response: JsonDict = { + "chunk": room_entries, + } num_results = len(results) - if limit is not None: - more_to_come = num_results == probing_limit - # Depending on direction we trim either the front or back. - if forwards: - results = results[:limit] + more_to_come_from_database = num_results == probing_limit + + if forwards and has_batch_token: + # If there was a token given then we assume that there + # must be previous results, even if there were no results in this batch. + if first_considered_room is not None: + response["prev_batch"] = RoomListNextBatch( + last_joined_members=first_considered_room.joined_members, + last_room_id=first_considered_room.room_id, + direction_is_forward=False, + ).to_token() else: - results = results[-limit:] - else: - more_to_come = False + # If we didn't find any results this time, + # we don't have an actual room ID to put in the token. + # But since `first_considered_room` is None, we know that we have + # reached the end of the results. + # So we can use a token of (0, empty room ID) to paginate from the end + # next time. + response["prev_batch"] = RoomListNextBatch( + last_joined_members=0, + last_room_id="", + direction_is_forward=False, + ).to_token() if num_results > 0: - final_entry = results[-1] - initial_entry = results[0] - + assert first_considered_room is not None + assert last_considered_room is not None if forwards: - if has_batch_token: - # If there was a token given then we assume that there - # must be previous results. - response["prev_batch"] = RoomListNextBatch( - last_joined_members=initial_entry.joined_members, - last_room_id=initial_entry.room_id, - direction_is_forward=False, - ).to_token() - - if more_to_come: + if more_to_come_from_database or cut_off_due_to_limit: response["next_batch"] = RoomListNextBatch( - last_joined_members=final_entry.joined_members, - last_room_id=final_entry.room_id, + last_joined_members=last_considered_room.joined_members, + last_room_id=last_considered_room.room_id, direction_is_forward=True, ).to_token() - else: + else: # backwards if has_batch_token: response["next_batch"] = RoomListNextBatch( - last_joined_members=final_entry.joined_members, - last_room_id=final_entry.room_id, + last_joined_members=last_considered_room.joined_members, + last_room_id=last_considered_room.room_id, direction_is_forward=True, ).to_token() - if more_to_come: + if more_to_come_from_database or cut_off_due_to_limit: response["prev_batch"] = RoomListNextBatch( - last_joined_members=initial_entry.joined_members, - last_room_id=initial_entry.room_id, + last_joined_members=first_considered_room.joined_members, + last_room_id=first_considered_room.room_id, direction_is_forward=False, ).to_token() - response["chunk"] = [build_room_entry(r) for r in results] - + # We can't efficiently count the total number of rooms that are not + # blocked by ACLs, but this is just an estimate so that should be + # good enough. response["total_room_count_estimate"] = await self.store.count_public_rooms( network_tuple, - ignore_non_federatable=from_federation, + ignore_non_federatable=from_federation_origin is not None, search_filter=search_filter, ) diff --git a/tests/handlers/test_room_list.py b/tests/handlers/test_room_list.py new file mode 100644 index 0000000000..4d22ef98c2 --- /dev/null +++ b/tests/handlers/test_room_list.py @@ -0,0 +1,88 @@ +from http import HTTPStatus +from typing import Optional, Set + +from synapse.rest import admin +from synapse.rest.client import directory, login, room +from synapse.types import JsonDict + +from tests import unittest + + +class RoomListHandlerTestCase(unittest.HomeserverTestCase): + servlets = [ + admin.register_servlets, + login.register_servlets, + room.register_servlets, + directory.register_servlets, + ] + + def _create_published_room( + self, tok: str, extra_content: Optional[JsonDict] = None + ) -> str: + room_id = self.helper.create_room_as(tok=tok, extra_content=extra_content) + channel = self.make_request( + "PUT", + f"/_matrix/client/v3/directory/list/room/{room_id}?access_token={tok}", + content={ + "visibility": "public", + }, + ) + assert channel.code == HTTPStatus.OK, f"couldn't publish room: {channel.result}" + return room_id + + def test_acls_applied_to_room_directory_results(self) -> None: + """ + Creates 3 rooms. Room 2 has an ACL that only permits the homeservers + `test` and `test2` to access it. + + We then simulate `test2` and `test3` requesting the room directory and assert + that `test3` does not see room 2, but `test2` sees all 3. + """ + self.register_user("u1", "p1") + u1tok = self.login("u1", "p1") + room1 = self._create_published_room(u1tok) + + room2 = self._create_published_room( + u1tok, + extra_content={ + "initial_state": [ + { + "type": "m.room.server_acl", + "content": { + "allow": ["test", "test2"], + }, + } + ] + }, + ) + + room3 = self._create_published_room(u1tok) + + room_list = self.get_success( + self.hs.get_room_list_handler().get_local_public_room_list( + limit=50, from_federation_origin="test2" + ) + ) + room_ids_in_test2_list: Set[str] = { + entry["room_id"] for entry in room_list["chunk"] + } + + room_list = self.get_success( + self.hs.get_room_list_handler().get_local_public_room_list( + limit=50, from_federation_origin="test3" + ) + ) + room_ids_in_test3_list: Set[str] = { + entry["room_id"] for entry in room_list["chunk"] + } + + self.assertEqual( + room_ids_in_test2_list, + {room1, room2, room3}, + "test2 should be able to see all 3 rooms", + ) + self.assertEqual( + room_ids_in_test3_list, + {room1, room3}, + "test3 should be able to see only 2 rooms", + )