-
Notifications
You must be signed in to change notification settings - Fork 416
Expire sliding sync connections #19211
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
4a1b127
5fde889
e5a8061
41dfbd2
321465b
a3d6299
6b0de5f
482c596
2308b04
5951328
375ab97
76a7af4
ff4fde6
1e74b34
aaf541c
b139c38
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| Expire sliding sync connections that are too old or have too much pending data. | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -20,6 +20,7 @@ | |
|
|
||
| from synapse.api.errors import SlidingSyncUnknownPosition | ||
| from synapse.logging.opentracing import log_kv | ||
| from synapse.metrics.background_process_metrics import wrap_as_background_process | ||
| from synapse.storage._base import SQLBaseStore, db_to_json | ||
| from synapse.storage.database import ( | ||
| DatabasePool, | ||
|
|
@@ -36,6 +37,12 @@ | |
| RoomSyncConfig, | ||
| ) | ||
| from synapse.util.caches.descriptors import cached | ||
| from synapse.util.constants import ( | ||
| MILLISECONDS_PER_SECOND, | ||
| ONE_DAY_SECONDS, | ||
| ONE_HOUR_SECONDS, | ||
| ONE_MINUTE_SECONDS, | ||
| ) | ||
| from synapse.util.json import json_encoder | ||
|
|
||
| if TYPE_CHECKING: | ||
|
|
@@ -45,6 +52,21 @@ | |
| logger = logging.getLogger(__name__) | ||
|
|
||
|
|
||
| # How often to update the `last_used_ts` column on | ||
| # `sliding_sync_connection_positions` when the client uses a connection | ||
| # position. We don't want to update it on every use to avoid excessive | ||
| # writes, but we want it to be reasonably up-to-date to help with | ||
| # cleaning up old connection positions. | ||
| UPDATE_INTERVAL_LAST_USED_TS_MS = 5 * ONE_MINUTE_SECONDS * MILLISECONDS_PER_SECOND | ||
|
|
||
| # Time in milliseconds the connection hasn't been used before we consider it | ||
| # expired and delete it. | ||
| CONNECTION_EXPIRY_MS = 7 * ONE_DAY_SECONDS * MILLISECONDS_PER_SECOND | ||
|
|
||
| # How often we run the background process to delete old sliding sync connections. | ||
| CONNECTION_EXPIRY_FREQUENCY_MS = ONE_HOUR_SECONDS * MILLISECONDS_PER_SECOND | ||
|
|
||
|
|
||
| class SlidingSyncStore(SQLBaseStore): | ||
| def __init__( | ||
| self, | ||
|
|
@@ -76,6 +98,12 @@ def __init__( | |
| replaces_index="sliding_sync_membership_snapshots_user_id", | ||
| ) | ||
|
|
||
| if self.hs.config.worker.run_background_tasks: | ||
| self.clock.looping_call( | ||
| self.delete_old_sliding_sync_connections, | ||
| CONNECTION_EXPIRY_FREQUENCY_MS, | ||
| ) | ||
|
|
||
| async def get_latest_bump_stamp_for_room( | ||
| self, | ||
| room_id: str, | ||
|
|
@@ -202,6 +230,7 @@ def persist_per_connection_state_txn( | |
| "effective_device_id": device_id, | ||
| "conn_id": conn_id, | ||
| "created_ts": self.clock.time_msec(), | ||
| "last_used_ts": self.clock.time_msec(), | ||
| }, | ||
| returning=("connection_key",), | ||
| ) | ||
|
|
@@ -384,7 +413,7 @@ def _get_and_clear_connection_positions_txn( | |
| # The `previous_connection_position` is a user-supplied value, so we | ||
| # need to make sure that the one they supplied is actually theirs. | ||
| sql = """ | ||
| SELECT connection_key | ||
| SELECT connection_key, last_used_ts | ||
| FROM sliding_sync_connection_positions | ||
| INNER JOIN sliding_sync_connections USING (connection_key) | ||
| WHERE | ||
|
|
@@ -396,7 +425,20 @@ def _get_and_clear_connection_positions_txn( | |
| if row is None: | ||
| raise SlidingSyncUnknownPosition() | ||
|
|
||
| (connection_key,) = row | ||
| (connection_key, last_used_ts) = row | ||
|
|
||
| # Update the `last_used_ts` if it's due to be updated. We don't update | ||
| # every time to avoid excessive writes. | ||
| now = self.clock.time_msec() | ||
| if last_used_ts is None or now - last_used_ts > UPDATE_INTERVAL_LAST_USED_TS_MS: | ||
| self.db_pool.simple_update_txn( | ||
| txn, | ||
| table="sliding_sync_connections", | ||
| keyvalues={ | ||
| "connection_key": connection_key, | ||
| }, | ||
| updatevalues={"last_used_ts": now}, | ||
| ) | ||
|
|
||
| # Now that we have seen the client has received and used the connection | ||
| # position, we can delete all the other connection positions. | ||
|
|
@@ -480,12 +522,30 @@ def _get_and_clear_connection_positions_txn( | |
| logger.warning("Unrecognized sliding sync stream in DB %r", stream) | ||
|
|
||
| return PerConnectionStateDB( | ||
| last_used_ts=last_used_ts, | ||
| rooms=RoomStatusMap(rooms), | ||
| receipts=RoomStatusMap(receipts), | ||
| account_data=RoomStatusMap(account_data), | ||
| room_configs=room_configs, | ||
| ) | ||
|
|
||
| @wrap_as_background_process("delete_old_sliding_sync_connections") | ||
| async def delete_old_sliding_sync_connections(self) -> None: | ||
| """Delete sliding sync connections that have not been used for a long time.""" | ||
| cutoff_ts = self.clock.time_msec() - CONNECTION_EXPIRY_MS | ||
|
|
||
| def delete_old_sliding_sync_connections_txn(txn: LoggingTransaction) -> None: | ||
| sql = """ | ||
| DELETE FROM sliding_sync_connections | ||
| WHERE last_used_ts IS NOT NULL AND last_used_ts < ? | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Feels like we should also have a background job that fills in Could be split out to another PR.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, I am a bit undecided whether next release we just delete all connections with a null |
||
| """ | ||
| txn.execute(sql, (cutoff_ts,)) | ||
|
|
||
| await self.db_pool.runInteraction( | ||
| "delete_old_sliding_sync_connections", | ||
| delete_old_sliding_sync_connections_txn, | ||
| ) | ||
|
|
||
|
|
||
| @attr.s(auto_attribs=True, frozen=True) | ||
| class PerConnectionStateDB: | ||
|
|
@@ -498,6 +558,8 @@ class PerConnectionStateDB: | |
| When persisting this *only* contains updates to the state. | ||
| """ | ||
|
|
||
| last_used_ts: int | None | ||
|
|
||
| rooms: "RoomStatusMap[str]" | ||
| receipts: "RoomStatusMap[str]" | ||
| account_data: "RoomStatusMap[str]" | ||
|
|
@@ -553,6 +615,7 @@ async def from_state( | |
| ) | ||
|
|
||
| return PerConnectionStateDB( | ||
| last_used_ts=per_connection_state.last_used_ts, | ||
| rooms=RoomStatusMap(rooms), | ||
| receipts=RoomStatusMap(receipts), | ||
| account_data=RoomStatusMap(account_data), | ||
|
|
@@ -596,6 +659,7 @@ async def to_state(self, store: "DataStore") -> "PerConnectionState": | |
| } | ||
|
|
||
| return PerConnectionState( | ||
| last_used_ts=self.last_used_ts, | ||
| rooms=RoomStatusMap(rooms), | ||
| receipts=RoomStatusMap(receipts), | ||
| account_data=RoomStatusMap(account_data), | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,27 @@ | ||
| -- | ||
| -- This file is licensed under the Affero General Public License (AGPL) version 3. | ||
| -- | ||
| -- Copyright (C) 2025 Element Creations, Ltd | ||
| -- | ||
| -- This program is free software: you can redistribute it and/or modify | ||
| -- it under the terms of the GNU Affero General Public License as | ||
| -- published by the Free Software Foundation, either version 3 of the | ||
| -- License, or (at your option) any later version. | ||
| -- | ||
| -- See the GNU Affero General Public License for more details: | ||
| -- <https://www.gnu.org/licenses/agpl-3.0.html>. | ||
|
|
||
| -- Add a timestamp for when the sliding sync connection position was last used, | ||
| -- only updated with a small granularity. | ||
| -- | ||
| -- This should be NOT NULL, but we need to consider existing rows. In future we | ||
| -- may want to either backfill this or delete all rows with a NULL value (and | ||
| -- then make it NOT NULL). | ||
| ALTER TABLE sliding_sync_connections ADD COLUMN last_used_ts BIGINT; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Have added a comment: -- Note: We don't add an index on this column to allow HOT updates on PostgreSQL
-- to reduce the cost of the updates to the column. c.f.
-- https://www.postgresql.org/docs/current/storage-hot.html
--
-- We do query this column directly to find expired connections, but we expect
-- that to be an infrequent operation and a sequential scan should be fine.
erikjohnston marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| -- Note: We don't add an index on this column to allow HOT updates on PostgreSQL | ||
| -- to reduce the cost of the updates to the column. c.f. | ||
| -- https://www.postgresql.org/docs/current/storage-hot.html | ||
| -- | ||
| -- We do query this column directly to find expired connections, but we expect | ||
| -- that to be an infrequent operation and a sequential scan should be fine. | ||
Uh oh!
There was an error while loading. Please reload this page.