diff --git a/edb/api/errors.txt b/edb/api/errors.txt index ab886802f6d..a90ba01a7ff 100644 --- a/edb/api/errors.txt +++ b/edb/api/errors.txt @@ -148,7 +148,7 @@ 0x_08_00_00_00 AvailabilityError 0x_08_00_00_01 BackendUnavailableError #SHOULD_RETRY - +0x_08_00_00_02 ServerOfflineError #### diff --git a/edb/errors/__init__.py b/edb/errors/__init__.py index 826d322790b..5808dedc823 100644 --- a/edb/errors/__init__.py +++ b/edb/errors/__init__.py @@ -89,6 +89,7 @@ 'AuthenticationError', 'AvailabilityError', 'BackendUnavailableError', + 'ServerOfflineError', 'BackendError', 'UnsupportedBackendFeatureError', 'LogMessage', @@ -416,6 +417,10 @@ class BackendUnavailableError(AvailabilityError): _code = 0x_08_00_00_01 +class ServerOfflineError(AvailabilityError): + _code = 0x_08_00_00_02 + + class BackendError(EdgeDBError): _code = 0x_09_00_00_00 diff --git a/edb/server/args.py b/edb/server/args.py index d736ae48280..015dadb2f9a 100644 --- a/edb/server/args.py +++ b/edb/server/args.py @@ -93,8 +93,17 @@ class JOSEKeyMode(enum.StrEnum): class ReadinessState(enum.StrEnum): Default = "default" + """Default state: serving normally""" + NotReady = "not_ready" + """/server/status/ready returns an error, but clients can still connect.""" + ReadOnly = "read_only" + """Only read-only queries are allowed.""" + + Offline = "offline" + """Any existing connections are gracefully terminated and no new + connections are allowed.""" class ServerAuthMethod(enum.StrEnum): diff --git a/edb/server/dbview/dbview.pyx b/edb/server/dbview/dbview.pyx index 2a034b63efe..ffaa5d13df8 100644 --- a/edb/server/dbview/dbview.pyx +++ b/edb/server/dbview/dbview.pyx @@ -1099,6 +1099,13 @@ cdef class DatabaseConnectionView: error_constructor, reason, ): + if not self.server.is_online(): + readiness_reason = self.server.get_readiness_reason() + msg = "the server is going offline" + if readiness_reason: + msg = f"{msg}: {readiness_reason}" + raise errors.ServerOfflineError(msg) + if query_capabilities & 
~self._capability_mask: # _capability_mask is currently only used for system database raise query_capabilities.make_error( @@ -1106,18 +1113,24 @@ cdef class DatabaseConnectionView: errors.UnsupportedCapabilityError, "system database is read-only", ) + if query_capabilities & ~allowed_capabilities: raise query_capabilities.make_error( allowed_capabilities, error_constructor, reason, ) + if self.server.is_readonly(): if query_capabilities & enums.Capability.WRITE: + readiness_reason = self.server.get_readiness_reason() + msg = "the server is currently in read-only mode" + if readiness_reason: + msg = f"{msg}: {readiness_reason}" raise query_capabilities.make_error( ~enums.Capability.WRITE, errors.DisabledCapabilityError, - "the server is currently in read-only mode", + msg, ) diff --git a/edb/server/protocol/binary.pyx b/edb/server/protocol/binary.pyx index 322b42fe371..6d6d68cea32 100644 --- a/edb/server/protocol/binary.pyx +++ b/edb/server/protocol/binary.pyx @@ -1093,6 +1093,12 @@ cdef class EdgeConnection(frontend.FrontendConnection): self.write_error(ex) self.flush() + if isinstance(ex, errors.ServerOfflineError): + # This server is going into "offline" mode, + # close the connection. + self.close() + return + # The connection was aborted while we were # interpreting the error (via compiler/errmech.py). if self._con_status == EDGECON_BAD: diff --git a/edb/server/protocol/binary_v0.pyx b/edb/server/protocol/binary_v0.pyx index 6edde8c7090..8f6ca7614c8 100644 --- a/edb/server/protocol/binary_v0.pyx +++ b/edb/server/protocol/binary_v0.pyx @@ -774,6 +774,12 @@ cdef class EdgeConnectionBackwardsCompatible(EdgeConnection): self.write_error(ex) self.flush() + if isinstance(ex, errors.ServerOfflineError): + # This server is going into "offline" mode, + # close the connection. + self.close() + return + # The connection was aborted while we were # interpreting the error (via compiler/errmech.py). 
if self._con_status == EDGECON_BAD: diff --git a/edb/server/server.py b/edb/server/server.py index bbea71dc680..3e70c1dcb68 100644 --- a/edb/server/server.py +++ b/edb/server/server.py @@ -280,6 +280,7 @@ def __init__( self._admin_ui = admin_ui self._readiness = srvargs.ReadinessState.Default + self._readiness_reason = "" # A set of databases that should not accept new connections. self._block_new_connections: set[str] = set() @@ -326,12 +327,21 @@ def in_test_mode(self): def is_admin_ui_enabled(self): return self._admin_ui + def is_online(self) -> bool: + return self._readiness is not srvargs.ReadinessState.Offline + def is_ready(self) -> bool: - return self._readiness is srvargs.ReadinessState.Default + return ( + self._readiness is srvargs.ReadinessState.Default + or self._readiness is srvargs.ReadinessState.ReadOnly + ) def is_readonly(self) -> bool: return self._readiness is srvargs.ReadinessState.ReadOnly + def get_readiness_reason(self) -> str: + return self._readiness_reason + def get_pg_dbname(self, dbname: str) -> str: return self._cluster.get_db_name(dbname) @@ -1960,13 +1970,16 @@ def reload_state_file(_file_modified, _event): def reload_readiness_state(self, state_file): try: with open(state_file, 'rt') as rt: - state = rt.readline().strip() + line = rt.readline().strip() try: + state, _, reason = line.partition(":") self._readiness = srvargs.ReadinessState(state) + self._readiness_reason = reason logger.info( "readiness state file changed, " - "setting server readiness to %r", + "setting server readiness to %r%s", state, + f" ({reason})" if reason else "", ) except ValueError: logger.warning( @@ -1994,6 +2007,8 @@ def reload_readiness_state(self, state_file): ) self._readiness = srvargs.ReadinessState.Default + self._accepting_connections = self.is_online() + def reload_tls(self, tls_cert_file, tls_key_file): logger.info("loading TLS certificates") tls_password_needed = False diff --git a/tests/test_server_ops.py b/tests/test_server_ops.py index 
2ec9434085b..2fce57fc8ac 100644 --- a/tests/test_server_ops.py +++ b/tests/test_server_ops.py @@ -839,6 +839,45 @@ async def test_server_ops_readonly(self): rf.close() os.unlink(rf_name) + async def test_server_ops_offline(self): + rf_no, rf_name = tempfile.mkstemp(text=True) + rf = open(rf_no, "wt") + + try: + print("default", file=rf, flush=True) + + async with tb.start_edgedb_server( + readiness_state_file=rf_name, + ) as sd: + conn = await sd.connect() + await conn.execute("select 1") + + # Go offline + rf.seek(0) + print("offline", file=rf, flush=True) + await asyncio.sleep(0.01) + + with self.assertRaises( + (edgedb.AvailabilityError, edgedb.ClientConnectionError), + ): + await conn.execute("select 1") + + # Clear the offline state by removing the file + rf.close() + os.unlink(rf_name) + await asyncio.sleep(0.05) + async for tr in self.try_until_succeeds( + ignore=(errors.ClientConnectionError,), + ): + async with tr: + await conn.execute("select 1") + + await conn.aclose() + finally: + if os.path.exists(rf_name): + rf.close() + os.unlink(rf_name) + async def test_server_ops_restore_with_schema_signal(self): async def test(pgdata_path): backend_dsn = f'postgres:///?user=postgres&host={pgdata_path}'