From ce407e21eaf97c84d1eb27a14986869eb5bb4838 Mon Sep 17 00:00:00 2001 From: Fredrik Eriksson Date: Wed, 24 May 2023 17:54:42 +0200 Subject: [PATCH] gateway: outbound might core if connection lost during QM "restart" Before: outbound might core if the following is true * There is a connection lost. * QM is "absent" (has been online before, but is restarting) * outbound has advertised at least one queue for the connection * No other connection has the same queue Now: We detect the error that QM is "absent" and just discard sending the _unadvertise_ message (to the non existing QM) Resolves #199 --- middleware/gateway/include/gateway/group/tcp.h | 16 ++++++++++++++-- .../gateway/source/group/outbound/handle.cpp | 8 +++++++- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/middleware/gateway/include/gateway/group/tcp.h b/middleware/gateway/include/gateway/group/tcp.h index ee51d3f91..70f445e30 100644 --- a/middleware/gateway/include/gateway/group/tcp.h +++ b/middleware/gateway/include/gateway/group/tcp.h @@ -159,7 +159,7 @@ namespace casual common::algorithm::container::trim( m_connections, common::algorithm::remove( m_connections, descriptor)); auto found = common::algorithm::find( m_information, descriptor); - casual::assertion( descriptor, "fail to find information for descriptor: ", descriptor); + casual::assertion( found, "fail to find information for descriptor: ", descriptor); return common::algorithm::container::extract( m_information, std::begin( found)); } @@ -259,7 +259,19 @@ namespace casual // we 'lost' the connection in some way - we put a connection::Lost on our own ipc-device, and handle it // later (and differently depending on if we're 'regular' or 'reversed') - common::communication::ipc::inbound::device().push( lost( state, descriptor)); + // + // NOTE: lost functor might throw, and we have noexcept. If we fail to execute lost and handle the lost connection + // properly there's no point in going on, we have a broken state in some way. We still catch and log the + // error to help find potential future errors in lost (it should not throw). + // TODO: Make sure lost is noexcept (compile time) and force the responsibility to "where it belongs"? + try + { + common::communication::ipc::inbound::device().push( lost( state, descriptor)); + } + catch( ...) + { + casual::terminate( "failed to handle lost connection for descriptor: ", descriptor, " - error: ", exception::capture()); + } } } } // detail::handle::communication diff --git a/middleware/gateway/source/group/outbound/handle.cpp b/middleware/gateway/source/group/outbound/handle.cpp index eb0f96b8a..f5fdcafa7 100644 --- a/middleware/gateway/source/group/outbound/handle.cpp +++ b/middleware/gateway/source/group/outbound/handle.cpp @@ -969,7 +969,13 @@ namespace casual { casual::queue::ipc::message::Advertise request{ common::process::handle()}; request.queues.remove = std::move( resources.queues); - state.multiplex.send( ipc::manager::optional::queue(), request); + // this throws if queue-manager is absent. + // TODO send on instance::optional::Device should not throw? + common::exception::guard( [ &]() + { + state.multiplex.send( ipc::manager::optional::queue(), request); + }); + } }