From 17dc0a96855fe0e67b118479fa9d0bc9030150dd Mon Sep 17 00:00:00 2001 From: Ruben Perez Date: Wed, 15 Oct 2025 11:44:56 +0200 Subject: [PATCH 01/41] Rebase initial impl on develop --- include/boost/redis/connection.hpp | 195 ++++++---------------- include/boost/redis/detail/writer_fsm.hpp | 46 ++++- include/boost/redis/impl/connection.ipp | 8 +- include/boost/redis/impl/reader_fsm.ipp | 11 ++ include/boost/redis/impl/writer_fsm.ipp | 81 +++++++-- 5 files changed, 171 insertions(+), 170 deletions(-) diff --git a/include/boost/redis/connection.hpp b/include/boost/redis/connection.hpp index fbda48ea..faf45119 100644 --- a/include/boost/redis/connection.hpp +++ b/include/boost/redis/connection.hpp @@ -80,11 +80,9 @@ struct connection_impl { void(system::error_code, std::size_t)>; redis_stream stream_; - - // Notice we use a timer to simulate a condition-variable. It is - // also more suitable than a channel and the notify operation does - // not suspend. - timer_type writer_timer_; + timer_type writer_timer_; // timer used for write timeouts + timer_type writer_cv_; // condition variable, cancelled when there is new data to write + timer_type reader_timer_; // timer used for read timeouts timer_type reconnect_timer_; // to wait the reconnection period timer_type ping_timer_; // to wait between pings receive_channel_type receive_channel_; @@ -93,7 +91,7 @@ struct connection_impl { using executor_type = Executor; - executor_type get_executor() noexcept { return writer_timer_.get_executor(); } + executor_type get_executor() noexcept { return writer_cv_.get_executor(); } struct exec_op { connection_impl* obj_ = nullptr; @@ -116,7 +114,7 @@ struct connection_impl { asio::async_immediate(self.get_io_executor(), std::move(self)); return; case exec_action_type::notify_writer: - obj_->writer_timer_.cancel(); + obj_->writer_cv_.cancel(); continue; // this action does not require yielding case exec_action_type::wait_for_response: notifier_->async_receive(std::move(self)); @@ -133,13 +131,15 @@ struct connection_impl { connection_impl(Executor&& ex, asio::ssl::context&& ctx, logger&& lgr) : stream_{ex, std::move(ctx)} , writer_timer_{ex} + , writer_cv_{ex} + , reader_timer_{ex} , reconnect_timer_{ex} , ping_timer_{ex} , receive_channel_{ex, 256} , st_{std::move(lgr)} { set_receive_adapter(any_adapter{ignore}); - writer_timer_.expires_at((std::chrono::steady_clock::time_point::max)()); + writer_cv_.expires_at((std::chrono::steady_clock::time_point::max)()); } void cancel(operation op) @@ -200,7 +200,7 @@ struct connection_impl { return asio::async_compose( exec_op{this, notifier, exec_fsm(st_.mpx, std::move(info))}, token, - writer_timer_); + writer_cv_); } void set_receive_adapter(any_adapter adapter) @@ -216,23 +216,37 @@ struct writer_op { explicit writer_op(connection_impl& conn) noexcept : conn_(&conn) - , fsm_(conn.st_.mpx, conn.st_.logger) + , fsm_(conn.st_.mpx, conn.st_.logger, conn.st_.ping_req) { } template void operator()(Self& self, system::error_code ec = {}, std::size_t bytes_written = 0u) { auto act = fsm_.resume(ec, bytes_written, self.get_cancellation_state().cancelled()); + // TODO: I think the timeout should be embedded in the action - switch (act.type) { - case writer_action_type::done: self.complete(act.ec); return; + switch (act.type()) { + case writer_action_type::done: self.complete(act.error()); return; case writer_action_type::write: - asio::async_write( - conn_->stream_, - asio::buffer(conn_->st_.mpx.get_write_buffer()), - std::move(self)); + if (conn_->st_.cfg.health_check_interval.count() != 0) { + // If nothing has been written in the health check interval, consider the connection as dead + auto* conn = conn_; + conn->stream_.async_write_some( + asio::buffer(act.write_buffer()), + asio::cancel_after( + conn->writer_timer_, + conn->st_.cfg.health_check_interval, + std::move(self))); + } else { + conn_->stream_.async_write_some(asio::buffer(act.write_buffer()), std::move(self)); + } + return; + case writer_action_type::wait: + if (conn_->st_.cfg.health_check_interval.count() != 0) { + conn_->writer_cv_.expires_after(conn_->st_.cfg.health_check_interval); + } + conn_->writer_cv_.async_wait(std::move(self)); return; - case writer_action_type::wait: conn_->writer_timer_.async_wait(std::move(self)); return; } } }; @@ -256,10 +270,24 @@ struct reader_op { switch (act.type_) { case reader_fsm::action::type::read_some: { - auto const buf = conn_->st_.mpx.get_prepared_read_buffer(); - conn_->stream_.async_read_some(asio::buffer(buf), std::move(self)); - } + auto const buf = conn_->mpx_.get_prepared_read_buffer(); + if (conn_->st_.cfg.health_check_interval.count() != 0) { + // TODO: timeouts should be encoded in the actions + // The writer might be at most health_check_interval writing + // nothing until it sends a PING. Wait for at most another health_check_interval + // before considering the connection dead + auto* conn = conn_; + conn->stream_.async_read_some( + asio::buffer(buf), + asio::cancel_after( + conn->reader_timer_, + 2 * conn->st_.cfg.health_check_interval, + std::move(self))); + } else { + conn_->stream_.async_read_some(asio::buffer(buf), std::move(self)); + } return; + } case reader_fsm::action::type::notify_push_receiver: if (conn_->receive_channel_.try_send(ec, act.push_size_)) { continue; @@ -274,107 +302,9 @@ struct reader_op { } }; -template -struct health_checker_op { - connection_impl* conn_; - asio::coroutine coro_{}; - - system::error_code check_errors(system::error_code io_ec, asio::cancellation_type_t cancel_state) - { - // Did we have a cancellation? We might not have an error code here - if ((cancel_state & asio::cancellation_type_t::terminal) != asio::cancellation_type_t::none) { - conn_->st_.logger.log(logger::level::info, "Health checker: cancelled"); - return asio::error::operation_aborted; - } - - // operation_aborted and no cancel state means that asio::cancel_after timed out - if (io_ec == asio::error::operation_aborted) { - conn_->st_.logger.log(logger::level::info, "Health checker: ping timed out"); - return error::pong_timeout; - } - - // Did we have other unknown error? - if (io_ec) { - conn_->st_.logger.log(logger::level::info, "Health checker: ping error", io_ec); - return io_ec; - } - - // Did the server answer with an error? - if (conn_->st_.ping_resp.has_error()) { - auto error = conn_->st_.ping_resp.error(); - conn_->st_.logger.log( - logger::level::info, - "Health checker: server answered ping with an error", - error.diagnostic); - return resp3_type_to_error(error.data_type); - } - - // No error - return system::error_code(); - } - -public: - health_checker_op(connection_impl& conn) noexcept - : conn_{&conn} - { } - - template - void operator()(Self& self, system::error_code ec = {}, std::size_t = {}) - { - BOOST_ASIO_CORO_REENTER(coro_) - { - if (conn_->st_.cfg.health_check_interval == std::chrono::seconds::zero()) { - conn_->st_.logger.trace("ping_op (1): timeout disabled."); - - // Wait until we're cancelled. This simplifies parallel group handling a lot - conn_->ping_timer_.expires_at((std::chrono::steady_clock::time_point::max)()); - BOOST_ASIO_CORO_YIELD conn_->ping_timer_.async_wait(std::move(self)); - self.complete(asio::error::operation_aborted); - return; - } - - for (;;) { - // Clean up any previous leftover - clear_response(conn_->st_.ping_resp); - - // Execute the request - BOOST_ASIO_CORO_YIELD - { - auto* conn = conn_; // avoid use-after-move problems - auto timeout = conn->st_.cfg.health_check_interval; - conn->async_exec( - conn->st_.ping_req, - any_adapter{conn->st_.ping_resp}, - asio::cancel_after(conn->ping_timer_, timeout, std::move(self))); - } - - // Check for cancellations and errors in PING - ec = check_errors(ec, self.get_cancellation_state().cancelled()); - if (ec) { - self.complete(ec); - return; - } - - // Wait before pinging again. - conn_->ping_timer_.expires_after(conn_->st_.cfg.health_check_interval); - - BOOST_ASIO_CORO_YIELD - conn_->ping_timer_.async_wait(std::move(self)); - - if (is_cancelled(self)) { - conn_->st_.logger.trace("ping_op (2): cancelled"); - self.complete(asio::error::operation_aborted); - return; - } - } - } - } -}; - system::error_code translate_parallel_group_errors( - std::array order, + std::array order, system::error_code setup_ec, - system::error_code health_check_ec, system::error_code reader_ec, system::error_code writer_ec); @@ -420,7 +350,7 @@ class run_op { return asio::async_compose( reader_op{*conn_}, std::forward(token), - conn_->writer_timer_); + conn_->writer_cv_); } template @@ -429,16 +359,7 @@ class run_op { return asio::async_compose( writer_op{*conn_}, std::forward(token), - conn_->writer_timer_); - } - - template - auto health_checker(CompletionToken&& token) - { - return asio::async_compose( - health_checker_op{*conn_}, - std::forward(token), - conn_->writer_timer_); + conn_->writer_cv_); } public: @@ -450,15 +371,12 @@ class run_op { template void operator()( Self& self, - std::array order, + std::array order, system::error_code setup_ec, - system::error_code health_check_ec, system::error_code reader_ec, system::error_code writer_ec) { - (*this)( - self, - translate_parallel_group_errors(order, setup_ec, health_check_ec, reader_ec, writer_ec)); + (*this)(self, translate_parallel_group_errors(order, setup_ec, reader_ec, writer_ec)); } template @@ -483,9 +401,6 @@ class run_op { [this](auto token) { return this->send_setup(token); }, - [this](auto token) { - return this->health_checker(token); - }, [this](auto token) { return this->reader(token); }, @@ -626,7 +541,7 @@ class basic_connection { { } /// Returns the associated executor. - executor_type get_executor() noexcept { return impl_->writer_timer_.get_executor(); } + executor_type get_executor() noexcept { return impl_->writer_cv_.get_executor(); } /** @brief Starts the underlying connection operations. * @@ -1046,7 +961,7 @@ class basic_connection { asio::async_compose( detail::run_op{self}, token_with_slot, - self->writer_timer_); + self->writer_cv_); } }; diff --git a/include/boost/redis/detail/writer_fsm.hpp b/include/boost/redis/detail/writer_fsm.hpp index 86f11cc2..b42cee99 100644 --- a/include/boost/redis/detail/writer_fsm.hpp +++ b/include/boost/redis/detail/writer_fsm.hpp @@ -9,10 +9,14 @@ #ifndef BOOST_REDIS_WRITER_FSM_HPP #define BOOST_REDIS_WRITER_FSM_HPP +#include +#include + #include #include #include +#include // Sans-io algorithm for the writer task, as a finite state machine @@ -30,29 +34,57 @@ enum class writer_action_type wait, // Wait until there is data to be written }; -struct writer_action { - writer_action_type type; - system::error_code ec; +class writer_action { + writer_action_type type_; + union { + system::error_code ec_; + std::string_view write_buffer_; + }; +public: writer_action(writer_action_type type) noexcept - : type{type} + : type_{type} { } + writer_action_type type() const { return type_; } + writer_action(system::error_code ec) noexcept - : type{writer_action_type::done} - , ec{ec} + : type_{writer_action_type::done} + , ec_{ec} { } + + static writer_action write(std::string_view buffer) + { + auto res = writer_action{writer_action_type::write}; + res.write_buffer_ = buffer; + return res; + } + + system::error_code error() const + { + BOOST_ASSERT(type_ == writer_action_type::done); + return ec_; + } + + std::string_view write_buffer() const + { + BOOST_ASSERT(type_ == writer_action_type::write); + return write_buffer_; + } }; class writer_fsm { int resume_point_{0}; multiplexer* mpx_; connection_logger* logger_; + const request* ping_req_; + std::size_t write_offset_{}; public: - writer_fsm(multiplexer& mpx, connection_logger& logger) noexcept + writer_fsm(multiplexer& mpx, connection_logger& logger, const request& ping_req) noexcept : mpx_(&mpx) , logger_(&logger) + , ping_req_(&ping_req) { } writer_action resume( diff --git a/include/boost/redis/impl/connection.ipp b/include/boost/redis/impl/connection.ipp index b23cc8b4..f127a469 100644 --- a/include/boost/redis/impl/connection.ipp +++ b/include/boost/redis/impl/connection.ipp @@ -22,9 +22,8 @@ logger detail::make_stderr_logger(logger::level lvl, std::string prefix) } system::error_code detail::translate_parallel_group_errors( - std::array order, + std::array order, system::error_code setup_ec, - system::error_code health_check_ec, system::error_code reader_ec, system::error_code writer_ec) { @@ -41,9 +40,8 @@ system::error_code detail::translate_parallel_group_errors( // excluding the setup task std::size_t task_number = order[0] == 0u ? order[1] : order[0]; switch (task_number) { - case 1u: return health_check_ec; - case 2u: return reader_ec; - case 3u: return writer_ec; + case 1u: return reader_ec; + case 2u: return writer_ec; default: BOOST_ASSERT(false); return system::error_code(); } } diff --git a/include/boost/redis/impl/reader_fsm.ipp b/include/boost/redis/impl/reader_fsm.ipp index e21fa144..a1af89ff 100644 --- a/include/boost/redis/impl/reader_fsm.ipp +++ b/include/boost/redis/impl/reader_fsm.ipp @@ -48,11 +48,22 @@ reader_fsm::action reader_fsm::resume( // Process the bytes read, even if there was an error st.mpx.commit_read(bytes_read); + // Check for cancellations + if (is_terminal_cancel(cancel_state)) { + return {asio::error::operation_aborted}; + } + // Check for read errors if (ec) { // TODO: If an error occurred but data was read (i.e. // bytes_read != 0) we should try to process that data and // deliver it to the user before calling cancel_run. + if (ec == asio::error::operation_aborted) { + // The read timed out (because of cancel_after, not because of external cancellation). + // This means that we didn't receive any data when we were expecting it + ec = error::pong_timeout; + } + return {ec}; } diff --git a/include/boost/redis/impl/writer_fsm.ipp b/include/boost/redis/impl/writer_fsm.ipp index bb4304d2..99ff3239 100644 --- a/include/boost/redis/impl/writer_fsm.ipp +++ b/include/boost/redis/impl/writer_fsm.ipp @@ -9,6 +9,7 @@ #ifndef BOOST_REDIS_WRITER_FSM_IPP #define BOOST_REDIS_WRITER_FSM_IPP +#include #include #include #include @@ -24,6 +25,32 @@ namespace boost::redis::detail { +// TODO: we should probably make any_adapter directly constructible from adapters +void process_ping_node( + connection_logger& lgr, + resp3::basic_node const& nd, + system::error_code& ec) +{ + switch (nd.data_type) { + case resp3::type::simple_error: ec = redis::error::resp3_simple_error; break; + case resp3::type::blob_error: ec = redis::error::resp3_blob_error; break; + default: ; + } + + if (ec) { + lgr.log(logger::level::info, "Health checker: server answered ping with an error", nd.value); + } +} + +inline any_adapter make_ping_adapter(connection_logger& lgr) +{ + return any_adapter{any_adapter::impl_t{ + [&lgr](any_adapter::parse_event evt, resp3::node_view const& nd, system::error_code& ec) { + if (evt == any_adapter::parse_event::node) + process_ping_node(lgr, nd, ec); + }}}; +} + writer_action writer_fsm::resume( system::error_code ec, std::size_t bytes_written, @@ -35,26 +62,37 @@ writer_action writer_fsm::resume( for (;;) { // Attempt to write while we have requests ready to send while (mpx_->prepare_write() != 0u) { - // Write - BOOST_REDIS_YIELD(resume_point_, 1, writer_action_type::write) - - // Mark requests as written - if (!ec) - mpx_->commit_write(); - - // Check for cancellations - if (is_terminal_cancel(cancel_state)) { - logger_->trace("Writer task: cancelled (1)."); - return system::error_code(asio::error::operation_aborted); + // Write. We need to account for short writes + write_offset_ = 0u; + while (write_offset_ < mpx_->get_write_buffer().size()) { + // Write what we can + BOOST_REDIS_YIELD( + resume_point_, + 1, + writer_action::write(mpx_->get_write_buffer().substr(write_offset_))) + + // Update the offset + write_offset_ += bytes_written; + + // Check for cancellations + if (is_terminal_cancel(cancel_state)) { + logger_->trace("Writer task: cancelled (1)."); + return system::error_code(asio::error::operation_aborted); + } + + // Log what we wrote + logger_->on_write(ec, bytes_written); + + // Check for errors + // TODO: translate operation_aborted to another error code for clarity. + // pong_timeout is probably not the best option here - maybe write_timeout? + if (ec) { + return ec; + } } - // Log what we wrote - logger_->on_write(ec, bytes_written); - - // Check for errors - if (ec) { - return ec; - } + // We finished writing a message successfully. Mark requests as written + mpx_->commit_write(); } // No more requests ready to be written. Wait for more @@ -65,6 +103,13 @@ writer_action writer_fsm::resume( logger_->trace("Writer task: cancelled (2)."); return system::error_code(asio::error::operation_aborted); } + + // If we weren't notified, it's because there is no data and we should send a health check + if (!ec) { + auto elem = make_elem(*ping_req_, make_ping_adapter(*logger_)); + elem->set_done_callback([] { }); + mpx_->add(elem); + } } } From c405188f536f5382e03312e549c29d3d9ca4f63d Mon Sep 17 00:00:00 2001 From: Ruben Perez Date: Wed, 15 Oct 2025 11:52:29 +0200 Subject: [PATCH 02/41] Use connection_state in the writer --- include/boost/redis/connection.hpp | 9 ++++++--- include/boost/redis/detail/writer_fsm.hpp | 13 +++---------- include/boost/redis/impl/writer_fsm.ipp | 20 +++++++++++--------- 3 files changed, 20 insertions(+), 22 deletions(-) diff --git a/include/boost/redis/connection.hpp b/include/boost/redis/connection.hpp index faf45119..49b59239 100644 --- a/include/boost/redis/connection.hpp +++ b/include/boost/redis/connection.hpp @@ -216,13 +216,16 @@ struct writer_op { explicit writer_op(connection_impl& conn) noexcept : conn_(&conn) - , fsm_(conn.st_.mpx, conn.st_.logger, conn.st_.ping_req) { } template void operator()(Self& self, system::error_code ec = {}, std::size_t bytes_written = 0u) { - auto act = fsm_.resume(ec, bytes_written, self.get_cancellation_state().cancelled()); + auto act = fsm_.resume( + conn_->st_, + ec, + bytes_written, + self.get_cancellation_state().cancelled()); // TODO: I think the timeout should be embedded in the action switch (act.type()) { @@ -270,7 +273,7 @@ struct reader_op { switch (act.type_) { case reader_fsm::action::type::read_some: { - auto const buf = conn_->mpx_.get_prepared_read_buffer(); + auto const buf = conn_->st_.mpx.get_prepared_read_buffer(); if (conn_->st_.cfg.health_check_interval.count() != 0) { // TODO: timeouts should be encoded in the actions // The writer might be at most health_check_interval writing diff --git a/include/boost/redis/detail/writer_fsm.hpp b/include/boost/redis/detail/writer_fsm.hpp index b42cee99..93e2ae06 100644 --- a/include/boost/redis/detail/writer_fsm.hpp +++ b/include/boost/redis/detail/writer_fsm.hpp @@ -9,8 +9,7 @@ #ifndef BOOST_REDIS_WRITER_FSM_HPP #define BOOST_REDIS_WRITER_FSM_HPP -#include -#include +#include #include #include @@ -75,19 +74,13 @@ class writer_action { class writer_fsm { int resume_point_{0}; - multiplexer* mpx_; - connection_logger* logger_; - const request* ping_req_; std::size_t write_offset_{}; public: - writer_fsm(multiplexer& mpx, connection_logger& logger, const request& ping_req) noexcept - : mpx_(&mpx) - , logger_(&logger) - , ping_req_(&ping_req) - { } + writer_fsm() noexcept = default; writer_action resume( + connection_state& st, system::error_code ec, std::size_t bytes_written, asio::cancellation_type_t cancel_state); diff --git a/include/boost/redis/impl/writer_fsm.ipp b/include/boost/redis/impl/writer_fsm.ipp index 99ff3239..42362957 100644 --- a/include/boost/redis/impl/writer_fsm.ipp +++ b/include/boost/redis/impl/writer_fsm.ipp @@ -11,6 +11,7 @@ #include #include +#include #include #include #include @@ -52,6 +53,7 @@ inline any_adapter make_ping_adapter(connection_logger& lgr) } writer_action writer_fsm::resume( + connection_state& st, system::error_code ec, std::size_t bytes_written, asio::cancellation_type_t cancel_state) @@ -61,27 +63,27 @@ writer_action writer_fsm::resume( for (;;) { // Attempt to write while we have requests ready to send - while (mpx_->prepare_write() != 0u) { + while (st.mpx.prepare_write() != 0u) { // Write. We need to account for short writes write_offset_ = 0u; - while (write_offset_ < mpx_->get_write_buffer().size()) { + while (write_offset_ < st.mpx.get_write_buffer().size()) { // Write what we can BOOST_REDIS_YIELD( resume_point_, 1, - writer_action::write(mpx_->get_write_buffer().substr(write_offset_))) + writer_action::write(st.mpx.get_write_buffer().substr(write_offset_))) // Update the offset write_offset_ += bytes_written; // Check for cancellations if (is_terminal_cancel(cancel_state)) { - logger_->trace("Writer task: cancelled (1)."); + st.logger.trace("Writer task: cancelled (1)."); return system::error_code(asio::error::operation_aborted); } // Log what we wrote - logger_->on_write(ec, bytes_written); + st.logger.on_write(ec, bytes_written); // Check for errors // TODO: translate operation_aborted to another error code for clarity. @@ -92,7 +94,7 @@ writer_action writer_fsm::resume( } // We finished writing a message successfully. Mark requests as written - mpx_->commit_write(); + st.mpx.commit_write(); } // No more requests ready to be written. Wait for more @@ -100,15 +102,15 @@ writer_action writer_fsm::resume( // Check for cancellations if (is_terminal_cancel(cancel_state)) { - logger_->trace("Writer task: cancelled (2)."); + st.logger.trace("Writer task: cancelled (2)."); return system::error_code(asio::error::operation_aborted); } // If we weren't notified, it's because there is no data and we should send a health check if (!ec) { - auto elem = make_elem(*ping_req_, make_ping_adapter(*logger_)); + auto elem = make_elem(st.ping_req, make_ping_adapter(st.logger)); elem->set_done_callback([] { }); - mpx_->add(elem); + st.mpx.add(elem); } } } From 18267e2c87492c936b2d02cdc887302e8bd3b884 Mon Sep 17 00:00:00 2001 From: Ruben Perez Date: Wed, 15 Oct 2025 12:16:59 +0200 Subject: [PATCH 03/41] writer timeouts in actions --- include/boost/redis/connection.hpp | 23 ++++++-------- include/boost/redis/detail/writer_fsm.hpp | 37 +++++++++++++++++------ include/boost/redis/impl/writer_fsm.ipp | 14 ++++++--- test/test_writer_fsm.cpp | 24 +++++++-------- 4 files changed, 58 insertions(+), 40 deletions(-) diff --git a/include/boost/redis/connection.hpp b/include/boost/redis/connection.hpp index 49b59239..07ee5a6b 100644 --- a/include/boost/redis/connection.hpp +++ b/include/boost/redis/connection.hpp @@ -221,34 +221,29 @@ struct writer_op { template void operator()(Self& self, system::error_code ec = {}, std::size_t bytes_written = 0u) { + auto* conn = conn_; // Prevent potential use-after-move errors with cancel_after auto act = fsm_.resume( - conn_->st_, + conn->st_, ec, bytes_written, self.get_cancellation_state().cancelled()); - // TODO: I think the timeout should be embedded in the action switch (act.type()) { case writer_action_type::done: self.complete(act.error()); return; - case writer_action_type::write: - if (conn_->st_.cfg.health_check_interval.count() != 0) { - // If nothing has been written in the health check interval, consider the connection as dead - auto* conn = conn_; + case writer_action_type::write_some: + if (auto timeout = act.timeout(); timeout.count() != 0) { conn->stream_.async_write_some( asio::buffer(act.write_buffer()), - asio::cancel_after( - conn->writer_timer_, - conn->st_.cfg.health_check_interval, - std::move(self))); + asio::cancel_after(conn->writer_timer_, timeout, std::move(self))); } else { - conn_->stream_.async_write_some(asio::buffer(act.write_buffer()), std::move(self)); + conn->stream_.async_write_some(asio::buffer(act.write_buffer()), std::move(self)); } return; case writer_action_type::wait: - if (conn_->st_.cfg.health_check_interval.count() != 0) { - conn_->writer_cv_.expires_after(conn_->st_.cfg.health_check_interval); + if (auto timeout = act.timeout(); timeout.count() != 0) { + conn->writer_cv_.expires_after(timeout); } - conn_->writer_cv_.async_wait(std::move(self)); + conn->writer_cv_.async_wait(std::move(self)); return; } } diff --git a/include/boost/redis/detail/writer_fsm.hpp b/include/boost/redis/detail/writer_fsm.hpp index 93e2ae06..671c89a1 100644 --- a/include/boost/redis/detail/writer_fsm.hpp +++ b/include/boost/redis/detail/writer_fsm.hpp @@ -14,6 +14,7 @@ #include #include +#include #include #include @@ -28,16 +29,19 @@ class multiplexer; // What should we do next? enum class writer_action_type { - done, // Call the final handler - write, // Issue a write on the stream - wait, // Wait until there is data to be written + done, // Call the final handler + write_some, // Issue a write on the stream + wait, // Wait until there is data to be written }; class writer_action { writer_action_type type_; union { system::error_code ec_; - std::string_view write_buffer_; + struct { + std::string_view buffer; + std::chrono::steady_clock::duration timeout; + } buff_timeout; }; public: @@ -52,10 +56,19 @@ class writer_action { , ec_{ec} { } - static writer_action write(std::string_view buffer) + static writer_action write_some( + std::string_view buffer, + std::chrono::steady_clock::duration timeout) { - auto res = writer_action{writer_action_type::write}; - res.write_buffer_ = buffer; + auto res = writer_action{writer_action_type::write_some}; + res.buff_timeout = {buffer, timeout}; + return res; + } + + static writer_action wait(std::chrono::steady_clock::duration timeout) + { + auto res = writer_action{writer_action_type::wait}; + res.buff_timeout = {{}, timeout}; return res; } @@ -67,8 +80,14 @@ class writer_action { std::string_view write_buffer() const { - BOOST_ASSERT(type_ == writer_action_type::write); - return write_buffer_; + BOOST_ASSERT(type_ == writer_action_type::write_some); + return buff_timeout.buffer; + } + + std::chrono::steady_clock::duration timeout() const + { + BOOST_ASSERT(type_ == writer_action_type::write_some || type_ == writer_action_type::wait); + return buff_timeout.timeout; } }; diff --git a/include/boost/redis/impl/writer_fsm.ipp b/include/boost/redis/impl/writer_fsm.ipp index 42362957..db864bb0 100644 --- a/include/boost/redis/impl/writer_fsm.ipp +++ b/include/boost/redis/impl/writer_fsm.ipp @@ -64,14 +64,18 @@ writer_action writer_fsm::resume( for (;;) { // Attempt to write while we have requests ready to send while (st.mpx.prepare_write() != 0u) { - // Write. We need to account for short writes + // Write an entire message. We can't use asio::async_write because we want + // to apply timeouts to individual write operations write_offset_ = 0u; while (write_offset_ < st.mpx.get_write_buffer().size()) { - // Write what we can + // Write what we can. If nothing has been written for the health check + // interval, we consider the connection as failed BOOST_REDIS_YIELD( resume_point_, 1, - writer_action::write(st.mpx.get_write_buffer().substr(write_offset_))) + writer_action::write_some( + st.mpx.get_write_buffer().substr(write_offset_), + st.cfg.health_check_interval)) // Update the offset write_offset_ += bytes_written; @@ -97,8 +101,8 @@ writer_action writer_fsm::resume( st.mpx.commit_write(); } - // No more requests ready to be written. Wait for more - BOOST_REDIS_YIELD(resume_point_, 2, writer_action_type::wait) + // No more requests ready to be written. Wait for more, or until we need to send a PING + BOOST_REDIS_YIELD(resume_point_, 2, writer_action::wait(st.cfg.health_check_interval)) // Check for cancellations if (is_terminal_cancel(cancel_state)) { diff --git a/test/test_writer_fsm.cpp b/test/test_writer_fsm.cpp index 61674693..b7266300 100644 --- a/test/test_writer_fsm.cpp +++ b/test/test_writer_fsm.cpp @@ -38,10 +38,10 @@ using detail::connection_logger; static const char* to_string(writer_action_type value) { switch (value) { - case writer_action_type::done: return "writer_action_type::done"; - case writer_action_type::write: return "writer_action_type::write"; - case writer_action_type::wait: return "writer_action_type::wait"; - default: return ""; + case writer_action_type::done: return "writer_action_type::done"; + case writer_action_type::write_some: return "writer_action_type::write"; + case writer_action_type::wait: return "writer_action_type::wait"; + default: return ""; } } @@ -106,7 +106,7 @@ void test_single_request() // Start. A write is triggered, and the request is marked as staged auto act = fix.fsm.resume(error_code(), 0u, cancellation_type_t::none); - BOOST_TEST_EQ(act, writer_action_type::write); + BOOST_TEST_EQ(act, writer_action_type::write_some); BOOST_TEST(item1.elm->is_staged()); // The write completes successfully. The request is written, and we go back to sleep. @@ -119,7 +119,7 @@ void test_single_request() // The wait is cancelled to signal we've got a new request act = fix.fsm.resume(asio::error::operation_aborted, 0u, cancellation_type_t::none); - BOOST_TEST_EQ(act, writer_action_type::write); + BOOST_TEST_EQ(act, writer_action_type::write_some); BOOST_TEST(item2.elm->is_staged()); // Write successful @@ -146,7 +146,7 @@ void test_request_arrives_while_writing() // Start. A write is triggered, and the request is marked as staged auto act = fix.fsm.resume(error_code(), 0u, cancellation_type_t::none); - BOOST_TEST_EQ(act, writer_action_type::write); + BOOST_TEST_EQ(act, writer_action_type::write_some); BOOST_TEST(item1.elm->is_staged()); // While the write is outstanding, a new request arrives @@ -155,7 +155,7 @@ void test_request_arrives_while_writing() // The write completes successfully. The request is written, // and we start writing the new one act = fix.fsm.resume(error_code(), item1.req.payload().size(), cancellation_type_t::none); - BOOST_TEST_EQ(act, writer_action_type::write); + BOOST_TEST_EQ(act, writer_action_type::write_some); BOOST_TEST(item1.elm->is_written()); BOOST_TEST(item2.elm->is_staged()); @@ -187,7 +187,7 @@ void test_no_request_at_startup() // The wait is cancelled to signal we've got a new request act = fix.fsm.resume(asio::error::operation_aborted, 0u, cancellation_type_t::none); - BOOST_TEST_EQ(act, writer_action_type::write); + BOOST_TEST_EQ(act, writer_action_type::write_some); BOOST_TEST(item.elm->is_staged()); // Write successful @@ -213,7 +213,7 @@ void test_write_error() // Start. A write is triggered, and the request is marked as staged auto act = fix.fsm.resume(error_code(), 0u, cancellation_type_t::none); - BOOST_TEST_EQ(act, writer_action_type::write); + BOOST_TEST_EQ(act, writer_action_type::write_some); BOOST_TEST(item.elm->is_staged()); // The write completes with an error (possibly with partial success). @@ -241,7 +241,7 @@ void test_cancel_write() // Start. A write is triggered auto act = fix.fsm.resume(error_code(), 0u, cancellation_type_t::none); - BOOST_TEST_EQ(act, writer_action_type::write); + BOOST_TEST_EQ(act, writer_action_type::write_some); BOOST_TEST(item.elm->is_staged()); // Write cancelled and failed with operation_aborted @@ -267,7 +267,7 @@ void test_cancel_write_edge() // Start. A write is triggered auto act = fix.fsm.resume(error_code(), 0u, cancellation_type_t::none); - BOOST_TEST_EQ(act, writer_action_type::write); + BOOST_TEST_EQ(act, writer_action_type::write_some); BOOST_TEST(item.elm->is_staged()); // Write cancelled but without error From 4f0ada48b9c1edd52c4223d25065566a60e1de2f Mon Sep 17 00:00:00 2001 From: Ruben Perez Date: Wed, 15 Oct 2025 12:21:48 +0200 Subject: [PATCH 04/41] Fix possible problems with no timeouts --- include/boost/redis/connection.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/boost/redis/connection.hpp b/include/boost/redis/connection.hpp index 07ee5a6b..261944fe 100644 --- a/include/boost/redis/connection.hpp +++ b/include/boost/redis/connection.hpp @@ -242,6 +242,8 @@ struct writer_op { case writer_action_type::wait: if (auto timeout = act.timeout(); timeout.count() != 0) { conn->writer_cv_.expires_after(timeout); + } else { + conn->writer_cv_.expires_at((std::chrono::steady_clock::time_point::max)()); } conn->writer_cv_.async_wait(std::move(self)); return; From 3fd35fe6f80a68a94b8db72beaca6fc0abb38bd5 Mon Sep 17 00:00:00 2001 From: Ruben Perez Date: Wed, 15 Oct 2025 17:52:50 +0200 Subject: [PATCH 05/41] make action a variant --- include/boost/redis/connection.hpp | 8 +++---- include/boost/redis/detail/reader_fsm.hpp | 24 ++++++++++++++++--- test/test_reader_fsm.cpp | 29 ++++++++++++++--------- 3 files changed, 43 insertions(+), 18 deletions(-) diff --git a/include/boost/redis/connection.hpp b/include/boost/redis/connection.hpp index 261944fe..132a92cd 100644 --- a/include/boost/redis/connection.hpp +++ b/include/boost/redis/connection.hpp @@ -267,7 +267,7 @@ struct reader_op { for (;;) { auto act = fsm_.resume(conn_->st_, n, ec, self.get_cancellation_state().cancelled()); - switch (act.type_) { + switch (act.get_type()) { case reader_fsm::action::type::read_some: { auto const buf = conn_->st_.mpx.get_prepared_read_buffer(); @@ -289,14 +289,14 @@ struct reader_op { return; } case reader_fsm::action::type::notify_push_receiver: - if (conn_->receive_channel_.try_send(ec, act.push_size_)) { + if (conn_->receive_channel_.try_send(ec, act.push_size())) { continue; } else { - conn_->receive_channel_.async_send(ec, act.push_size_, std::move(self)); + conn_->receive_channel_.async_send(ec, act.push_size(), std::move(self)); return; } return; - case reader_fsm::action::type::done: self.complete(act.ec_); return; + case reader_fsm::action::type::done: self.complete(act.error()); return; } } } diff --git a/include/boost/redis/detail/reader_fsm.hpp b/include/boost/redis/detail/reader_fsm.hpp index e0d7de11..b9fb9033 100644 --- a/include/boost/redis/detail/reader_fsm.hpp +++ b/include/boost/redis/detail/reader_fsm.hpp @@ -21,7 +21,8 @@ class read_buffer; class reader_fsm { public: - struct action { + class action { + public: enum class type { read_some, @@ -44,9 +45,26 @@ class reader_fsm { return {type::notify_push_receiver, bytes}; } + type get_type() const { return type_; } + + system::error_code error() const + { + BOOST_ASSERT(type_ == type::done); + return ec_; + } + + std::size_t push_size() const + { + BOOST_ASSERT(type_ == type::notify_push_receiver); + return push_size_; + } + + private: type type_; - std::size_t push_size_{}; - system::error_code ec_; + union { + system::error_code ec_; + std::size_t push_size_{}; + }; }; action resume( diff --git a/test/test_reader_fsm.cpp b/test/test_reader_fsm.cpp index 1c204131..a42f8662 100644 --- a/test/test_reader_fsm.cpp +++ b/test/test_reader_fsm.cpp @@ -50,16 +50,23 @@ std::ostream& operator<<(std::ostream& os, action::type type) { return os << to_ bool operator==(const action& lhs, const action& rhs) noexcept { - return lhs.type_ == rhs.type_ && lhs.push_size_ == rhs.push_size_ && lhs.ec_ == rhs.ec_; + if (lhs.get_type() != rhs.get_type()) + return false; + if (lhs.get_type() == action::type::done) + return lhs.error() == rhs.error(); + if (lhs.get_type() == action::type::notify_push_receiver) + return lhs.push_size() == rhs.push_size(); + return true; } std::ostream& operator<<(std::ostream& os, const action& act) { - os << "action{ .type=" << act.type_; - if (act.type_ == action::type::done) - os << ", .error=" << act.ec_; - else if (act.type_ == action::type::notify_push_receiver) - os << ", .push_size=" << act.push_size_; + auto t = act.get_type(); + os << "action{ .type=" << t; + if (t == action::type::done) + os << ", .error=" << act.error(); + else if (t == action::type::notify_push_receiver) + os << ", .push_size=" << act.push_size(); return os << " }"; } @@ -94,7 +101,7 @@ void test_push() // Initiate auto act = fsm.resume(fix.st, 0, error_code(), cancellation_type_t::none); - BOOST_TEST_EQ(act.type_, action::type::read_some); + BOOST_TEST_EQ(act, action::type::read_some); // The fsm is asking for data. std::string const payload = @@ -135,7 +142,7 @@ void test_read_needs_more() // Initiate auto act = fsm.resume(fix.st, 0, error_code(), cancellation_type_t::none); - BOOST_TEST_EQ(act.type_, action::type::read_some); + BOOST_TEST_EQ(act, action::type::read_some); // Split the incoming message in three random parts and deliver // them to the reader individually. @@ -182,7 +189,7 @@ void test_read_error() // Initiate auto act = fsm.resume(fix.st, 0, error_code(), cancellation_type_t::none); - BOOST_TEST_EQ(act.type_, action::type::read_some); + BOOST_TEST_EQ(act, action::type::read_some); // The fsm is asking for data. std::string const payload = ">1\r\n+msg1\r\n"; @@ -345,7 +352,7 @@ void test_cancel_push_delivery() // Initiate auto act = fsm.resume(fix.st, 0, error_code(), cancellation_type_t::none); - BOOST_TEST_EQ(act.type_, action::type::read_some); + BOOST_TEST_EQ(act, action::type::read_some); // The fsm is asking for data. constexpr std::string_view payload = @@ -377,7 +384,7 @@ void test_cancel_push_delivery_edge() // Initiate auto act = fsm.resume(fix.st, 0, error_code(), cancellation_type_t::none); - BOOST_TEST_EQ(act.type_, action::type::read_some); + BOOST_TEST_EQ(act, action::type::read_some); // The fsm is asking for data. constexpr std::string_view payload = From 0edc958a55326507727764d0e1d64d001ccc608e Mon Sep 17 00:00:00 2001 From: Ruben Perez Date: Wed, 15 Oct 2025 18:07:37 +0200 Subject: [PATCH 06/41] Make timeout part of the read action --- include/boost/redis/connection.hpp | 26 ++++++---------- include/boost/redis/detail/reader_fsm.hpp | 30 +++++++++++++------ include/boost/redis/impl/reader_fsm.ipp | 36 +++++++++++------------ 3 files changed, 47 insertions(+), 45 deletions(-) diff --git a/include/boost/redis/connection.hpp b/include/boost/redis/connection.hpp index 132a92cd..30a7dc37 100644 --- a/include/boost/redis/connection.hpp +++ b/include/boost/redis/connection.hpp @@ -265,35 +265,27 @@ struct reader_op { void operator()(Self& self, system::error_code ec = {}, std::size_t n = 0) { for (;;) { - auto act = fsm_.resume(conn_->st_, n, ec, self.get_cancellation_state().cancelled()); + auto* conn = conn_; // Prevent potential use-after-move errors with cancel_after + auto act = fsm_.resume(conn->st_, n, ec, self.get_cancellation_state().cancelled()); switch (act.get_type()) { case reader_fsm::action::type::read_some: { - auto const buf = conn_->st_.mpx.get_prepared_read_buffer(); - if (conn_->st_.cfg.health_check_interval.count() != 0) { - // TODO: timeouts should be encoded in the actions - // The writer might be at most health_check_interval writing - // nothing until it sends a PING. Wait for at most another health_check_interval - // before considering the connection dead - auto* conn = conn_; + auto const buf = asio::buffer(conn->st_.mpx.get_prepared_read_buffer()); + if (act.timeout().count() != 0) { conn->stream_.async_read_some( - asio::buffer(buf), - asio::cancel_after( - conn->reader_timer_, - 2 * conn->st_.cfg.health_check_interval, - std::move(self))); + buf, + asio::cancel_after(conn->reader_timer_, act.timeout(), std::move(self))); } else { - conn_->stream_.async_read_some(asio::buffer(buf), std::move(self)); + conn->stream_.async_read_some(buf, std::move(self)); } return; } case reader_fsm::action::type::notify_push_receiver: - if (conn_->receive_channel_.try_send(ec, act.push_size())) { + if (conn->receive_channel_.try_send(ec, act.push_size())) { continue; } else { - conn_->receive_channel_.async_send(ec, act.push_size(), std::move(self)); - return; + conn->receive_channel_.async_send(ec, act.push_size(), std::move(self)); } return; case reader_fsm::action::type::done: self.complete(act.error()); return; diff --git a/include/boost/redis/detail/reader_fsm.hpp b/include/boost/redis/detail/reader_fsm.hpp index b9fb9033..3e7a7032 100644 --- a/include/boost/redis/detail/reader_fsm.hpp +++ b/include/boost/redis/detail/reader_fsm.hpp @@ -13,6 +13,7 @@ #include #include +#include #include namespace boost::redis::detail { @@ -30,20 +31,14 @@ class reader_fsm { done, }; - action(type t, std::size_t push_size = 0u) noexcept - : type_(t) - , push_size_(push_size) - { } - action(system::error_code ec) noexcept : type_(type::done) , ec_(ec) { } - static action notify_push_receiver(std::size_t bytes) - { - return {type::notify_push_receiver, bytes}; - } + static action read_some(std::chrono::steady_clock::duration timeout) { return {timeout}; } + + static action notify_push_receiver(std::size_t bytes) { return {bytes}; } type get_type() const { return type_; } @@ -53,6 +48,12 @@ class reader_fsm { return ec_; } + std::chrono::steady_clock::duration timeout() const + { + BOOST_ASSERT(type_ == type::read_some); + return timeout_; + } + std::size_t push_size() const { BOOST_ASSERT(type_ == type::notify_push_receiver); @@ -60,9 +61,20 @@ class reader_fsm { } private: + action(std::size_t push_size) noexcept + : type_(type::notify_push_receiver) + , push_size_(push_size) + { } + + action(std::chrono::steady_clock::duration t) noexcept + : type_(type::read_some) + , timeout_(t) + { } + type type_; union { system::error_code ec_; + std::chrono::steady_clock::duration timeout_; std::size_t push_size_{}; }; }; diff --git a/include/boost/redis/impl/reader_fsm.ipp b/include/boost/redis/impl/reader_fsm.ipp index a1af89ff..2ae05a6b 100644 --- a/include/boost/redis/impl/reader_fsm.ipp +++ b/include/boost/redis/impl/reader_fsm.ipp @@ -32,14 +32,23 @@ reader_fsm::action reader_fsm::resume( return {ec}; } - // Read + // Read. The connection might spend health_check_interval without writing data. + // Give it another health_check_interval for the response to arrive. + // If we don't get anything in this time, consider the connection as dead st.logger.trace("Reader task: issuing read"); - BOOST_REDIS_YIELD(resume_point_, 1, action::type::read_some) + BOOST_REDIS_YIELD(resume_point_, 1, action::read_some(2 * st.cfg.health_check_interval)) // Check for cancellations if (is_terminal_cancel(cancel_state)) { st.logger.trace("Reader task: cancelled (1)"); - return {asio::error::operation_aborted}; + return system::error_code(asio::error::operation_aborted); + } + + // Translate timeout errors caused by operation_aborted to more legible ones. + // A timeout here means that we didn't receive data in time. + // Note that cancellation is already handled by the above statement. + if (ec == asio::error::operation_aborted) { + ec = error::pong_timeout; } // Log what we read @@ -48,23 +57,12 @@ reader_fsm::action reader_fsm::resume( // Process the bytes read, even if there was an error st.mpx.commit_read(bytes_read); - // Check for cancellations - if (is_terminal_cancel(cancel_state)) { - return {asio::error::operation_aborted}; - } - // Check for read errors if (ec) { // TODO: If an error occurred but data was read (i.e. // bytes_read != 0) we should try to process that data and // deliver it to the user before calling cancel_run. - if (ec == asio::error::operation_aborted) { - // The read timed out (because of cancel_after, not because of external cancellation). - // This means that we didn't receive any data when we were expecting it - ec = error::pong_timeout; - } - - return {ec}; + return ec; } // Process the data that we've read @@ -75,7 +73,7 @@ reader_fsm::action reader_fsm::resume( // TODO: Perhaps log what has not been consumed to aid // debugging. st.logger.trace("Reader task: error processing message", ec); - return {ec}; + return ec; } if (res_.first == consume_result::needs_more) { @@ -88,13 +86,13 @@ reader_fsm::action reader_fsm::resume( // Check for cancellations if (is_terminal_cancel(cancel_state)) { st.logger.trace("Reader task: cancelled (2)"); - return {asio::error::operation_aborted}; + return system::error_code(asio::error::operation_aborted); } // Check for other errors if (ec) { st.logger.trace("Reader task: error notifying push receiver", ec); - return {ec}; + return ec; } } else { // TODO: Here we should notify the exec operation that @@ -109,7 +107,7 @@ reader_fsm::action reader_fsm::resume( } BOOST_ASSERT(false); - return {system::error_code()}; + return system::error_code(); } } // namespace boost::redis::detail From 884799b729e066ebbe75b037fa9dd8a7340e1f9e Mon Sep 17 00:00:00 2001 From: Ruben Perez Date: Thu, 16 Oct 2025 12:59:55 +0200 Subject: [PATCH 07/41] Initial test --- test/test_conn_check_health.cpp | 76 +++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) diff --git a/test/test_conn_check_health.cpp b/test/test_conn_check_health.cpp index c50cdd9d..0bcdca3e 100644 --- a/test/test_conn_check_health.cpp +++ b/test/test_conn_check_health.cpp @@ -6,14 +6,18 @@ #include #include +#include #include #include +#include +#include #include #include "common.hpp" #include +#include namespace net = boost::asio; namespace redis = boost::redis; @@ -154,6 +158,77 @@ void test_disabled() BOOST_TEST(exec2_finished); } +class test_flexible { + net::io_context ioc; + net::steady_timer timer{ioc}; + connection conn1{ioc}, conn2{ioc}; + request req_publish; + bool exec_finished{false}, publisher_finished{false}; + + void start_generating_traffic() + { + conn2.async_exec(req_publish, ignore, [this](error_code ec, std::size_t) { + BOOST_TEST_EQ(ec, error_code()); + + if (exec_finished) { + conn2.cancel(); + publisher_finished = true; + } else { + timer.expires_after(100ms); + timer.async_wait([this](error_code ec) { + BOOST_TEST_EQ(ec, error_code()); + start_generating_traffic(); + }); + } + }); + } + +public: + test_flexible() = default; + + void run() + { + auto cfg = make_test_config(); + cfg.health_check_interval = 500ms; + boost::redis::generic_response resp; + + bool run1_finished = false, run2_finished = false; + + std::string channel_name = "abc"; // TODO: make this unique + req_publish.push("PUBLISH", channel_name, "test_health_check_flexible"); + + request req1; + req1.push("SUBSCRIBE", channel_name); + req1.push("BLPOP", "any", 2); + req1.get_config().cancel_if_unresponded = true; + req1.get_config().cancel_on_connection_lost = true; + + conn1.async_run(cfg, [&](error_code ec) { + run1_finished = true; + BOOST_TEST_EQ(ec, net::error::operation_aborted); + }); + + conn2.async_run(cfg, [&](error_code ec) { + run2_finished = true; + BOOST_TEST_EQ(ec, net::error::operation_aborted); + }); + + conn1.async_exec(req1, resp, [&](error_code ec, std::size_t) { + exec_finished = true; + BOOST_TEST_EQ(ec, error_code()); + conn1.cancel(); + }); + + start_generating_traffic(); + + ioc.run_for(test_timeout); + + BOOST_TEST(run1_finished); + BOOST_TEST(run2_finished); + BOOST_TEST(exec_finished); + } +}; + } // namespace int main() @@ -161,6 +236,7 @@ int main() test_reconnection(); test_error_code(); test_disabled(); + test_flexible().run(); return boost::report_errors(); } \ No newline at end of file From 60d957e5abfe75428d7a45fe0cefd4b8f3a67254 Mon Sep 17 00:00:00 2001 From: Ruben Perez Date: Thu, 16 Oct 2025 13:13:29 +0200 Subject: [PATCH 08/41] refactor --- test/test_conn_check_health.cpp | 59 +++++++++++++++++++++++---------- 1 file changed, 41 insertions(+), 18 deletions(-) diff --git a/test/test_conn_check_health.cpp b/test/test_conn_check_health.cpp index 0bcdca3e..78062e3b 100644 --- a/test/test_conn_check_health.cpp +++ b/test/test_conn_check_health.cpp @@ -16,6 +16,7 @@ #include "common.hpp" +#include #include #include @@ -25,6 +26,7 @@ using error_code = boost::system::error_code; using connection = boost::redis::connection; using boost::redis::request; using boost::redis::ignore; +using boost::redis::generic_response; using namespace std::chrono_literals; namespace { @@ -158,50 +160,69 @@ void test_disabled() BOOST_TEST(exec2_finished); } +// Receiving data is sufficient to consider our connection healthy. +// Sends a blocking request that causes PINGs to not be answered, +// and subscribes to a channel to receive pushes periodically. +// This simulates situations of heavy load, where PINGs may not be answered on time. class test_flexible { net::io_context ioc; + connection conn1{ioc}; // The one that simulates a heavy load condition + connection conn2{ioc}; // Publishes messages net::steady_timer timer{ioc}; - connection conn1{ioc}, conn2{ioc}; - request req_publish; - bool exec_finished{false}, publisher_finished{false}; + request publish_req; + bool run1_finished = false, run2_finished = false, exec_finished{false}, + publisher_finished{false}; - void start_generating_traffic() + // Starts publishing messages to the channel + void start_publish() { - conn2.async_exec(req_publish, ignore, [this](error_code ec, std::size_t) { + conn2.async_exec(publish_req, ignore, [this](error_code ec, std::size_t) { BOOST_TEST_EQ(ec, error_code()); if (exec_finished) { + // The blocking request finished, we're done conn2.cancel(); publisher_finished = true; } else { + // Wait for some time and publish again timer.expires_after(100ms); timer.async_wait([this](error_code ec) { BOOST_TEST_EQ(ec, error_code()); - start_generating_traffic(); + start_publish(); }); } }); } + // Generates a sufficiently unique name for channels so + // tests may be run in parallel for different configurations + static std::string make_unique_id() + { + auto t = std::chrono::high_resolution_clock::now(); + return "test-flexible-health-checks-" + std::to_string(t.time_since_epoch().count()); + } + public: test_flexible() = default; void run() { + // Setup auto cfg = make_test_config(); cfg.health_check_interval = 500ms; - boost::redis::generic_response resp; - - bool run1_finished = false, run2_finished = false; + generic_response resp; - std::string channel_name = "abc"; // TODO: make this unique - req_publish.push("PUBLISH", channel_name, "test_health_check_flexible"); + std::string channel_name = make_unique_id(); + publish_req.push("PUBLISH", channel_name, "test_health_check_flexible"); - request req1; - req1.push("SUBSCRIBE", channel_name); - req1.push("BLPOP", "any", 2); - req1.get_config().cancel_if_unresponded = true; - req1.get_config().cancel_on_connection_lost = true; + // This request will block for much longer than the health check + // interval. If we weren't receiving pushes, the connection would be considered dead. + // If this request finishes successfully, the health checker is doing good + request blocking_req; + blocking_req.push("SUBSCRIBE", channel_name); + blocking_req.push("BLPOP", "any", 2); + blocking_req.get_config().cancel_if_unresponded = true; + blocking_req.get_config().cancel_on_connection_lost = true; conn1.async_run(cfg, [&](error_code ec) { run1_finished = true; @@ -213,19 +234,21 @@ class test_flexible { BOOST_TEST_EQ(ec, net::error::operation_aborted); }); - conn1.async_exec(req1, resp, [&](error_code ec, std::size_t) { + // BLPOP will return NIL, so we can't use ignore + conn1.async_exec(blocking_req, resp, [&](error_code ec, std::size_t) { exec_finished = true; BOOST_TEST_EQ(ec, error_code()); conn1.cancel(); }); - start_generating_traffic(); + start_publish(); ioc.run_for(test_timeout); BOOST_TEST(run1_finished); BOOST_TEST(run2_finished); BOOST_TEST(exec_finished); + BOOST_TEST(publisher_finished); } }; From e2fd373c172c70dfdf8b670ca33da3cec1936073 Mon Sep 17 00:00:00 2001 From: Ruben Perez Date: Thu, 16 Oct 2025 13:21:52 +0200 Subject: [PATCH 09/41] Fix reader tests --- test/test_reader_fsm.cpp | 61 ++++++++++++++++++++++++---------------- 1 file changed, 37 insertions(+), 24 deletions(-) diff --git a/test/test_reader_fsm.cpp b/test/test_reader_fsm.cpp index a42f8662..2eea1061 100644 --- a/test/test_reader_fsm.cpp +++ b/test/test_reader_fsm.cpp @@ -18,6 +18,7 @@ #include "sansio_utils.hpp" +#include #include namespace net = boost::asio; @@ -32,6 +33,7 @@ using redis::config; using redis::detail::connection_state; using action = redis::detail::reader_fsm::action; using redis::logger; +using namespace std::chrono_literals; // Operators static const char* to_string(action::type type) @@ -52,21 +54,28 @@ bool operator==(const action& lhs, const action& rhs) noexcept { if (lhs.get_type() != rhs.get_type()) return false; - if (lhs.get_type() == action::type::done) - return lhs.error() == rhs.error(); - if (lhs.get_type() == action::type::notify_push_receiver) - return lhs.push_size() == rhs.push_size(); - return true; + switch (lhs.get_type()) { + case action::type::done: return lhs.error() == rhs.error(); + case action::type::read_some: return lhs.timeout() == rhs.timeout(); + case action::type::notify_push_receiver: return lhs.push_size() == rhs.push_size(); + default: BOOST_ASSERT(false); return false; + } } std::ostream& operator<<(std::ostream& os, const action& act) { auto t = act.get_type(); os << "action{ .type=" << t; - if (t == action::type::done) - os << ", .error=" << act.error(); - else if (t == action::type::notify_push_receiver) - os << ", .push_size=" << act.push_size(); + switch (t) { + case action::type::done: os << ", .error=" << act.error(); break; + case action::type::read_some: + os << ", .timeout=" + << std::chrono::duration_cast(act.timeout()).count() << "ms"; + break; + case action::type::notify_push_receiver: os << ", .push_size=" << act.push_size(); break; + default: BOOST_ASSERT(false); + } + return os << " }"; } @@ -91,7 +100,11 @@ struct fixture : redis::detail::log_fixture { connection_state st{make_logger()}; generic_response resp; - fixture() { st.mpx.set_receive_adapter(any_adapter{resp}); } + fixture() + { + st.mpx.set_receive_adapter(any_adapter{resp}); + st.cfg.health_check_interval = 3s; + } }; void test_push() @@ -101,7 +114,7 @@ void test_push() // Initiate auto act = fsm.resume(fix.st, 0, error_code(), cancellation_type_t::none); - BOOST_TEST_EQ(act, action::type::read_some); + BOOST_TEST_EQ(act, action::read_some(6s)); // The fsm is asking for data. std::string const payload = @@ -125,7 +138,7 @@ void test_push() // All pushes were delivered so the fsm should demand more data act = fsm.resume(fix.st, 0, error_code(), cancellation_type_t::none); - BOOST_TEST_EQ(act, action::type::read_some); + BOOST_TEST_EQ(act, action::read_some(6s)); // Check logging fix.check_log({ @@ -142,7 +155,7 @@ void test_read_needs_more() // Initiate auto act = fsm.resume(fix.st, 0, error_code(), cancellation_type_t::none); - BOOST_TEST_EQ(act, action::type::read_some); + BOOST_TEST_EQ(act, action::read_some(6s)); // Split the incoming message in three random parts and deliver // them to the reader individually. @@ -151,12 +164,12 @@ void test_read_needs_more() // Passes the first part to the fsm. copy_to(fix.st.mpx, msg[0]); act = fsm.resume(fix.st, msg[0].size(), error_code(), cancellation_type_t::none); - BOOST_TEST_EQ(act, action::type::read_some); + BOOST_TEST_EQ(act, action::read_some(6s)); // Passes the second part to the fsm. copy_to(fix.st.mpx, msg[1]); act = fsm.resume(fix.st, msg[1].size(), error_code(), cancellation_type_t::none); - BOOST_TEST_EQ(act, action::type::read_some); + BOOST_TEST_EQ(act, action::read_some(6s)); // Passes the third and last part to the fsm, next it should ask us // to deliver the message. @@ -166,7 +179,7 @@ void test_read_needs_more() // All pushes were delivered so the fsm should demand more data act = fsm.resume(fix.st, 0, error_code(), cancellation_type_t::none); - BOOST_TEST_EQ(act, action::type::read_some); + BOOST_TEST_EQ(act, action::read_some(6s)); // Check logging fix.check_log({ @@ -189,7 +202,7 @@ void test_read_error() // Initiate auto act = fsm.resume(fix.st, 0, error_code(), cancellation_type_t::none); - BOOST_TEST_EQ(act, action::type::read_some); + BOOST_TEST_EQ(act, action::read_some(6s)); // The fsm is asking for data. std::string const payload = ">1\r\n+msg1\r\n"; @@ -215,7 +228,7 @@ void test_parse_error() // Initiate auto act = fsm.resume(fix.st, 0, error_code(), cancellation_type_t::none); - BOOST_TEST_EQ(act, action::type::read_some); + BOOST_TEST_EQ(act, action::read_some(6s)); // The fsm is asking for data. std::string const payload = ">a\r\n"; @@ -242,7 +255,7 @@ void test_push_deliver_error() // Initiate auto act = fsm.resume(fix.st, 0, error_code(), cancellation_type_t::none); - BOOST_TEST_EQ(act, action::type::read_some); + BOOST_TEST_EQ(act, action::read_some(6s)); // The fsm is asking for data. std::string const payload = ">1\r\n+msg1\r\n"; @@ -276,7 +289,7 @@ void test_max_read_buffer_size() // Initiate auto act = fsm.resume(fix.st, 0, error_code(), cancellation_type_t::none); - BOOST_TEST_EQ(act, action::type::read_some); + BOOST_TEST_EQ(act, action::read_some(6s)); // Passes the first part to the fsm. std::string const part1 = ">3\r\n"; @@ -303,7 +316,7 @@ void test_cancel_read() // Initiate auto act = fsm.resume(fix.st, 0, error_code(), cancellation_type_t::none); - BOOST_TEST_EQ(act, action::type::read_some); + BOOST_TEST_EQ(act, action::read_some(6s)); // The read was cancelled (maybe after delivering some bytes) constexpr std::string_view payload = ">1\r\n"; @@ -329,7 +342,7 @@ void test_cancel_read_edge() // Initiate auto act = fsm.resume(fix.st, 0, error_code(), cancellation_type_t::none); - BOOST_TEST_EQ(act, action::type::read_some); + BOOST_TEST_EQ(act, action::read_some(6s)); // Deliver a push, and notify a cancellation. // This can happen if the cancellation signal arrives before the read handler runs @@ -352,7 +365,7 @@ void test_cancel_push_delivery() // Initiate auto act = fsm.resume(fix.st, 0, error_code(), cancellation_type_t::none); - BOOST_TEST_EQ(act, action::type::read_some); + BOOST_TEST_EQ(act, action::read_some(6s)); // The fsm is asking for data. constexpr std::string_view payload = @@ -384,7 +397,7 @@ void test_cancel_push_delivery_edge() // Initiate auto act = fsm.resume(fix.st, 0, error_code(), cancellation_type_t::none); - BOOST_TEST_EQ(act, action::type::read_some); + BOOST_TEST_EQ(act, action::read_some(6s)); // The fsm is asking for data. constexpr std::string_view payload = From b06c55415854372109a159ad8ad198e326979307 Mon Sep 17 00:00:00 2001 From: Ruben Perez Date: Thu, 16 Oct 2025 13:26:40 +0200 Subject: [PATCH 10/41] test health checks disabled --- test/test_reader_fsm.cpp | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/test/test_reader_fsm.cpp b/test/test_reader_fsm.cpp index 2eea1061..2ba7daac 100644 --- a/test/test_reader_fsm.cpp +++ b/test/test_reader_fsm.cpp @@ -195,6 +195,44 @@ void test_read_needs_more() }); } +void test_health_checks_disabled() +{ + fixture fix; + reader_fsm fsm; + fix.st.cfg.health_check_interval = 0s; + + // Initiate + auto act = fsm.resume(fix.st, 0, error_code(), cancellation_type_t::none); + BOOST_TEST_EQ(act, action::read_some(0s)); + + // Split the message into two so we cover both the regular read and the needs more branch + constexpr std::string_view msg[] = {">3\r\n+msg1\r\n+ms", "g2\r\n+msg3\r\n"}; + + // Passes the first part to the fsm. + copy_to(fix.st.mpx, msg[0]); + act = fsm.resume(fix.st, msg[0].size(), error_code(), cancellation_type_t::none); + BOOST_TEST_EQ(act, action::read_some(0s)); + + // Push delivery complete + copy_to(fix.st.mpx, msg[1]); + act = fsm.resume(fix.st, msg[1].size(), error_code(), cancellation_type_t::none); + BOOST_TEST_EQ(act, action::notify_push_receiver(25u)); + + // All pushes were delivered so the fsm should demand more data + act = fsm.resume(fix.st, 0, error_code(), cancellation_type_t::none); + BOOST_TEST_EQ(act, action::read_some(0s)); + + // Check logging + fix.check_log({ + {logger::level::debug, "Reader task: issuing read" }, + {logger::level::debug, "Reader task: 14 bytes read" }, + {logger::level::debug, "Reader task: incomplete message received"}, + {logger::level::debug, "Reader task: issuing read" }, + {logger::level::debug, "Reader task: 11 bytes read" }, + {logger::level::debug, "Reader task: issuing read" }, + }); +} + void test_read_error() { fixture fix; @@ -429,6 +467,7 @@ int main() { test_push(); test_read_needs_more(); + test_health_checks_disabled(); test_read_error(); test_parse_error(); From 80ddad94c6f64ad3968dd6333af05f4ef86a1d0a Mon Sep 17 00:00:00 2001 From: Ruben Perez Date: Thu, 16 Oct 2025 13:29:20 +0200 Subject: [PATCH 11/41] test read timeout --- test/test_reader_fsm.cpp | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/test/test_reader_fsm.cpp b/test/test_reader_fsm.cpp index 2ba7daac..9cef97c7 100644 --- a/test/test_reader_fsm.cpp +++ b/test/test_reader_fsm.cpp @@ -259,6 +259,29 @@ void test_read_error() }); } +// A timeout in a read means that the connection is unhealthy (i.e. a PING timed out) +void test_read_timeout() +{ + fixture fix; + reader_fsm fsm; + + // Initiate + auto act = fsm.resume(fix.st, 0, error_code(), cancellation_type_t::none); + BOOST_TEST_EQ(act, action::read_some(6s)); + + // Timeout + act = fsm.resume(fix.st, 0, {net::error::operation_aborted}, cancellation_type_t::none); + BOOST_TEST_EQ(act, error_code{redis::error::pong_timeout}); + + // Check logging + fix.check_log({ + // clang-format off + {logger::level::debug, "Reader task: issuing read" }, + {logger::level::debug, "Reader task: 0 bytes read, error: Pong timeout. [boost.redis:19]"}, + // clang-format on + }); +} + void test_parse_error() { fixture fix; @@ -470,6 +493,7 @@ int main() test_health_checks_disabled(); test_read_error(); + test_read_timeout(); test_parse_error(); test_push_deliver_error(); test_max_read_buffer_size(); From d3da43131f55cc3321663140c7ef1da804c9305d Mon Sep 17 00:00:00 2001 From: Ruben Perez Date: Thu, 16 Oct 2025 13:45:11 +0200 Subject: [PATCH 12/41] Fix comment --- include/boost/redis/config.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/boost/redis/config.hpp b/include/boost/redis/config.hpp index 0a2d666f..544b7d21 100644 --- a/include/boost/redis/config.hpp +++ b/include/boost/redis/config.hpp @@ -105,7 +105,7 @@ struct config { std::chrono::steady_clock::duration ssl_handshake_timeout = std::chrono::seconds{10}; /** @brief Time span between successive health checks. - * Set to zero to disable health-checks pass zero as duration. + * Set to zero to disable health-checks. */ std::chrono::steady_clock::duration health_check_interval = std::chrono::seconds{2}; From 943519510f773e3f2b94e45736e0f3ee3ce4b73d Mon Sep 17 00:00:00 2001 From: Ruben Perez Date: Thu, 16 Oct 2025 13:45:22 +0200 Subject: [PATCH 13/41] simplify writer --- include/boost/redis/impl/writer_fsm.ipp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/include/boost/redis/impl/writer_fsm.ipp b/include/boost/redis/impl/writer_fsm.ipp index db864bb0..c06b14f4 100644 --- a/include/boost/redis/impl/writer_fsm.ipp +++ b/include/boost/redis/impl/writer_fsm.ipp @@ -26,7 +26,6 @@ namespace boost::redis::detail { -// TODO: we should probably make any_adapter directly constructible from adapters void process_ping_node( connection_logger& lgr, resp3::basic_node const& nd, @@ -45,11 +44,11 @@ void process_ping_node( inline any_adapter make_ping_adapter(connection_logger& lgr) { - return any_adapter{any_adapter::impl_t{ + return any_adapter{ [&lgr](any_adapter::parse_event evt, resp3::node_view const& nd, system::error_code& ec) { if (evt == any_adapter::parse_event::node) process_ping_node(lgr, nd, ec); - }}}; + }}; } writer_action writer_fsm::resume( From 9ecbdb4d6ceb70dc84e54ab6bc55b24c9704661b Mon Sep 17 00:00:00 2001 From: Ruben Perez Date: Thu, 16 Oct 2025 13:45:45 +0200 Subject: [PATCH 14/41] Make writer tests build --- test/sansio_utils.hpp | 6 +++ test/test_reader_fsm.cpp | 3 +- test/test_writer_fsm.cpp | 106 ++++++++++++++++++++++++++------------- 3 files changed, 79 insertions(+), 36 deletions(-) diff --git a/test/sansio_utils.hpp b/test/sansio_utils.hpp index 7428b116..47a49ff3 100644 --- a/test/sansio_utils.hpp +++ b/test/sansio_utils.hpp @@ -11,6 +11,7 @@ #include +#include #include #include #include @@ -44,6 +45,11 @@ struct log_fixture { logger make_logger(); }; +constexpr auto to_milliseconds(std::chrono::steady_clock::duration d) +{ + return std::chrono::duration_cast(d).count(); +} + } // namespace boost::redis::detail #endif // BOOST_REDIS_TEST_SANSIO_UTILS_HPP diff --git a/test/test_reader_fsm.cpp b/test/test_reader_fsm.cpp index 9cef97c7..f215a54d 100644 --- a/test/test_reader_fsm.cpp +++ b/test/test_reader_fsm.cpp @@ -69,8 +69,7 @@ std::ostream& operator<<(std::ostream& os, const action& act) switch (t) { case action::type::done: os << ", .error=" << act.error(); break; case action::type::read_some: - os << ", .timeout=" - << std::chrono::duration_cast(act.timeout()).count() << "ms"; + os << ", .timeout=" << to_milliseconds(act.timeout()) << "ms"; break; case action::type::notify_push_receiver: os << ", .push_size=" << act.push_size(); break; default: BOOST_ASSERT(false); diff --git a/test/test_writer_fsm.cpp b/test/test_writer_fsm.cpp index b7266300..b95cc31e 100644 --- a/test/test_writer_fsm.cpp +++ b/test/test_writer_fsm.cpp @@ -7,6 +7,7 @@ // #include +#include #include #include #include @@ -20,6 +21,7 @@ #include "sansio_utils.hpp" +#include #include #include @@ -30,9 +32,11 @@ using detail::multiplexer; using detail::writer_action_type; using detail::consume_result; using detail::writer_action; +using detail::connection_state; using boost::system::error_code; using boost::asio::cancellation_type_t; using detail::connection_logger; +using namespace std::chrono_literals; // Operators static const char* to_string(writer_action_type value) @@ -55,14 +59,34 @@ std::ostream& operator<<(std::ostream& os, writer_action_type type) bool operator==(const writer_action& lhs, const writer_action& rhs) noexcept { - return lhs.type == rhs.type && lhs.ec == rhs.ec; + if (lhs.type() != rhs.type()) + return false; + switch (lhs.type()) { + case writer_action_type::done: return lhs.error() == rhs.error(); + case writer_action_type::write_some: + return lhs.write_buffer() == rhs.write_buffer() && lhs.timeout() == rhs.timeout(); + case writer_action_type::wait: return lhs.timeout() == rhs.timeout(); + default: BOOST_ASSERT(false); + } + return false; } std::ostream& operator<<(std::ostream& os, const writer_action& act) { - os << "writer_action{ .type=" << act.type; - if (act.type == writer_action_type::done) - os << ", .error=" << act.ec; + auto t = act.type(); + os << "writer_action{ .type=" << t; + switch (t) { + case writer_action_type::done: os << ", .error=" << act.error(); break; + case writer_action_type::write_some: + os << ", .write_buffer=" << act.write_buffer() + << ", timeout=" << to_milliseconds(act.timeout()) << "ms"; + break; + case writer_action_type::wait: + os << ", .timeout=" << to_milliseconds(act.timeout()) << "ms"; + break; + default: BOOST_ASSERT(false); + } + return os << " }"; } @@ -89,9 +113,14 @@ struct test_elem { }; struct fixture : detail::log_fixture { - multiplexer mpx; - connection_logger lgr{make_logger()}; - writer_fsm fsm{mpx, lgr}; + connection_state st{make_logger()}; + writer_fsm fsm; + + fixture() + { + st.cfg.health_check_id = "my_health_check"; + st.cfg.health_check_interval = 4s; + } }; // A single request is written, then we wait and repeat @@ -102,28 +131,30 @@ void test_single_request() test_elem item1, item2; // A request arrives before the writer starts - fix.mpx.add(item1.elm); + fix.st.mpx.add(item1.elm); // Start. A write is triggered, and the request is marked as staged - auto act = fix.fsm.resume(error_code(), 0u, cancellation_type_t::none); + auto act = fix.fsm.resume(fix.st, error_code(), 0u, cancellation_type_t::none); BOOST_TEST_EQ(act, writer_action_type::write_some); BOOST_TEST(item1.elm->is_staged()); // The write completes successfully. The request is written, and we go back to sleep. - act = fix.fsm.resume(error_code(), item1.req.payload().size(), cancellation_type_t::none); + act = fix.fsm + .resume(fix.st, error_code(), item1.req.payload().size(), cancellation_type_t::none); BOOST_TEST_EQ(act, writer_action_type::wait); BOOST_TEST(item1.elm->is_written()); // Another request arrives - fix.mpx.add(item2.elm); + fix.st.mpx.add(item2.elm); // The wait is cancelled to signal we've got a new request - act = fix.fsm.resume(asio::error::operation_aborted, 0u, cancellation_type_t::none); + act = fix.fsm.resume(fix.st, asio::error::operation_aborted, 0u, cancellation_type_t::none); BOOST_TEST_EQ(act, writer_action_type::write_some); BOOST_TEST(item2.elm->is_staged()); // Write successful - act = fix.fsm.resume(error_code(), item2.req.payload().size(), cancellation_type_t::none); + act = fix.fsm + .resume(fix.st, error_code(), item2.req.payload().size(), cancellation_type_t::none); BOOST_TEST_EQ(act, writer_action_type::wait); BOOST_TEST(item2.elm->is_written()); @@ -142,25 +173,27 @@ void test_request_arrives_while_writing() test_elem item1, item2; // A request arrives before the writer starts - fix.mpx.add(item1.elm); + fix.st.mpx.add(item1.elm); // Start. A write is triggered, and the request is marked as staged - auto act = fix.fsm.resume(error_code(), 0u, cancellation_type_t::none); + auto act = fix.fsm.resume(fix.st, error_code(), 0u, cancellation_type_t::none); BOOST_TEST_EQ(act, writer_action_type::write_some); BOOST_TEST(item1.elm->is_staged()); // While the write is outstanding, a new request arrives - fix.mpx.add(item2.elm); + fix.st.mpx.add(item2.elm); // The write completes successfully. The request is written, // and we start writing the new one - act = fix.fsm.resume(error_code(), item1.req.payload().size(), cancellation_type_t::none); + act = fix.fsm + .resume(fix.st, error_code(), item1.req.payload().size(), cancellation_type_t::none); BOOST_TEST_EQ(act, writer_action_type::write_some); BOOST_TEST(item1.elm->is_written()); BOOST_TEST(item2.elm->is_staged()); // Write successful - act = fix.fsm.resume(error_code(), item2.req.payload().size(), cancellation_type_t::none); + act = fix.fsm + .resume(fix.st, error_code(), item2.req.payload().size(), cancellation_type_t::none); BOOST_TEST_EQ(act, writer_action_type::wait); BOOST_TEST(item2.elm->is_written()); @@ -179,19 +212,19 @@ void test_no_request_at_startup() test_elem item; // Start. There is no request, so we wait - auto act = fix.fsm.resume(error_code(), 0u, cancellation_type_t::none); + auto act = fix.fsm.resume(fix.st, error_code(), 0u, cancellation_type_t::none); BOOST_TEST_EQ(act, writer_action_type::wait); // A request arrives - fix.mpx.add(item.elm); + fix.st.mpx.add(item.elm); // The wait is cancelled to signal we've got a new request - act = fix.fsm.resume(asio::error::operation_aborted, 0u, cancellation_type_t::none); + act = fix.fsm.resume(fix.st, asio::error::operation_aborted, 0u, cancellation_type_t::none); BOOST_TEST_EQ(act, writer_action_type::write_some); BOOST_TEST(item.elm->is_staged()); // Write successful - act = fix.fsm.resume(error_code(), item.req.payload().size(), cancellation_type_t::none); + act = fix.fsm.resume(fix.st, error_code(), item.req.payload().size(), cancellation_type_t::none); BOOST_TEST_EQ(act, writer_action_type::wait); BOOST_TEST(item.elm->is_written()); @@ -209,17 +242,17 @@ void test_write_error() test_elem item; // A request arrives before the writer starts - fix.mpx.add(item.elm); + fix.st.mpx.add(item.elm); // Start. A write is triggered, and the request is marked as staged - auto act = fix.fsm.resume(error_code(), 0u, cancellation_type_t::none); + auto act = fix.fsm.resume(fix.st, error_code(), 0u, cancellation_type_t::none); BOOST_TEST_EQ(act, writer_action_type::write_some); BOOST_TEST(item.elm->is_staged()); // The write completes with an error (possibly with partial success). // The request is still staged, and the writer exits. // Use an error we control so we can check logs - act = fix.fsm.resume(error::empty_field, 2u, cancellation_type_t::none); + act = fix.fsm.resume(fix.st, error::empty_field, 2u, cancellation_type_t::none); BOOST_TEST_EQ(act, error_code(error::empty_field)); BOOST_TEST(item.elm->is_staged()); @@ -237,15 +270,15 @@ void test_cancel_write() test_elem item; // A request arrives before the writer starts - fix.mpx.add(item.elm); + fix.st.mpx.add(item.elm); // Start. A write is triggered - auto act = fix.fsm.resume(error_code(), 0u, cancellation_type_t::none); + auto act = fix.fsm.resume(fix.st, error_code(), 0u, cancellation_type_t::none); BOOST_TEST_EQ(act, writer_action_type::write_some); BOOST_TEST(item.elm->is_staged()); // Write cancelled and failed with operation_aborted - act = fix.fsm.resume(asio::error::operation_aborted, 2u, cancellation_type_t::terminal); + act = fix.fsm.resume(fix.st, asio::error::operation_aborted, 2u, cancellation_type_t::terminal); BOOST_TEST_EQ(act, error_code(asio::error::operation_aborted)); BOOST_TEST(item.elm->is_staged()); @@ -263,15 +296,16 @@ void test_cancel_write_edge() test_elem item; // A request arrives before the writer starts - fix.mpx.add(item.elm); + fix.st.mpx.add(item.elm); // Start. A write is triggered - auto act = fix.fsm.resume(error_code(), 0u, cancellation_type_t::none); + auto act = fix.fsm.resume(fix.st, error_code(), 0u, cancellation_type_t::none); BOOST_TEST_EQ(act, writer_action_type::write_some); BOOST_TEST(item.elm->is_staged()); // Write cancelled but without error - act = fix.fsm.resume(error_code(), item.req.payload().size(), cancellation_type_t::terminal); + act = fix.fsm + .resume(fix.st, error_code(), item.req.payload().size(), cancellation_type_t::terminal); BOOST_TEST_EQ(act, error_code(asio::error::operation_aborted)); BOOST_TEST(item.elm->is_written()); @@ -289,14 +323,18 @@ void test_cancel_wait() test_elem item; // Start. There is no request, so we wait - auto act = fix.fsm.resume(error_code(), 0u, cancellation_type_t::none); + auto act = fix.fsm.resume(fix.st, error_code(), 0u, cancellation_type_t::none); BOOST_TEST_EQ(act, writer_action_type::wait); // Sanity check: the writer doesn't touch the multiplexer after a cancellation - fix.mpx.add(item.elm); + fix.st.mpx.add(item.elm); // Cancel the wait, setting the cancellation state - act = fix.fsm.resume(asio::error::operation_aborted, 0u, asio::cancellation_type_t::terminal); + act = fix.fsm.resume( + fix.st, + asio::error::operation_aborted, + 0u, + asio::cancellation_type_t::terminal); BOOST_TEST_EQ(act, error_code(asio::error::operation_aborted)); BOOST_TEST(item.elm->is_waiting()); From daa7903ae88c2cb2a52a4a87a0721fcd94dd38e7 Mon Sep 17 00:00:00 2001 From: Ruben Perez Date: Thu, 16 Oct 2025 13:49:38 +0200 Subject: [PATCH 15/41] stronger writer_action interface --- include/boost/redis/detail/writer_fsm.hpp | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/include/boost/redis/detail/writer_fsm.hpp b/include/boost/redis/detail/writer_fsm.hpp index 671c89a1..4d568978 100644 --- a/include/boost/redis/detail/writer_fsm.hpp +++ b/include/boost/redis/detail/writer_fsm.hpp @@ -41,14 +41,18 @@ class writer_action { struct { std::string_view buffer; std::chrono::steady_clock::duration timeout; - } buff_timeout; + } buff_timeout_; }; -public: - writer_action(writer_action_type type) noexcept + writer_action( + writer_action_type type, + std::string_view buff, + std::chrono::steady_clock::duration t) noexcept : type_{type} + , buff_timeout_{buff, t} { } +public: writer_action_type type() const { return type_; } writer_action(system::error_code ec) noexcept @@ -60,16 +64,12 @@ class writer_action { std::string_view buffer, std::chrono::steady_clock::duration timeout) { - auto res = writer_action{writer_action_type::write_some}; - res.buff_timeout = {buffer, timeout}; - return res; + return {writer_action_type::write_some, buffer, timeout}; } static writer_action wait(std::chrono::steady_clock::duration timeout) { - auto res = writer_action{writer_action_type::wait}; - res.buff_timeout = {{}, timeout}; - return res; + return {writer_action_type::wait, {}, timeout}; } system::error_code error() const @@ -81,13 +81,13 @@ class writer_action { std::string_view write_buffer() const { BOOST_ASSERT(type_ == writer_action_type::write_some); - return buff_timeout.buffer; + return buff_timeout_.buffer; } std::chrono::steady_clock::duration timeout() const { BOOST_ASSERT(type_ == writer_action_type::write_some || type_ == writer_action_type::wait); - return buff_timeout.timeout; + return buff_timeout_.timeout; } }; From 03f470ddbee9ad4238a662e4ef6e4332bf99167d Mon Sep 17 00:00:00 2001 From: Ruben Perez Date: Thu, 16 Oct 2025 13:54:20 +0200 Subject: [PATCH 16/41] Make writer_action use offset --- include/boost/redis/connection.hpp | 7 +++++-- include/boost/redis/detail/writer_fsm.hpp | 22 ++++++++++------------ include/boost/redis/impl/writer_fsm.ipp | 4 +--- test/test_writer_fsm.cpp | 4 ++-- 4 files changed, 18 insertions(+), 19 deletions(-) diff --git a/include/boost/redis/connection.hpp b/include/boost/redis/connection.hpp index 30a7dc37..75f3c56b 100644 --- a/include/boost/redis/connection.hpp +++ b/include/boost/redis/connection.hpp @@ -231,14 +231,17 @@ struct writer_op { switch (act.type()) { case writer_action_type::done: self.complete(act.error()); return; case writer_action_type::write_some: + { + auto buf = asio::buffer(conn->st_.mpx.get_write_buffer().substr(act.write_offset())); if (auto timeout = act.timeout(); timeout.count() != 0) { conn->stream_.async_write_some( - asio::buffer(act.write_buffer()), + buf, asio::cancel_after(conn->writer_timer_, timeout, std::move(self))); } else { - conn->stream_.async_write_some(asio::buffer(act.write_buffer()), std::move(self)); + conn->stream_.async_write_some(buf, std::move(self)); } return; + } case writer_action_type::wait: if (auto timeout = act.timeout(); timeout.count() != 0) { conn->writer_cv_.expires_after(timeout); diff --git a/include/boost/redis/detail/writer_fsm.hpp b/include/boost/redis/detail/writer_fsm.hpp index 4d568978..63f5bffb 100644 --- a/include/boost/redis/detail/writer_fsm.hpp +++ b/include/boost/redis/detail/writer_fsm.hpp @@ -39,17 +39,17 @@ class writer_action { union { system::error_code ec_; struct { - std::string_view buffer; + std::size_t offset; std::chrono::steady_clock::duration timeout; - } buff_timeout_; + } offset_timeout_; }; writer_action( writer_action_type type, - std::string_view buff, + std::size_t offset, std::chrono::steady_clock::duration t) noexcept : type_{type} - , buff_timeout_{buff, t} + , offset_timeout_{offset, t} { } public: @@ -60,16 +60,14 @@ class writer_action { , ec_{ec} { } - static writer_action write_some( - std::string_view buffer, - std::chrono::steady_clock::duration timeout) + static writer_action write_some(std::size_t offset, std::chrono::steady_clock::duration timeout) { - return {writer_action_type::write_some, buffer, timeout}; + return {writer_action_type::write_some, offset, timeout}; } static writer_action wait(std::chrono::steady_clock::duration timeout) { - return {writer_action_type::wait, {}, timeout}; + return {writer_action_type::wait, 0u, timeout}; } system::error_code error() const @@ -78,16 +76,16 @@ class writer_action { return ec_; } - std::string_view write_buffer() const + std::size_t write_offset() const { BOOST_ASSERT(type_ == writer_action_type::write_some); - return buff_timeout_.buffer; + return offset_timeout_.offset; } std::chrono::steady_clock::duration timeout() const { BOOST_ASSERT(type_ == writer_action_type::write_some || type_ == writer_action_type::wait); - return buff_timeout_.timeout; + return offset_timeout_.timeout; } }; diff --git a/include/boost/redis/impl/writer_fsm.ipp b/include/boost/redis/impl/writer_fsm.ipp index c06b14f4..2db3c161 100644 --- a/include/boost/redis/impl/writer_fsm.ipp +++ b/include/boost/redis/impl/writer_fsm.ipp @@ -72,9 +72,7 @@ writer_action writer_fsm::resume( BOOST_REDIS_YIELD( resume_point_, 1, - writer_action::write_some( - st.mpx.get_write_buffer().substr(write_offset_), - st.cfg.health_check_interval)) + writer_action::write_some(write_offset_, st.cfg.health_check_interval)) // Update the offset write_offset_ += bytes_written; diff --git a/test/test_writer_fsm.cpp b/test/test_writer_fsm.cpp index b95cc31e..68c9ec18 100644 --- a/test/test_writer_fsm.cpp +++ b/test/test_writer_fsm.cpp @@ -64,7 +64,7 @@ bool operator==(const writer_action& lhs, const writer_action& rhs) noexcept switch (lhs.type()) { case writer_action_type::done: return lhs.error() == rhs.error(); case writer_action_type::write_some: - return lhs.write_buffer() == rhs.write_buffer() && lhs.timeout() == rhs.timeout(); + return lhs.write_offset() == rhs.write_offset() && lhs.timeout() == rhs.timeout(); case writer_action_type::wait: return lhs.timeout() == rhs.timeout(); default: BOOST_ASSERT(false); } @@ -78,7 +78,7 @@ std::ostream& operator<<(std::ostream& os, const writer_action& act) switch (t) { case writer_action_type::done: os << ", .error=" << act.error(); break; case writer_action_type::write_some: - os << ", .write_buffer=" << act.write_buffer() + os << ", .write_buffer=" << act.write_offset() << ", timeout=" << to_milliseconds(act.timeout()) << "ms"; break; case writer_action_type::wait: From 1d44ac4a9b0e13225e5415d333140d1623cdeb7d Mon Sep 17 00:00:00 2001 From: Ruben Perez Date: Thu, 16 Oct 2025 13:56:23 +0200 Subject: [PATCH 17/41] Fix writer tests --- test/test_writer_fsm.cpp | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/test/test_writer_fsm.cpp b/test/test_writer_fsm.cpp index 68c9ec18..f67f8ca3 100644 --- a/test/test_writer_fsm.cpp +++ b/test/test_writer_fsm.cpp @@ -135,13 +135,13 @@ void test_single_request() // Start. A write is triggered, and the request is marked as staged auto act = fix.fsm.resume(fix.st, error_code(), 0u, cancellation_type_t::none); - BOOST_TEST_EQ(act, writer_action_type::write_some); + BOOST_TEST_EQ(act, writer_action::write_some(0u, 4s)); BOOST_TEST(item1.elm->is_staged()); // The write completes successfully. The request is written, and we go back to sleep. act = fix.fsm .resume(fix.st, error_code(), item1.req.payload().size(), cancellation_type_t::none); - BOOST_TEST_EQ(act, writer_action_type::wait); + BOOST_TEST_EQ(act, writer_action::wait(4s)); BOOST_TEST(item1.elm->is_written()); // Another request arrives @@ -149,13 +149,13 @@ void test_single_request() // The wait is cancelled to signal we've got a new request act = fix.fsm.resume(fix.st, asio::error::operation_aborted, 0u, cancellation_type_t::none); - BOOST_TEST_EQ(act, writer_action_type::write_some); + BOOST_TEST_EQ(act, writer_action::write_some(0u, 4s)); BOOST_TEST(item2.elm->is_staged()); // Write successful act = fix.fsm .resume(fix.st, error_code(), item2.req.payload().size(), cancellation_type_t::none); - BOOST_TEST_EQ(act, writer_action_type::wait); + BOOST_TEST_EQ(act, writer_action::wait(4s)); BOOST_TEST(item2.elm->is_written()); // Logs @@ -177,7 +177,7 @@ void test_request_arrives_while_writing() // Start. A write is triggered, and the request is marked as staged auto act = fix.fsm.resume(fix.st, error_code(), 0u, cancellation_type_t::none); - BOOST_TEST_EQ(act, writer_action_type::write_some); + BOOST_TEST_EQ(act, writer_action::write_some(0u, 4s)); BOOST_TEST(item1.elm->is_staged()); // While the write is outstanding, a new request arrives @@ -187,14 +187,14 @@ void test_request_arrives_while_writing() // and we start writing the new one act = fix.fsm .resume(fix.st, error_code(), item1.req.payload().size(), cancellation_type_t::none); - BOOST_TEST_EQ(act, writer_action_type::write_some); + BOOST_TEST_EQ(act, writer_action::write_some(0u, 4s)); BOOST_TEST(item1.elm->is_written()); BOOST_TEST(item2.elm->is_staged()); // Write successful act = fix.fsm .resume(fix.st, error_code(), item2.req.payload().size(), cancellation_type_t::none); - BOOST_TEST_EQ(act, writer_action_type::wait); + BOOST_TEST_EQ(act, writer_action::wait(4s)); BOOST_TEST(item2.elm->is_written()); // Logs @@ -213,19 +213,19 @@ void test_no_request_at_startup() // Start. There is no request, so we wait auto act = fix.fsm.resume(fix.st, error_code(), 0u, cancellation_type_t::none); - BOOST_TEST_EQ(act, writer_action_type::wait); + BOOST_TEST_EQ(act, writer_action::wait(4s)); // A request arrives fix.st.mpx.add(item.elm); // The wait is cancelled to signal we've got a new request act = fix.fsm.resume(fix.st, asio::error::operation_aborted, 0u, cancellation_type_t::none); - BOOST_TEST_EQ(act, writer_action_type::write_some); + BOOST_TEST_EQ(act, writer_action::write_some(0u, 4s)); BOOST_TEST(item.elm->is_staged()); // Write successful act = fix.fsm.resume(fix.st, error_code(), item.req.payload().size(), cancellation_type_t::none); - BOOST_TEST_EQ(act, writer_action_type::wait); + BOOST_TEST_EQ(act, writer_action::wait(4s)); BOOST_TEST(item.elm->is_written()); // Logs @@ -246,7 +246,7 @@ void test_write_error() // Start. A write is triggered, and the request is marked as staged auto act = fix.fsm.resume(fix.st, error_code(), 0u, cancellation_type_t::none); - BOOST_TEST_EQ(act, writer_action_type::write_some); + BOOST_TEST_EQ(act, writer_action::write_some(0u, 4s)); BOOST_TEST(item.elm->is_staged()); // The write completes with an error (possibly with partial success). @@ -274,7 +274,7 @@ void test_cancel_write() // Start. A write is triggered auto act = fix.fsm.resume(fix.st, error_code(), 0u, cancellation_type_t::none); - BOOST_TEST_EQ(act, writer_action_type::write_some); + BOOST_TEST_EQ(act, writer_action::write_some(0u, 4s)); BOOST_TEST(item.elm->is_staged()); // Write cancelled and failed with operation_aborted @@ -300,7 +300,7 @@ void test_cancel_write_edge() // Start. A write is triggered auto act = fix.fsm.resume(fix.st, error_code(), 0u, cancellation_type_t::none); - BOOST_TEST_EQ(act, writer_action_type::write_some); + BOOST_TEST_EQ(act, writer_action::write_some(0u, 4s)); BOOST_TEST(item.elm->is_staged()); // Write cancelled but without error @@ -324,7 +324,7 @@ void test_cancel_wait() // Start. There is no request, so we wait auto act = fix.fsm.resume(fix.st, error_code(), 0u, cancellation_type_t::none); - BOOST_TEST_EQ(act, writer_action_type::wait); + BOOST_TEST_EQ(act, writer_action::wait(4s)); // Sanity check: the writer doesn't touch the multiplexer after a cancellation fix.st.mpx.add(item.elm); From 235ac6722f3564a8af57c60afb114c4632ea4b28 Mon Sep 17 00:00:00 2001 From: Ruben Perez Date: Thu, 16 Oct 2025 14:15:44 +0200 Subject: [PATCH 18/41] Fix a partial success problem --- include/boost/redis/impl/writer_fsm.ipp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/boost/redis/impl/writer_fsm.ipp b/include/boost/redis/impl/writer_fsm.ipp index 2db3c161..4ac5ee58 100644 --- a/include/boost/redis/impl/writer_fsm.ipp +++ b/include/boost/redis/impl/writer_fsm.ipp @@ -74,8 +74,11 @@ writer_action writer_fsm::resume( 1, writer_action::write_some(write_offset_, st.cfg.health_check_interval)) - // Update the offset + // Commit the received bytes. Do it before error checking to account for partial success + // TODO: I don't like how this is structured write_offset_ += bytes_written; + if (write_offset_ >= st.mpx.get_write_buffer().size()) + st.mpx.commit_write(); // Check for cancellations if (is_terminal_cancel(cancel_state)) { @@ -93,9 +96,6 @@ writer_action writer_fsm::resume( return ec; } } - - // We finished writing a message successfully. Mark requests as written - st.mpx.commit_write(); } // No more requests ready to be written. Wait for more, or until we need to send a PING From 7df92f394b01732f734f9fc454b9212f4973364c Mon Sep 17 00:00:00 2001 From: Ruben Perez Date: Thu, 16 Oct 2025 14:15:54 +0200 Subject: [PATCH 19/41] short writes test --- test/test_writer_fsm.cpp | 45 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/test/test_writer_fsm.cpp b/test/test_writer_fsm.cpp index f67f8ca3..b6ba7eee 100644 --- a/test/test_writer_fsm.cpp +++ b/test/test_writer_fsm.cpp @@ -234,6 +234,50 @@ void test_no_request_at_startup() }); } +// We correctly handle short writes +void test_short_writes() +{ + // Setup + fixture fix; + test_elem item1; + + // A request arrives before the writer starts + fix.st.mpx.add(item1.elm); + + // Start. A write is triggered, and the request is marked as staged + auto act = fix.fsm.resume(fix.st, error_code(), 0u, cancellation_type_t::none); + BOOST_TEST_EQ(act, writer_action::write_some(0u, 4s)); + BOOST_TEST(item1.elm->is_staged()); + + // We write a few bytes. It's not the entire message, so we write again + act = fix.fsm.resume(fix.st, error_code(), 2u, cancellation_type_t::none); + BOOST_TEST_EQ(act, writer_action::write_some(2u, 4s)); + BOOST_TEST(item1.elm->is_staged()); + + // We write some more bytes, but still not the entire message. + act = fix.fsm.resume(fix.st, error_code(), 5u, cancellation_type_t::none); + BOOST_TEST_EQ(act, writer_action::write_some(7u, 4s)); + BOOST_TEST(item1.elm->is_staged()); + + // A zero size write doesn't cause trouble + act = fix.fsm.resume(fix.st, error_code(), 0u, cancellation_type_t::none); + BOOST_TEST_EQ(act, writer_action::write_some(7u, 4s)); + BOOST_TEST(item1.elm->is_staged()); + + // Complete writing the message (the entire payload is 24 bytes long) + act = fix.fsm.resume(fix.st, error_code(), 17u, cancellation_type_t::none); + BOOST_TEST_EQ(act, writer_action::wait(4s)); + BOOST_TEST(item1.elm->is_written()); + + // Logs + fix.check_log({ + {logger::level::info, "Writer task: 2 bytes written." }, + {logger::level::info, "Writer task: 5 bytes written." }, + {logger::level::info, "Writer task: 0 bytes written." }, + {logger::level::info, "Writer task: 17 bytes written."}, + }); +} + // A write error makes the writer exit void test_write_error() { @@ -351,6 +395,7 @@ int main() test_single_request(); test_request_arrives_while_writing(); test_no_request_at_startup(); + test_short_writes(); test_write_error(); From 0a79bf13e4f8fcb15b92da4a7612fdc2fb591e2a Mon Sep 17 00:00:00 2001 From: Ruben Perez Date: Thu, 16 Oct 2025 16:31:22 +0200 Subject: [PATCH 20/41] add const --- include/boost/redis/detail/multiplexer.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/boost/redis/detail/multiplexer.hpp b/include/boost/redis/detail/multiplexer.hpp index 040ba1d2..d038adf0 100644 --- a/include/boost/redis/detail/multiplexer.hpp +++ b/include/boost/redis/detail/multiplexer.hpp @@ -175,7 +175,7 @@ class multiplexer { void cancel_on_conn_lost(); [[nodiscard]] - auto get_write_buffer() noexcept -> std::string_view + auto get_write_buffer() const noexcept -> std::string_view { return std::string_view{write_buffer_}; } From 104149696b38e988c006b6df89c75a0f847d055b Mon Sep 17 00:00:00 2001 From: Ruben Perez Date: Thu, 16 Oct 2025 16:31:47 +0200 Subject: [PATCH 21/41] rework writer 1 --- include/boost/redis/impl/writer_fsm.ipp | 33 ++++++++++++++----------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/include/boost/redis/impl/writer_fsm.ipp b/include/boost/redis/impl/writer_fsm.ipp index 4ac5ee58..0d5e4e5b 100644 --- a/include/boost/redis/impl/writer_fsm.ipp +++ b/include/boost/redis/impl/writer_fsm.ipp @@ -51,6 +51,11 @@ inline any_adapter make_ping_adapter(connection_logger& lgr) }}; } +inline bool write_done(const connection_state& st, std::size_t bytes_written) +{ + return bytes_written >= st.mpx.get_write_buffer().size(); +} + writer_action writer_fsm::resume( connection_state& st, system::error_code ec, @@ -66,7 +71,7 @@ writer_action writer_fsm::resume( // Write an entire message. We can't use asio::async_write because we want // to apply timeouts to individual write operations write_offset_ = 0u; - while (write_offset_ < st.mpx.get_write_buffer().size()) { + while (!write_done(st, write_offset_)) { // Write what we can. If nothing has been written for the health check // interval, we consider the connection as failed BOOST_REDIS_YIELD( @@ -74,28 +79,28 @@ writer_action writer_fsm::resume( 1, writer_action::write_some(write_offset_, st.cfg.health_check_interval)) - // Commit the received bytes. Do it before error checking to account for partial success - // TODO: I don't like how this is structured + // Commit the received bytes write_offset_ += bytes_written; - if (write_offset_ >= st.mpx.get_write_buffer().size()) - st.mpx.commit_write(); // Check for cancellations - if (is_terminal_cancel(cancel_state)) { - st.logger.trace("Writer task: cancelled (1)."); - return system::error_code(asio::error::operation_aborted); - } + // TODO: translate operation_aborted to another error code for clarity. + // pong_timeout is probably not the best option here - maybe write_timeout? + if (is_terminal_cancel(cancel_state)) + ec = asio::error::operation_aborted; // Log what we wrote st.logger.on_write(ec, bytes_written); // Check for errors - // TODO: translate operation_aborted to another error code for clarity. - // pong_timeout is probably not the best option here - maybe write_timeout? - if (ec) { - return ec; - } + if (ec) + break; } + + // Write complete. Process its results + if (write_done(st, write_offset_)) + st.mpx.commit_write(); + if (ec) + return ec; } // No more requests ready to be written. Wait for more, or until we need to send a PING From 75d9df91c2eae6fad57f787971e24f015aebb374 Mon Sep 17 00:00:00 2001 From: Ruben Perez Date: Thu, 16 Oct 2025 16:38:08 +0200 Subject: [PATCH 22/41] write_timeout error --- include/boost/redis/error.hpp | 5 ++++- include/boost/redis/impl/error.ipp | 2 ++ test/test_low_level.cpp | 1 + 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/include/boost/redis/error.hpp b/include/boost/redis/error.hpp index a252789f..506d0139 100644 --- a/include/boost/redis/error.hpp +++ b/include/boost/redis/error.hpp @@ -68,7 +68,7 @@ enum class error /// Connect timeout connect_timeout, - /// Connect timeout + /// The server didn't answer the health checks on time and didn't send any data during the health check period. pong_timeout, /// SSL handshake timeout @@ -91,6 +91,9 @@ enum class error /// Reading data from the socket would exceed the maximum size allowed of the read buffer. exceeds_maximum_read_buffer_size, + + /// Timeout while writing data to the server. + write_timeout, }; /** diff --git a/include/boost/redis/impl/error.ipp b/include/boost/redis/impl/error.ipp index 5133feb1..dcd7ec67 100644 --- a/include/boost/redis/impl/error.ipp +++ b/include/boost/redis/impl/error.ipp @@ -54,6 +54,8 @@ struct error_category_impl : system::error_category { case error::exceeds_maximum_read_buffer_size: return "Reading data from the socket would exceed the maximum size allowed of the read " "buffer."; + case error::write_timeout: + return "Timeout while writing data to the server."; default: BOOST_ASSERT(false); return "Boost.Redis error."; } } diff --git a/test/test_low_level.cpp b/test/test_low_level.cpp index c173d750..dc68e0ec 100644 --- a/test/test_low_level.cpp +++ b/test/test_low_level.cpp @@ -529,6 +529,7 @@ BOOST_AUTO_TEST_CASE(cover_error) check_error("boost.redis", boost::redis::error::incompatible_node_depth); check_error("boost.redis", boost::redis::error::resp3_hello); check_error("boost.redis", boost::redis::error::exceeds_maximum_read_buffer_size); + check_error("boost.redis", boost::redis::error::write_timeout); } std::string get_type_as_str(boost::redis::resp3::type t) From 9c5f965c9ec4cd0d949c78ef57208443e2fab480 Mon Sep 17 00:00:00 2001 From: Ruben Perez Date: Thu, 16 Oct 2025 16:47:08 +0200 Subject: [PATCH 23/41] rework logging --- .../boost/redis/detail/connection_logger.hpp | 2 +- .../boost/redis/impl/connection_logger.ipp | 17 +++++------- include/boost/redis/impl/writer_fsm.ipp | 23 +++++++++------- test/test_writer_fsm.cpp | 27 ++++++++++--------- 4 files changed, 36 insertions(+), 33 deletions(-) diff --git a/include/boost/redis/detail/connection_logger.hpp b/include/boost/redis/detail/connection_logger.hpp index 2b2800e2..b1cb28e6 100644 --- a/include/boost/redis/detail/connection_logger.hpp +++ b/include/boost/redis/detail/connection_logger.hpp @@ -36,7 +36,7 @@ class connection_logger { void on_connect(system::error_code const& ec, asio::ip::tcp::endpoint const& ep); void on_connect(system::error_code const& ec, std::string_view unix_socket_ep); void on_ssl_handshake(system::error_code const& ec); - void on_write(system::error_code const& ec, std::size_t n); + void on_write(std::size_t n); void on_read(system::error_code const& ec, std::size_t n); void on_setup(system::error_code const& ec, generic_response const& resp); void log(logger::level lvl, std::string_view msg); diff --git a/include/boost/redis/impl/connection_logger.ipp b/include/boost/redis/impl/connection_logger.ipp index dc5fa3e5..968f1705 100644 --- a/include/boost/redis/impl/connection_logger.ipp +++ b/include/boost/redis/impl/connection_logger.ipp @@ -114,21 +114,16 @@ void connection_logger::on_ssl_handshake(system::error_code const& ec) logger_.fn(logger::level::info, msg_); } -void connection_logger::on_write(system::error_code const& ec, std::size_t n) +void connection_logger::on_write(std::size_t n) { - if (logger_.lvl < logger::level::info) + if (logger_.lvl < logger::level::debug) return; - if (ec) { - msg_ = "Writer task error: "; - format_error_code(ec, msg_); - } else { - msg_ = "Writer task: "; - msg_ += std::to_string(n); - msg_ += " bytes written."; - } + msg_ = "Writer task: "; + msg_ += std::to_string(n); + msg_ += " bytes written."; - logger_.fn(logger::level::info, msg_); + logger_.fn(logger::level::debug, msg_); } void connection_logger::on_read(system::error_code const& ec, std::size_t bytes_read) diff --git a/include/boost/redis/impl/writer_fsm.ipp b/include/boost/redis/impl/writer_fsm.ipp index 0d5e4e5b..10781582 100644 --- a/include/boost/redis/impl/writer_fsm.ipp +++ b/include/boost/redis/impl/writer_fsm.ipp @@ -79,28 +79,33 @@ writer_action writer_fsm::resume( 1, writer_action::write_some(write_offset_, st.cfg.health_check_interval)) - // Commit the received bytes + // Commit the received bytes. This accounts for partial success write_offset_ += bytes_written; + st.logger.on_write(bytes_written); - // Check for cancellations - // TODO: translate operation_aborted to another error code for clarity. - // pong_timeout is probably not the best option here - maybe write_timeout? + // Check for cancellations and translate error codes if (is_terminal_cancel(cancel_state)) ec = asio::error::operation_aborted; - - // Log what we wrote - st.logger.on_write(ec, bytes_written); + else if (ec == asio::error::operation_aborted) + ec = error::write_timeout; // Check for errors if (ec) break; } - // Write complete. Process its results + // Write complete. Process its results. Note that we may have partial success if (write_done(st, write_offset_)) st.mpx.commit_write(); - if (ec) + + if (ec) { + if (ec == asio::error::operation_aborted) { + st.logger.trace("Writer task: cancelled (1)."); + } else { + st.logger.trace("Writer task error", ec); + } return ec; + } } // No more requests ready to be written. Wait for more, or until we need to send a PING diff --git a/test/test_writer_fsm.cpp b/test/test_writer_fsm.cpp index b6ba7eee..49edece4 100644 --- a/test/test_writer_fsm.cpp +++ b/test/test_writer_fsm.cpp @@ -160,8 +160,8 @@ void test_single_request() // Logs fix.check_log({ - {logger::level::info, "Writer task: 24 bytes written."}, - {logger::level::info, "Writer task: 24 bytes written."}, + {logger::level::debug, "Writer task: 24 bytes written."}, + {logger::level::debug, "Writer task: 24 bytes written."}, }); } @@ -199,8 +199,8 @@ void test_request_arrives_while_writing() // Logs fix.check_log({ - {logger::level::info, "Writer task: 24 bytes written."}, - {logger::level::info, "Writer task: 24 bytes written."}, + {logger::level::debug, "Writer task: 24 bytes written."}, + {logger::level::debug, "Writer task: 24 bytes written."}, }); } @@ -230,7 +230,7 @@ void test_no_request_at_startup() // Logs fix.check_log({ - {logger::level::info, "Writer task: 24 bytes written."}, + {logger::level::debug, "Writer task: 24 bytes written."}, }); } @@ -271,10 +271,10 @@ void test_short_writes() // Logs fix.check_log({ - {logger::level::info, "Writer task: 2 bytes written." }, - {logger::level::info, "Writer task: 5 bytes written." }, - {logger::level::info, "Writer task: 0 bytes written." }, - {logger::level::info, "Writer task: 17 bytes written."}, + {logger::level::debug, "Writer task: 2 bytes written." }, + {logger::level::debug, "Writer task: 5 bytes written." }, + {logger::level::debug, "Writer task: 0 bytes written." }, + {logger::level::debug, "Writer task: 17 bytes written."}, }); } @@ -302,7 +302,8 @@ void test_write_error() // Logs fix.check_log({ - {logger::level::info, "Writer task error: Expected field value is empty. [boost.redis:5]"}, + {logger::level::debug, "Writer task: 2 bytes written." }, + {logger::level::debug, "Writer task error: Expected field value is empty. [boost.redis:5]"}, }); } @@ -328,7 +329,8 @@ void test_cancel_write() // Logs fix.check_log({ - {logger::level::debug, "Writer task: cancelled (1)."}, + {logger::level::debug, "Writer task: 2 bytes written."}, + {logger::level::debug, "Writer task: cancelled (1)." }, }); } @@ -355,7 +357,8 @@ void test_cancel_write_edge() // Logs fix.check_log({ - {logger::level::debug, "Writer task: cancelled (1)."}, + {logger::level::debug, "Writer task: 24 bytes written."}, + {logger::level::debug, "Writer task: cancelled (1)." }, }); } From df7554d2c0f463a9adef5783eadacea0f6d3a228 Mon Sep 17 00:00:00 2001 From: Ruben Perez Date: Thu, 16 Oct 2025 16:50:02 +0200 Subject: [PATCH 24/41] write timeout test --- test/test_writer_fsm.cpp | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/test/test_writer_fsm.cpp b/test/test_writer_fsm.cpp index 49edece4..880aa32e 100644 --- a/test/test_writer_fsm.cpp +++ b/test/test_writer_fsm.cpp @@ -307,6 +307,33 @@ void test_write_error() }); } +void test_write_timeout() +{ + // Setup + fixture fix; + test_elem item; + + // A request arrives before the writer starts + fix.st.mpx.add(item.elm); + + // Start. A write is triggered, and the request is marked as staged + auto act = fix.fsm.resume(fix.st, error_code(), 0u, cancellation_type_t::none); + BOOST_TEST_EQ(act, writer_action::write_some(0u, 4s)); + BOOST_TEST(item.elm->is_staged()); + + // The write times out, so it completes with operation_aborted + act = fix.fsm.resume(fix.st, asio::error::operation_aborted, 0u, cancellation_type_t::none); + BOOST_TEST_EQ(act, error_code(error::write_timeout)); + BOOST_TEST(item.elm->is_staged()); + + // Logs + fix.check_log({ + {logger::level::debug, "Writer task: 0 bytes written." }, + {logger::level::debug, + "Writer task error: Timeout while writing data to the server. [boost.redis:27]"}, + }); +} + // A write is cancelled void test_cancel_write() { @@ -401,6 +428,7 @@ int main() test_short_writes(); test_write_error(); + test_write_timeout(); test_cancel_write(); test_cancel_write_edge(); From 678f0c2079daddda562468b7159c7420605a94cb Mon Sep 17 00:00:00 2001 From: Ruben Perez Date: Thu, 16 Oct 2025 17:08:43 +0200 Subject: [PATCH 25/41] ping success --- include/boost/redis/detail/writer_fsm.hpp | 1 - test/test_writer_fsm.cpp | 39 ++++++++++++++++++++++- 2 files changed, 38 insertions(+), 2 deletions(-) diff --git a/include/boost/redis/detail/writer_fsm.hpp b/include/boost/redis/detail/writer_fsm.hpp index 63f5bffb..d8db73e5 100644 --- a/include/boost/redis/detail/writer_fsm.hpp +++ b/include/boost/redis/detail/writer_fsm.hpp @@ -16,7 +16,6 @@ #include #include -#include // Sans-io algorithm for the writer task, as a finite state machine diff --git a/test/test_writer_fsm.cpp b/test/test_writer_fsm.cpp index 880aa32e..60973058 100644 --- a/test/test_writer_fsm.cpp +++ b/test/test_writer_fsm.cpp @@ -24,6 +24,7 @@ #include #include #include +#include using namespace boost::redis; namespace asio = boost::asio; @@ -118,7 +119,7 @@ struct fixture : detail::log_fixture { fixture() { - st.cfg.health_check_id = "my_health_check"; + st.ping_req.push("PING", "ping_msg"); // would be set up by the runner st.cfg.health_check_interval = 4s; } }; @@ -278,6 +279,41 @@ void test_short_writes() }); } +// If no data arrives during the health check interval, a ping is written +void test_ping() +{ + // Setup + fixture fix; + error_code ec; + constexpr std::string_view ping_payload = "*2\r\n$4\r\nPING\r\n$8\r\nping_msg\r\n"; + + // Start. There is no request, so we wait + auto act = fix.fsm.resume(fix.st, error_code(), 0u, cancellation_type_t::none); + BOOST_TEST_EQ(act, writer_action::wait(4s)); + + // No request arrives during the wait interval so a ping is added + act = fix.fsm.resume(fix.st, error_code(), 0u, cancellation_type_t::none); + BOOST_TEST_EQ(act, writer_action::write_some(0u, 4s)); + BOOST_TEST_EQ(fix.st.mpx.get_write_buffer(), ping_payload); + + // Write successful + act = fix.fsm.resume(fix.st, error_code(), ping_payload.size(), cancellation_type_t::none); + BOOST_TEST_EQ(act, writer_action::wait(4s)); + + // Simulate a successful response to the PING + constexpr std::string_view ping_response = "$8\r\nping_msg\r\n"; + read(fix.st.mpx, ping_response); + auto res = fix.st.mpx.consume(ec); + BOOST_TEST_EQ(ec, error_code()); + BOOST_TEST(res.first == consume_result::got_response); + BOOST_TEST_EQ(res.second, ping_response.size()); + + // Logs + fix.check_log({ + {logger::level::debug, "Writer task: 28 bytes written."}, + }); +} + // A write error makes the writer exit void test_write_error() { @@ -426,6 +462,7 @@ int main() test_request_arrives_while_writing(); test_no_request_at_startup(); test_short_writes(); + test_ping(); test_write_error(); test_write_timeout(); From 87c42625d153b894c94eb6ca08a9c79344322d49 Mon Sep 17 00:00:00 2001 From: Ruben Perez Date: Thu, 16 Oct 2025 17:13:41 +0200 Subject: [PATCH 26/41] ping error --- test/test_writer_fsm.cpp | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/test/test_writer_fsm.cpp b/test/test_writer_fsm.cpp index 60973058..03ac46b5 100644 --- a/test/test_writer_fsm.cpp +++ b/test/test_writer_fsm.cpp @@ -314,6 +314,41 @@ void test_ping() }); } +// If the server answers with an error in PING, we log it and produce an error +void test_ping_error() +{ + // Setup + fixture fix; + error_code ec; + + // Start. There is no request, so we wait + auto act = fix.fsm.resume(fix.st, error_code(), 0u, cancellation_type_t::none); + BOOST_TEST_EQ(act, writer_action::wait(4s)); + + // No request arrives during the wait interval so a ping is added + act = fix.fsm.resume(fix.st, error_code(), 0u, cancellation_type_t::none); + BOOST_TEST_EQ(act, writer_action::write_some(0u, 4s)); + + // Write successful + const auto ping_size = fix.st.mpx.get_write_buffer().size(); + act = fix.fsm.resume(fix.st, error_code(), ping_size, cancellation_type_t::none); + BOOST_TEST_EQ(act, writer_action::wait(4s)); + + // Simulate an error response to the PING + constexpr std::string_view ping_response = "-ERR: bad command\r\n"; + read(fix.st.mpx, ping_response); + auto res = fix.st.mpx.consume(ec); + BOOST_TEST_EQ(ec, error::resp3_simple_error); + BOOST_TEST(res.first == consume_result::got_response); + BOOST_TEST_EQ(res.second, ping_response.size()); + + // Logs + fix.check_log({ + {logger::level::debug, "Writer task: 28 bytes written." }, + {logger::level::info, "Health checker: server answered ping with an error: ERR: bad command"}, + }); +} + // A write error makes the writer exit void test_write_error() { @@ -462,7 +497,9 @@ int main() test_request_arrives_while_writing(); test_no_request_at_startup(); test_short_writes(); + test_ping(); + test_ping_error(); test_write_error(); test_write_timeout(); From 843ae9ef2be5490d3006a873f05664aeb3ecde09 Mon Sep 17 00:00:00 2001 From: Ruben Perez Date: Thu, 16 Oct 2025 17:47:13 +0200 Subject: [PATCH 27/41] Docs --- include/boost/redis/config.hpp | 19 ++++++++++++++++++- include/boost/redis/connection.hpp | 14 +++++--------- 2 files changed, 23 insertions(+), 10 deletions(-) diff --git a/include/boost/redis/config.hpp b/include/boost/redis/config.hpp index 544b7d21..5a812141 100644 --- a/include/boost/redis/config.hpp +++ b/include/boost/redis/config.hpp @@ -83,7 +83,7 @@ struct config { */ std::optional database_index = 0; - /// Message used by the health-checker in @ref boost::redis::basic_connection::async_run. + /// Message used by `PING` commands sent by the health checker. std::string health_check_id = "Boost.Redis"; /** @@ -106,6 +106,23 @@ struct config { /** @brief Time span between successive health checks. * Set to zero to disable health-checks. + * + * When this value is set to a non-zero duration, @ref basic_connection::async_run + * will issue `PING` commands whenever no command is sent to the server for more + * than `health_check_interval`. You can configure the message passed to the `PING` + * command using @ref health_check_id. + * + * Enabling health checks also sets timeouts to individual network + * operations. The connection is considered dead if: + * + * @li No byte can be written to the server after `health_check_interval`. + * @li No byte is read from the server after `2 * health_check_interval`. + * + * If the health checker finds that the connection is unresponsive, it will be closed, + * and a reconnection will be triggered, as if a network error had occurred. + * + * The exact timeout values are *not* part of the interface, and might change + * in future versions. */ std::chrono::steady_clock::duration health_check_interval = std::chrono::seconds{2}; diff --git a/include/boost/redis/connection.hpp b/include/boost/redis/connection.hpp index 75f3c56b..826b4c86 100644 --- a/include/boost/redis/connection.hpp +++ b/include/boost/redis/connection.hpp @@ -547,19 +547,15 @@ class basic_connection { * @ref boost::redis::config::addr. * @li Establishes a physical connection to the server. For TCP connections, * connects to one of the endpoints obtained during name resolution. - * For UNIX domain socket connections, it connects to @ref boost::redis::config::unix_socket. + * For UNIX domain socket connections, it connects to @ref boost::redis::config::unix_sockets. * @li If @ref boost::redis::config::use_ssl is `true`, performs the TLS handshake. * @li Executes the setup request, as defined by the passed @ref config object. * By default, this is a `HELLO` command, but it can contain any other arbitrary - * commands. See the @ref config docs for more info. - * @li Starts a health-check operation where ping commands are sent + * commands. See the @ref config::setup docs for more info. + * @li Starts a health-check operation where `PING` commands are sent * at intervals specified by - * @ref boost::redis::config::health_check_interval. - * The message passed to `PING` will be @ref boost::redis::config::health_check_id. - * Passing an interval with a zero value will disable health-checks. If the Redis - * server does not respond to a health-check within two times the value - * specified here, it will be considered unresponsive and the connection - * will be closed. + * @ref config::health_check_interval when the connection is idle. + * See the documentation of @ref config::health_check_interval for more info. * @li Starts read and write operations. Requests issued using @ref async_exec * before `async_run` is called will be written to the server immediately. * From b9a44b8143a25fb06ee2bad8491e20b57cad1b8c Mon Sep 17 00:00:00 2001 From: Ruben Perez Date: Thu, 16 Oct 2025 17:50:19 +0200 Subject: [PATCH 28/41] Remove some health check disables in tests --- test/test_conn_echo_stress.cpp | 1 - test/test_conn_exec.cpp | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/test/test_conn_echo_stress.cpp b/test/test_conn_echo_stress.cpp index d3db9fe6..5b37489d 100644 --- a/test/test_conn_echo_stress.cpp +++ b/test/test_conn_echo_stress.cpp @@ -95,7 +95,6 @@ BOOST_AUTO_TEST_CASE(echo_stress) net::io_context ctx; connection conn{ctx}; auto cfg = make_test_config(); - cfg.health_check_interval = std::chrono::seconds::zero(); // Number of coroutines that will send pings sharing the same // connection to redis. diff --git a/test/test_conn_exec.cpp b/test/test_conn_exec.cpp index 4ac3bba9..28a23497 100644 --- a/test/test_conn_exec.cpp +++ b/test/test_conn_exec.cpp @@ -134,8 +134,7 @@ BOOST_AUTO_TEST_CASE(large_number_of_concurrent_requests_issue_170) auto conn = std::make_shared(ioc); auto cfg = make_test_config(); - cfg.health_check_interval = std::chrono::seconds(0); - conn->async_run(cfg, {}, net::detached); + conn->async_run(cfg, net::detached); constexpr int repeat = 8000; int remaining = repeat; From 5ddcdd50424957948db370d00cadbfcef3ec27bb Mon Sep 17 00:00:00 2001 From: Ruben Perez Date: Thu, 16 Oct 2025 17:57:34 +0200 Subject: [PATCH 29/41] health checks disabled --- test/test_writer_fsm.cpp | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/test/test_writer_fsm.cpp b/test/test_writer_fsm.cpp index 03ac46b5..153ebd62 100644 --- a/test/test_writer_fsm.cpp +++ b/test/test_writer_fsm.cpp @@ -314,6 +314,33 @@ void test_ping() }); } +// Disabled health checks don't cause trouble +void test_health_checks_disabled() +{ + // Setup + fixture fix; + test_elem item; + fix.st.cfg.health_check_interval = 0s; + + // A request arrives before the writer starts + fix.st.mpx.add(item.elm); + + // Start. A write is triggered, and the request is marked as staged + auto act = fix.fsm.resume(fix.st, error_code(), 0u, cancellation_type_t::none); + BOOST_TEST_EQ(act, writer_action::write_some(0u, 0s)); + BOOST_TEST(item.elm->is_staged()); + + // The write completes successfully. The request is written, and we go back to sleep. + act = fix.fsm.resume(fix.st, error_code(), item.req.payload().size(), cancellation_type_t::none); + BOOST_TEST_EQ(act, writer_action::wait(0s)); + BOOST_TEST(item.elm->is_written()); + + // Logs + fix.check_log({ + {logger::level::debug, "Writer task: 24 bytes written."}, + }); +} + // If the server answers with an error in PING, we log it and produce an error void test_ping_error() { @@ -497,6 +524,7 @@ int main() test_request_arrives_while_writing(); test_no_request_at_startup(); test_short_writes(); + test_health_checks_disabled(); test_ping(); test_ping_error(); From aaccbb492d6d31aa53cf1c03b60cd265d81684fb Mon Sep 17 00:00:00 2001 From: Ruben Perez Date: Fri, 17 Oct 2025 12:26:29 +0200 Subject: [PATCH 30/41] Remove unnecessary ping_resp --- include/boost/redis/detail/connection_state.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/include/boost/redis/detail/connection_state.hpp b/include/boost/redis/detail/connection_state.hpp index 688c56d5..8e13e43c 100644 --- a/include/boost/redis/detail/connection_state.hpp +++ b/include/boost/redis/detail/connection_state.hpp @@ -25,7 +25,6 @@ struct connection_state { multiplexer mpx{}; generic_response setup_resp{}; request ping_req{}; - generic_response ping_resp{}; }; } // namespace boost::redis::detail From e44873152cd1f0a3c66ede505be5e30c981cd89e Mon Sep 17 00:00:00 2001 From: Ruben Perez Date: Fri, 17 Oct 2025 12:31:35 +0200 Subject: [PATCH 31/41] Use cancel_at --- include/boost/redis/connection.hpp | 48 ++++++++++++++---------------- 1 file changed, 22 insertions(+), 26 deletions(-) diff --git a/include/boost/redis/connection.hpp b/include/boost/redis/connection.hpp index 826b4c86..53dc5747 100644 --- a/include/boost/redis/connection.hpp +++ b/include/boost/redis/connection.hpp @@ -36,7 +36,7 @@ #include #include #include -#include +#include #include #include #include @@ -66,6 +66,14 @@ namespace boost::redis { namespace detail { +// Given a timeout value, compute the expiry time. A zero timeout is considered to mean "no timeout" +inline std::chrono::steady_clock::time_point compute_expiry( + std::chrono::steady_clock::duration timeout) +{ + return timeout.count() == 0 ? (std::chrono::steady_clock::time_point::max)() + : std::chrono::steady_clock::now() + timeout; +} + template struct connection_impl { using clock_type = std::chrono::steady_clock; @@ -231,23 +239,15 @@ struct writer_op { switch (act.type()) { case writer_action_type::done: self.complete(act.error()); return; case writer_action_type::write_some: - { - auto buf = asio::buffer(conn->st_.mpx.get_write_buffer().substr(act.write_offset())); - if (auto timeout = act.timeout(); timeout.count() != 0) { - conn->stream_.async_write_some( - buf, - asio::cancel_after(conn->writer_timer_, timeout, std::move(self))); - } else { - conn->stream_.async_write_some(buf, std::move(self)); - } + conn->stream_.async_write_some( + asio::buffer(conn->st_.mpx.get_write_buffer().substr(act.write_offset())), + asio::cancel_at( + conn->writer_timer_, + compute_expiry(act.timeout()), + std::move(self))); return; - } case writer_action_type::wait: - if (auto timeout = act.timeout(); timeout.count() != 0) { - conn->writer_cv_.expires_after(timeout); - } else { - conn->writer_cv_.expires_at((std::chrono::steady_clock::time_point::max)()); - } + conn->writer_cv_.expires_at(compute_expiry(act.timeout())); conn->writer_cv_.async_wait(std::move(self)); return; } @@ -273,17 +273,13 @@ struct reader_op { switch (act.get_type()) { case reader_fsm::action::type::read_some: - { - auto const buf = asio::buffer(conn->st_.mpx.get_prepared_read_buffer()); - if (act.timeout().count() != 0) { - conn->stream_.async_read_some( - buf, - asio::cancel_after(conn->reader_timer_, act.timeout(), std::move(self))); - } else { - conn->stream_.async_read_some(buf, std::move(self)); - } + conn->stream_.async_read_some( + asio::buffer(conn->st_.mpx.get_prepared_read_buffer()), + asio::cancel_at( + conn->reader_timer_, + compute_expiry(act.timeout()), + std::move(self))); return; - } case reader_fsm::action::type::notify_push_receiver: if (conn->receive_channel_.try_send(ec, act.push_size())) { continue; From b2c150daf0fc46d2583aeb2c6d602bcd8f67624c Mon Sep 17 00:00:00 2001 From: Ruben Perez Date: Fri, 17 Oct 2025 12:34:43 +0200 Subject: [PATCH 32/41] include cleanup --- include/boost/redis/connection.hpp | 7 ------- include/boost/redis/detail/helper.hpp | 22 ---------------------- 2 files changed, 29 deletions(-) delete mode 100644 include/boost/redis/detail/helper.hpp diff --git a/include/boost/redis/connection.hpp b/include/boost/redis/connection.hpp index 53dc5747..a914c465 100644 --- a/include/boost/redis/connection.hpp +++ b/include/boost/redis/connection.hpp @@ -13,11 +13,9 @@ #include #include #include -#include #include #include #include -#include #include #include #include @@ -39,7 +37,6 @@ #include #include #include -#include #include #include #include @@ -47,14 +44,10 @@ #include #include #include -#include -#include #include #include -#include #include #include -#include #include #include diff --git a/include/boost/redis/detail/helper.hpp b/include/boost/redis/detail/helper.hpp deleted file mode 100644 index 58a7fe97..00000000 --- a/include/boost/redis/detail/helper.hpp +++ /dev/null @@ -1,22 +0,0 @@ -/* Copyright (c) 2018-2024 Marcelo Zimbres Silva (mzimbres@gmail.com) - * - * Distributed under the Boost Software License, Version 1.0. (See - * accompanying file LICENSE.txt) - */ - -#ifndef BOOST_REDIS_HELPER_HPP -#define BOOST_REDIS_HELPER_HPP - -#include - -namespace boost::redis::detail { - -template -auto is_cancelled(T const& self) -{ - return self.get_cancellation_state().cancelled() != asio::cancellation_type_t::none; -} - -} // namespace boost::redis::detail - -#endif // BOOST_REDIS_HELPER_HPP From 2af5702e93ee1f82de30ba6eb848c63bd2283b36 Mon Sep 17 00:00:00 2001 From: Ruben Perez Date: Fri, 17 Oct 2025 12:54:16 +0200 Subject: [PATCH 33/41] Move the write offset to the multiplexer --- include/boost/redis/connection.hpp | 2 +- include/boost/redis/detail/multiplexer.hpp | 17 +++++----- include/boost/redis/detail/writer_fsm.hpp | 29 +++++------------ include/boost/redis/impl/multiplexer.ipp | 37 ++++++++++++---------- include/boost/redis/impl/writer_fsm.ipp | 32 ++++++++----------- 5 files changed, 53 insertions(+), 64 deletions(-) diff --git a/include/boost/redis/connection.hpp b/include/boost/redis/connection.hpp index a914c465..993bc526 100644 --- a/include/boost/redis/connection.hpp +++ b/include/boost/redis/connection.hpp @@ -233,7 +233,7 @@ struct writer_op { case writer_action_type::done: self.complete(act.error()); return; case writer_action_type::write_some: conn->stream_.async_write_some( - asio::buffer(conn->st_.mpx.get_write_buffer().substr(act.write_offset())), + asio::buffer(conn->st_.mpx.get_write_buffer()), asio::cancel_at( conn->writer_timer_, compute_expiry(act.timeout()), diff --git a/include/boost/redis/detail/multiplexer.hpp b/include/boost/redis/detail/multiplexer.hpp index d038adf0..c4eefd0c 100644 --- a/include/boost/redis/detail/multiplexer.hpp +++ b/include/boost/redis/detail/multiplexer.hpp @@ -18,6 +18,7 @@ #include +#include #include #include #include @@ -136,12 +137,12 @@ class multiplexer { [[nodiscard]] auto prepare_write() -> std::size_t; - // To be called after a successful write operation. - // Returns the number of requests that have been released because - // they don't have a response e.g. SUBSCRIBE. + // To be called after a write operation. + // Returns true once all the bytes in the buffer generated by prepare_write + // have been written. // Must be called before cancel_on_conn_lost() because it might change // request status. - auto commit_write() -> std::size_t; + auto commit_write(std::size_t bytes_written) -> bool; // To be called after a successful read operation. // Must be called before cancel_on_conn_lost() because it might change @@ -177,7 +178,7 @@ class multiplexer { [[nodiscard]] auto get_write_buffer() const noexcept -> std::string_view { - return std::string_view{write_buffer_}; + return std::string_view{write_buffer_}.substr(write_offset_); } [[nodiscard]] @@ -210,15 +211,15 @@ class multiplexer { [[nodiscard]] auto is_next_push(std::string_view data) const noexcept -> bool; - // Releases the number of requests that have been released. - [[nodiscard]] - auto release_push_requests() -> std::size_t; + // Completes requests that don't expect a response + void release_push_requests(); [[nodiscard]] consume_result consume_impl(system::error_code& ec); read_buffer read_buffer_; std::string write_buffer_; + std::size_t write_offset_{}; // how many bytes of the write buffer have been written? std::deque> reqs_; resp3::parser parser_{}; bool on_push_ = false; diff --git a/include/boost/redis/detail/writer_fsm.hpp b/include/boost/redis/detail/writer_fsm.hpp index d8db73e5..187e6743 100644 --- a/include/boost/redis/detail/writer_fsm.hpp +++ b/include/boost/redis/detail/writer_fsm.hpp @@ -37,18 +37,12 @@ class writer_action { writer_action_type type_; union { system::error_code ec_; - struct { - std::size_t offset; - std::chrono::steady_clock::duration timeout; - } offset_timeout_; + std::chrono::steady_clock::duration timeout_; }; - writer_action( - writer_action_type type, - std::size_t offset, - std::chrono::steady_clock::duration t) noexcept + writer_action(writer_action_type type, std::chrono::steady_clock::duration t) noexcept : type_{type} - , offset_timeout_{offset, t} + , timeout_{t} { } public: @@ -59,14 +53,14 @@ class writer_action { , ec_{ec} { } - static writer_action write_some(std::size_t offset, std::chrono::steady_clock::duration timeout) + static writer_action write_some(std::chrono::steady_clock::duration timeout) { - return {writer_action_type::write_some, offset, timeout}; + return {writer_action_type::write_some, timeout}; } static writer_action wait(std::chrono::steady_clock::duration timeout) { - return {writer_action_type::wait, 0u, timeout}; + return {writer_action_type::wait, timeout}; } system::error_code error() const @@ -75,25 +69,18 @@ class writer_action { return ec_; } - std::size_t write_offset() const - { - BOOST_ASSERT(type_ == writer_action_type::write_some); - return offset_timeout_.offset; - } - std::chrono::steady_clock::duration timeout() const { BOOST_ASSERT(type_ == writer_action_type::write_some || type_ == writer_action_type::wait); - return offset_timeout_.timeout; + return timeout_; } }; class writer_fsm { int resume_point_{0}; - std::size_t write_offset_{}; public: - writer_fsm() noexcept = default; + writer_fsm() = default; writer_action resume( connection_state& st, diff --git a/include/boost/redis/impl/multiplexer.ipp b/include/boost/redis/impl/multiplexer.ipp index 176a3cee..9bfdd149 100644 --- a/include/boost/redis/impl/multiplexer.ipp +++ b/include/boost/redis/impl/multiplexer.ipp @@ -11,6 +11,7 @@ #include #include +#include #include namespace boost::redis::detail { @@ -65,11 +66,19 @@ void multiplexer::cancel(std::shared_ptr const& ptr) } } -std::size_t multiplexer::commit_write() +bool multiplexer::commit_write(std::size_t bytes_written) { BOOST_ASSERT(!cancel_run_called_); - usage_.bytes_sent += std::size(write_buffer_); + BOOST_ASSERT(bytes_written + write_offset_ <= write_buffer_.size()); + usage_.bytes_sent += bytes_written; + write_offset_ += bytes_written; + + // Are there still more bytes to write? + if (write_offset_ < write_buffer_.size()) + return false; + + // We've written all the bytes in the write buffer. // We have to clear the payload right after writing it to use it // as a flag that informs there is no ongoing write. write_buffer_.clear(); @@ -83,7 +92,9 @@ std::size_t multiplexer::commit_write() } }); - return release_push_requests(); + release_push_requests(); + + return true; } void multiplexer::add(std::shared_ptr const& info) @@ -151,8 +162,7 @@ consume_result multiplexer::consume_impl(system::error_code& ec) return consume_result::got_response; } -std::pair -multiplexer::consume(system::error_code& ec) +std::pair multiplexer::consume(system::error_code& ec) { BOOST_ASSERT(!cancel_run_called_); @@ -172,20 +182,14 @@ multiplexer::consume(system::error_code& ec) return std::make_pair(consume_result::needs_more, consumed); } -auto multiplexer::prepare_read() noexcept -> system::error_code -{ - return read_buffer_.prepare(); -} +auto multiplexer::prepare_read() noexcept -> system::error_code { return read_buffer_.prepare(); } auto multiplexer::get_prepared_read_buffer() noexcept -> read_buffer::span_type { return read_buffer_.get_prepared(); } -void multiplexer::commit_read(std::size_t bytes_read) -{ - read_buffer_.commit(bytes_read); -} +void multiplexer::commit_read(std::size_t bytes_read) { read_buffer_.commit(bytes_read); } auto multiplexer::get_read_buffer_size() const noexcept -> std::size_t { @@ -196,6 +200,7 @@ void multiplexer::reset() { read_buffer_.clear(); write_buffer_.clear(); + write_offset_ = 0u; parser_.reset(); on_push_ = false; cancel_run_called_ = false; @@ -222,6 +227,8 @@ std::size_t multiplexer::prepare_write() usage_.commands_sent += ri->get_request().get_commands(); }); + write_offset_ = 0u; + auto const d = std::distance(point, std::cend(reqs_)); return static_cast(d); } @@ -331,7 +338,7 @@ bool multiplexer::is_next_push(std::string_view data) const noexcept return reqs_.front()->is_waiting(); } -std::size_t multiplexer::release_push_requests() +void multiplexer::release_push_requests() { auto point = std::stable_partition( std::begin(reqs_), @@ -344,9 +351,7 @@ std::size_t multiplexer::release_push_requests() ptr->notify_done(); }); - auto const d = std::distance(point, std::end(reqs_)); reqs_.erase(point, std::end(reqs_)); - return static_cast(d); } bool multiplexer::is_writing() const noexcept { return !write_buffer_.empty(); } diff --git a/include/boost/redis/impl/writer_fsm.ipp b/include/boost/redis/impl/writer_fsm.ipp index 10781582..ed1518c7 100644 --- a/include/boost/redis/impl/writer_fsm.ipp +++ b/include/boost/redis/impl/writer_fsm.ipp @@ -70,17 +70,16 @@ writer_action writer_fsm::resume( while (st.mpx.prepare_write() != 0u) { // Write an entire message. We can't use asio::async_write because we want // to apply timeouts to individual write operations - write_offset_ = 0u; - while (!write_done(st, write_offset_)) { + for (;;) { // Write what we can. If nothing has been written for the health check // interval, we consider the connection as failed BOOST_REDIS_YIELD( resume_point_, 1, - writer_action::write_some(write_offset_, st.cfg.health_check_interval)) + writer_action::write_some(st.cfg.health_check_interval)) // Commit the received bytes. This accounts for partial success - write_offset_ += bytes_written; + bool finished = st.mpx.commit_write(bytes_written); st.logger.on_write(bytes_written); // Check for cancellations and translate error codes @@ -90,21 +89,18 @@ writer_action writer_fsm::resume( ec = error::write_timeout; // Check for errors - if (ec) - break; - } - - // Write complete. Process its results. Note that we may have partial success - if (write_done(st, write_offset_)) - st.mpx.commit_write(); - - if (ec) { - if (ec == asio::error::operation_aborted) { - st.logger.trace("Writer task: cancelled (1)."); - } else { - st.logger.trace("Writer task error", ec); + if (ec) { + if (ec == asio::error::operation_aborted) { + st.logger.trace("Writer task: cancelled (1)."); + } else { + st.logger.trace("Writer task error", ec); + } + return ec; } - return ec; + + // Are we done yet? + if (finished) + break; } } From a8a7a6ac00bae690e13d5e0446e9bfc93c509152 Mon Sep 17 00:00:00 2001 From: Ruben Perez Date: Fri, 17 Oct 2025 13:04:34 +0200 Subject: [PATCH 34/41] Fix multiplexer tests --- test/test_multiplexer.cpp | 38 ++++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/test/test_multiplexer.cpp b/test/test_multiplexer.cpp index 96705a18..9011688c 100644 --- a/test/test_multiplexer.cpp +++ b/test/test_multiplexer.cpp @@ -15,14 +15,13 @@ #include #include +#include "sansio_utils.hpp" + #include #include #include -#include #include -#include "sansio_utils.hpp" - using boost::redis::request; using boost::redis::detail::multiplexer; using boost::redis::detail::consume_result; @@ -118,7 +117,7 @@ void test_request_needs_more() // Add the element to the multiplexer and simulate a successful write mpx.add(item1.elem_ptr); BOOST_TEST_EQ(mpx.prepare_write(), 1u); - BOOST_TEST_EQ(mpx.commit_write(), 0u); + BOOST_TEST(mpx.commit_write(item1.req.payload().size())); BOOST_TEST(item1.elem_ptr->is_written()); BOOST_TEST(!item1.done); @@ -161,6 +160,13 @@ void test_several_requests() BOOST_TEST_EQ(mpx.prepare_write(), 3u); BOOST_TEST_EQ(mpx.prepare_write(), 0u); + // The write buffer holds the 3 requests, coalesced + constexpr std::string_view expected_buffer = + "*2\r\n$4\r\nPING\r\n$8\r\ncmd-arg\r\n" + "*2\r\n$4\r\nPING\r\n$8\r\ncmd-arg\r\n" + "*2\r\n$4\r\nPING\r\n$8\r\ncmd-arg\r\n"; + BOOST_TEST_EQ(mpx.get_write_buffer(), expected_buffer); + // After coalescing the requests for writing their statuses should // be changed to "staged". BOOST_TEST(item1.elem_ptr->is_staged()); @@ -180,7 +186,7 @@ void test_several_requests() // The commit_write call informs the multiplexer the payload was // sent (e.g. written to the socket). This step releases requests // that has no response. - BOOST_TEST_EQ(mpx.commit_write(), 1u); + BOOST_TEST(mpx.commit_write(expected_buffer.size())); // The staged status should now have changed to written. BOOST_TEST(item1.elem_ptr->is_written()); @@ -251,7 +257,7 @@ void test_request_response_before_write() item.reset(); // The write gets confirmed and causes no problem - BOOST_TEST_EQ(mpx.commit_write(), 0u); + BOOST_TEST(mpx.commit_write(mpx.get_write_buffer().size())); } void test_push() @@ -395,7 +401,7 @@ void test_mix_responses_pushes() mpx.add(item1.elem_ptr); mpx.add(item2.elem_ptr); BOOST_TEST_EQ(mpx.prepare_write(), 2u); - BOOST_TEST_EQ(mpx.commit_write(), 0u); + BOOST_TEST(mpx.commit_write(mpx.get_write_buffer().size())); BOOST_TEST(item1.elem_ptr->is_written()); BOOST_TEST(!item1.done); BOOST_TEST(item2.elem_ptr->is_written()); @@ -488,7 +494,7 @@ void test_cancel_waiting() // We can progress the other request normally BOOST_TEST_EQ(mpx.prepare_write(), 1u); - BOOST_TEST_EQ(mpx.commit_write(), 0u); + BOOST_TEST(mpx.commit_write(mpx.get_write_buffer().size())); error_code ec; read(mpx, "$11\r\nHello world\r\n"); auto res = mpx.consume(ec); @@ -518,7 +524,7 @@ void test_cancel_staged() item1.reset(); // Verify we don't reference this item anyhow // The write gets confirmed - BOOST_TEST_EQ(mpx.commit_write(), 0u); + BOOST_TEST(mpx.commit_write(mpx.get_write_buffer().size())); // The cancelled request's response arrives. It gets discarded error_code ec; @@ -556,7 +562,7 @@ void test_cancel_staged_command_without_response() item1.reset(); // Verify we don't reference this item anyhow // The write gets confirmed - BOOST_TEST_EQ(mpx.commit_write(), 1u); + BOOST_TEST(mpx.commit_write(mpx.get_write_buffer().size())); // The 2nd request's response arrives. It gets parsed successfully error_code ec; @@ -582,7 +588,7 @@ void test_cancel_written() // A write succeeds BOOST_TEST_EQ(mpx.prepare_write(), 2u); - BOOST_TEST_EQ(mpx.commit_write(), 0u); + BOOST_TEST(mpx.commit_write(mpx.get_write_buffer().size())); // Cancel the first request mpx.cancel(item1->elem_ptr); @@ -623,7 +629,7 @@ void test_cancel_written_half_parsed_response() // A write succeeds BOOST_TEST_EQ(mpx.prepare_write(), 2u); - BOOST_TEST_EQ(mpx.commit_write(), 0u); + BOOST_TEST(mpx.commit_write(mpx.get_write_buffer().size())); // Get the response for the 1st command in req1 error_code ec; @@ -686,7 +692,7 @@ void test_cancel_written_null_error() // A write succeeds BOOST_TEST_EQ(mpx.prepare_write(), 2u); - BOOST_TEST_EQ(mpx.commit_write(), 0u); + BOOST_TEST(mpx.commit_write(mpx.get_write_buffer().size())); // Cancel the first request mpx.cancel(item1->elem_ptr); @@ -741,7 +747,7 @@ void test_cancel_on_connection_lost() mpx.add(item_written1.elem_ptr); mpx.add(item_written2.elem_ptr); BOOST_TEST_EQ(mpx.prepare_write(), 2u); - BOOST_TEST_EQ(mpx.commit_write(), 0u); + BOOST_TEST(mpx.commit_write(mpx.get_write_buffer().size())); mpx.add(item_staged1.elem_ptr); mpx.add(item_staged2.elem_ptr); @@ -794,7 +800,7 @@ void test_cancel_on_connection_lost_abandoned() mpx.add(item_written1->elem_ptr); mpx.add(item_written2->elem_ptr); BOOST_TEST_EQ(mpx.prepare_write(), 2u); - BOOST_TEST_EQ(mpx.commit_write(), 0u); + BOOST_TEST(mpx.commit_write(mpx.get_write_buffer().size())); mpx.add(item_staged1->elem_ptr); mpx.add(item_staged2->elem_ptr); @@ -898,7 +904,7 @@ void test_reset() // We're able to add write requests and read responses - all state was reset mpx.add(item2.elem_ptr); BOOST_TEST_EQ(mpx.prepare_write(), 1u); - BOOST_TEST_EQ(mpx.commit_write(), 0u); + BOOST_TEST(mpx.commit_write(mpx.get_write_buffer().size())); std::string_view response_buffer = "$11\r\nHello world\r\n"; read(mpx, response_buffer); From 65e729d2732cec4ba9d7e05b671506708880c02b Mon Sep 17 00:00:00 2001 From: Ruben Perez Date: Fri, 17 Oct 2025 13:11:23 +0200 Subject: [PATCH 35/41] short writes test --- test/test_multiplexer.cpp | 56 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/test/test_multiplexer.cpp b/test/test_multiplexer.cpp index 9011688c..7ee3095e 100644 --- a/test/test_multiplexer.cpp +++ b/test/test_multiplexer.cpp @@ -228,6 +228,61 @@ void test_several_requests() BOOST_TEST(item3.done); } +void test_short_writes() +{ + // Setup + multiplexer mpx; + test_item item1{}; + test_item item2{false}; + + // Add some requests to the multiplexer. + mpx.add(item1.elem_ptr); + mpx.add(item2.elem_ptr); + BOOST_TEST(item1.elem_ptr->is_waiting()); + BOOST_TEST(item2.elem_ptr->is_waiting()); + + // Start writing them + BOOST_TEST_EQ(mpx.prepare_write(), 2u); + BOOST_TEST_EQ( + mpx.get_write_buffer(), + "*2\r\n$4\r\nPING\r\n$8\r\ncmd-arg\r\n" + "*2\r\n$9\r\nSUBSCRIBE\r\n$8\r\ncmd-arg\r\n"); + BOOST_TEST(item1.elem_ptr->is_staged()); + BOOST_TEST(item2.elem_ptr->is_staged()); + + // Write a small part. The write buffer gets updated, but request status is not changed + BOOST_TEST_NOT(mpx.commit_write(8u)); + BOOST_TEST_EQ( + mpx.get_write_buffer(), + "PING\r\n$8\r\ncmd-arg\r\n" + "*2\r\n$9\r\nSUBSCRIBE\r\n$8\r\ncmd-arg\r\n"); + BOOST_TEST(item1.elem_ptr->is_staged()); + BOOST_TEST(item2.elem_ptr->is_staged()); + + // Write another part + BOOST_TEST_NOT(mpx.commit_write(20u)); + BOOST_TEST_EQ(mpx.get_write_buffer(), "*2\r\n$9\r\nSUBSCRIBE\r\n$8\r\ncmd-arg\r\n"); + BOOST_TEST(item1.elem_ptr->is_staged()); + BOOST_TEST(item2.elem_ptr->is_staged()); + + // A zero-size write doesn't cause trouble + BOOST_TEST_NOT(mpx.commit_write(0u)); + BOOST_TEST_EQ(mpx.get_write_buffer(), "*2\r\n$9\r\nSUBSCRIBE\r\n$8\r\ncmd-arg\r\n"); + BOOST_TEST(item1.elem_ptr->is_staged()); + BOOST_TEST(item2.elem_ptr->is_staged()); + + // Write everything except the last byte + BOOST_TEST_NOT(mpx.commit_write(32u)); + BOOST_TEST_EQ(mpx.get_write_buffer(), "\n"); + BOOST_TEST(item1.elem_ptr->is_staged()); + BOOST_TEST(item2.elem_ptr->is_staged()); + + // Write the last byte + BOOST_TEST(mpx.commit_write(1u)); + BOOST_TEST(item1.elem_ptr->is_written()); + BOOST_TEST(item2.elem_ptr->is_done()); +} + // The response to a request is received before its write // confirmation. This might happen on heavy load void test_request_response_before_write() @@ -925,6 +980,7 @@ int main() test_request_needs_more(); test_several_requests(); test_request_response_before_write(); + test_short_writes(); test_push(); test_push_needs_more(); test_push_heuristics_no_request(); From a052493a65f00a7feb89d537d716a9e6cf054d6c Mon Sep 17 00:00:00 2001 From: Ruben Perez Date: Fri, 17 Oct 2025 13:13:25 +0200 Subject: [PATCH 36/41] Remove unused multiplexer::is_writing --- include/boost/redis/detail/multiplexer.hpp | 3 --- include/boost/redis/impl/multiplexer.ipp | 4 ---- 2 files changed, 7 deletions(-) diff --git a/include/boost/redis/detail/multiplexer.hpp b/include/boost/redis/detail/multiplexer.hpp index c4eefd0c..14afc6b5 100644 --- a/include/boost/redis/detail/multiplexer.hpp +++ b/include/boost/redis/detail/multiplexer.hpp @@ -200,9 +200,6 @@ class multiplexer { return usage_; } - [[nodiscard]] - auto is_writing() const noexcept -> bool; - void set_config(config const& cfg); private: diff --git a/include/boost/redis/impl/multiplexer.ipp b/include/boost/redis/impl/multiplexer.ipp index 9bfdd149..b6d118ce 100644 --- a/include/boost/redis/impl/multiplexer.ipp +++ b/include/boost/redis/impl/multiplexer.ipp @@ -79,8 +79,6 @@ bool multiplexer::commit_write(std::size_t bytes_written) return false; // We've written all the bytes in the write buffer. - // We have to clear the payload right after writing it to use it - // as a flag that informs there is no ongoing write. write_buffer_.clear(); // There is small optimization possible here: traverse only the @@ -354,8 +352,6 @@ void multiplexer::release_push_requests() reqs_.erase(point, std::end(reqs_)); } -bool multiplexer::is_writing() const noexcept { return !write_buffer_.empty(); } - void multiplexer::set_receive_adapter(any_adapter adapter) { receive_adapter_ = std::move(adapter); From e5ef3ffdbd55da00d3949e4492bc02cad75f4565 Mon Sep 17 00:00:00 2001 From: Ruben Perez Date: Fri, 17 Oct 2025 13:15:35 +0200 Subject: [PATCH 37/41] Fix exec_fsm tests --- test/test_exec_fsm.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/test/test_exec_fsm.cpp b/test/test_exec_fsm.cpp index ada451b6..f2385355 100644 --- a/test/test_exec_fsm.cpp +++ b/test/test_exec_fsm.cpp @@ -138,7 +138,7 @@ void test_success() // Simulate a successful write BOOST_TEST_EQ(mpx.prepare_write(), 1u); // one request was placed in the packet to write - BOOST_TEST_EQ(mpx.commit_write(), 0u); // all requests expect a response + BOOST_TEST(mpx.commit_write(mpx.get_write_buffer().size())); // Simulate a successful read read(mpx, "$5\r\nhello\r\n"); @@ -177,7 +177,7 @@ void test_parse_error() // Simulate a successful write BOOST_TEST_EQ(mpx.prepare_write(), 1u); // one request was placed in the packet to write - BOOST_TEST_EQ(mpx.commit_write(), 0u); // all requests expect a response + BOOST_TEST(mpx.commit_write(mpx.get_write_buffer().size())); // Simulate a read that will trigger an error. // The second field should be a number (rather than the empty string). @@ -239,7 +239,7 @@ void test_not_connected() // Simulate a successful write BOOST_TEST_EQ(mpx.prepare_write(), 1u); // one request was placed in the packet to write - BOOST_TEST_EQ(mpx.commit_write(), 0u); // all requests expect a response + BOOST_TEST(mpx.commit_write(mpx.get_write_buffer().size())); // Simulate a successful read read(mpx, "$5\r\nhello\r\n"); @@ -329,7 +329,7 @@ void test_cancel_notwaiting_terminal_partial() // The multiplexer starts writing the request BOOST_TEST_EQ_MSG(mpx.prepare_write(), 1u, tc.name); - BOOST_TEST_EQ_MSG(mpx.commit_write(), 0u, tc.name); + BOOST_TEST_EQ_MSG(mpx.commit_write(mpx.get_write_buffer().size()), true, tc.name); // A cancellation arrives act = fsm.resume(true, tc.type); @@ -368,7 +368,7 @@ void test_cancel_notwaiting_total() // Simulate a successful write BOOST_TEST_EQ(mpx.prepare_write(), 1u); - BOOST_TEST_EQ(mpx.commit_write(), 0u); // all requests expect a response + BOOST_TEST(mpx.commit_write(mpx.get_write_buffer().size())); // We got requested a cancellation here, but we can't honor it act = fsm.resume(true, asio::cancellation_type_t::total); From b9ee18202aa40d8411c80e1ae427687953f8cb4d Mon Sep 17 00:00:00 2001 From: Ruben Perez Date: Fri, 17 Oct 2025 13:18:43 +0200 Subject: [PATCH 38/41] Fix writer fsm tests --- test/test_writer_fsm.cpp | 42 ++++++++++++++++++---------------------- 1 file changed, 19 insertions(+), 23 deletions(-) diff --git a/test/test_writer_fsm.cpp b/test/test_writer_fsm.cpp index 153ebd62..f206b2e4 100644 --- a/test/test_writer_fsm.cpp +++ b/test/test_writer_fsm.cpp @@ -63,11 +63,10 @@ bool operator==(const writer_action& lhs, const writer_action& rhs) noexcept if (lhs.type() != rhs.type()) return false; switch (lhs.type()) { - case writer_action_type::done: return lhs.error() == rhs.error(); + case writer_action_type::done: return lhs.error() == rhs.error(); case writer_action_type::write_some: - return lhs.write_offset() == rhs.write_offset() && lhs.timeout() == rhs.timeout(); - case writer_action_type::wait: return lhs.timeout() == rhs.timeout(); - default: BOOST_ASSERT(false); + case writer_action_type::wait: return lhs.timeout() == rhs.timeout(); + default: BOOST_ASSERT(false); } return false; } @@ -79,9 +78,6 @@ std::ostream& operator<<(std::ostream& os, const writer_action& act) switch (t) { case writer_action_type::done: os << ", .error=" << act.error(); break; case writer_action_type::write_some: - os << ", .write_buffer=" << act.write_offset() - << ", timeout=" << to_milliseconds(act.timeout()) << "ms"; - break; case writer_action_type::wait: os << ", .timeout=" << to_milliseconds(act.timeout()) << "ms"; break; @@ -136,7 +132,7 @@ void test_single_request() // Start. A write is triggered, and the request is marked as staged auto act = fix.fsm.resume(fix.st, error_code(), 0u, cancellation_type_t::none); - BOOST_TEST_EQ(act, writer_action::write_some(0u, 4s)); + BOOST_TEST_EQ(act, writer_action::write_some(4s)); BOOST_TEST(item1.elm->is_staged()); // The write completes successfully. The request is written, and we go back to sleep. @@ -150,7 +146,7 @@ void test_single_request() // The wait is cancelled to signal we've got a new request act = fix.fsm.resume(fix.st, asio::error::operation_aborted, 0u, cancellation_type_t::none); - BOOST_TEST_EQ(act, writer_action::write_some(0u, 4s)); + BOOST_TEST_EQ(act, writer_action::write_some(4s)); BOOST_TEST(item2.elm->is_staged()); // Write successful @@ -178,7 +174,7 @@ void test_request_arrives_while_writing() // Start. A write is triggered, and the request is marked as staged auto act = fix.fsm.resume(fix.st, error_code(), 0u, cancellation_type_t::none); - BOOST_TEST_EQ(act, writer_action::write_some(0u, 4s)); + BOOST_TEST_EQ(act, writer_action::write_some(4s)); BOOST_TEST(item1.elm->is_staged()); // While the write is outstanding, a new request arrives @@ -188,7 +184,7 @@ void test_request_arrives_while_writing() // and we start writing the new one act = fix.fsm .resume(fix.st, error_code(), item1.req.payload().size(), cancellation_type_t::none); - BOOST_TEST_EQ(act, writer_action::write_some(0u, 4s)); + BOOST_TEST_EQ(act, writer_action::write_some(4s)); BOOST_TEST(item1.elm->is_written()); BOOST_TEST(item2.elm->is_staged()); @@ -221,7 +217,7 @@ void test_no_request_at_startup() // The wait is cancelled to signal we've got a new request act = fix.fsm.resume(fix.st, asio::error::operation_aborted, 0u, cancellation_type_t::none); - BOOST_TEST_EQ(act, writer_action::write_some(0u, 4s)); + BOOST_TEST_EQ(act, writer_action::write_some(4s)); BOOST_TEST(item.elm->is_staged()); // Write successful @@ -247,22 +243,22 @@ void test_short_writes() // Start. A write is triggered, and the request is marked as staged auto act = fix.fsm.resume(fix.st, error_code(), 0u, cancellation_type_t::none); - BOOST_TEST_EQ(act, writer_action::write_some(0u, 4s)); + BOOST_TEST_EQ(act, writer_action::write_some(4s)); BOOST_TEST(item1.elm->is_staged()); // We write a few bytes. It's not the entire message, so we write again act = fix.fsm.resume(fix.st, error_code(), 2u, cancellation_type_t::none); - BOOST_TEST_EQ(act, writer_action::write_some(2u, 4s)); + BOOST_TEST_EQ(act, writer_action::write_some(4s)); BOOST_TEST(item1.elm->is_staged()); // We write some more bytes, but still not the entire message. act = fix.fsm.resume(fix.st, error_code(), 5u, cancellation_type_t::none); - BOOST_TEST_EQ(act, writer_action::write_some(7u, 4s)); + BOOST_TEST_EQ(act, writer_action::write_some(4s)); BOOST_TEST(item1.elm->is_staged()); // A zero size write doesn't cause trouble act = fix.fsm.resume(fix.st, error_code(), 0u, cancellation_type_t::none); - BOOST_TEST_EQ(act, writer_action::write_some(7u, 4s)); + BOOST_TEST_EQ(act, writer_action::write_some(4s)); BOOST_TEST(item1.elm->is_staged()); // Complete writing the message (the entire payload is 24 bytes long) @@ -293,7 +289,7 @@ void test_ping() // No request arrives during the wait interval so a ping is added act = fix.fsm.resume(fix.st, error_code(), 0u, cancellation_type_t::none); - BOOST_TEST_EQ(act, writer_action::write_some(0u, 4s)); + BOOST_TEST_EQ(act, writer_action::write_some(4s)); BOOST_TEST_EQ(fix.st.mpx.get_write_buffer(), ping_payload); // Write successful @@ -327,7 +323,7 @@ void test_health_checks_disabled() // Start. A write is triggered, and the request is marked as staged auto act = fix.fsm.resume(fix.st, error_code(), 0u, cancellation_type_t::none); - BOOST_TEST_EQ(act, writer_action::write_some(0u, 0s)); + BOOST_TEST_EQ(act, writer_action::write_some(0s)); BOOST_TEST(item.elm->is_staged()); // The write completes successfully. The request is written, and we go back to sleep. @@ -354,7 +350,7 @@ void test_ping_error() // No request arrives during the wait interval so a ping is added act = fix.fsm.resume(fix.st, error_code(), 0u, cancellation_type_t::none); - BOOST_TEST_EQ(act, writer_action::write_some(0u, 4s)); + BOOST_TEST_EQ(act, writer_action::write_some(4s)); // Write successful const auto ping_size = fix.st.mpx.get_write_buffer().size(); @@ -388,7 +384,7 @@ void test_write_error() // Start. A write is triggered, and the request is marked as staged auto act = fix.fsm.resume(fix.st, error_code(), 0u, cancellation_type_t::none); - BOOST_TEST_EQ(act, writer_action::write_some(0u, 4s)); + BOOST_TEST_EQ(act, writer_action::write_some(4s)); BOOST_TEST(item.elm->is_staged()); // The write completes with an error (possibly with partial success). @@ -416,7 +412,7 @@ void test_write_timeout() // Start. A write is triggered, and the request is marked as staged auto act = fix.fsm.resume(fix.st, error_code(), 0u, cancellation_type_t::none); - BOOST_TEST_EQ(act, writer_action::write_some(0u, 4s)); + BOOST_TEST_EQ(act, writer_action::write_some(4s)); BOOST_TEST(item.elm->is_staged()); // The write times out, so it completes with operation_aborted @@ -444,7 +440,7 @@ void test_cancel_write() // Start. A write is triggered auto act = fix.fsm.resume(fix.st, error_code(), 0u, cancellation_type_t::none); - BOOST_TEST_EQ(act, writer_action::write_some(0u, 4s)); + BOOST_TEST_EQ(act, writer_action::write_some(4s)); BOOST_TEST(item.elm->is_staged()); // Write cancelled and failed with operation_aborted @@ -471,7 +467,7 @@ void test_cancel_write_edge() // Start. A write is triggered auto act = fix.fsm.resume(fix.st, error_code(), 0u, cancellation_type_t::none); - BOOST_TEST_EQ(act, writer_action::write_some(0u, 4s)); + BOOST_TEST_EQ(act, writer_action::write_some(4s)); BOOST_TEST(item.elm->is_staged()); // Write cancelled but without error From 3c244174361a0f6114e3b7f414b24bd886aa72c6 Mon Sep 17 00:00:00 2001 From: Ruben Perez Date: Fri, 17 Oct 2025 13:24:12 +0200 Subject: [PATCH 39/41] Fix failing multiplexer tests --- test/test_multiplexer.cpp | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/test/test_multiplexer.cpp b/test/test_multiplexer.cpp index 7ee3095e..22b56246 100644 --- a/test/test_multiplexer.cpp +++ b/test/test_multiplexer.cpp @@ -162,9 +162,9 @@ void test_several_requests() // The write buffer holds the 3 requests, coalesced constexpr std::string_view expected_buffer = - "*2\r\n$4\r\nPING\r\n$8\r\ncmd-arg\r\n" - "*2\r\n$4\r\nPING\r\n$8\r\ncmd-arg\r\n" - "*2\r\n$4\r\nPING\r\n$8\r\ncmd-arg\r\n"; + "*2\r\n$4\r\nPING\r\n$7\r\ncmd-arg\r\n" + "*2\r\n$4\r\nPING\r\n$7\r\ncmd-arg\r\n" + "*2\r\n$9\r\nSUBSCRIBE\r\n$7\r\ncmd-arg\r\n"; BOOST_TEST_EQ(mpx.get_write_buffer(), expected_buffer); // After coalescing the requests for writing their statuses should @@ -245,8 +245,8 @@ void test_short_writes() BOOST_TEST_EQ(mpx.prepare_write(), 2u); BOOST_TEST_EQ( mpx.get_write_buffer(), - "*2\r\n$4\r\nPING\r\n$8\r\ncmd-arg\r\n" - "*2\r\n$9\r\nSUBSCRIBE\r\n$8\r\ncmd-arg\r\n"); + "*2\r\n$4\r\nPING\r\n$7\r\ncmd-arg\r\n" + "*2\r\n$9\r\nSUBSCRIBE\r\n$7\r\ncmd-arg\r\n"); BOOST_TEST(item1.elem_ptr->is_staged()); BOOST_TEST(item2.elem_ptr->is_staged()); @@ -254,25 +254,25 @@ void test_short_writes() BOOST_TEST_NOT(mpx.commit_write(8u)); BOOST_TEST_EQ( mpx.get_write_buffer(), - "PING\r\n$8\r\ncmd-arg\r\n" - "*2\r\n$9\r\nSUBSCRIBE\r\n$8\r\ncmd-arg\r\n"); + "PING\r\n$7\r\ncmd-arg\r\n" + "*2\r\n$9\r\nSUBSCRIBE\r\n$7\r\ncmd-arg\r\n"); BOOST_TEST(item1.elem_ptr->is_staged()); BOOST_TEST(item2.elem_ptr->is_staged()); // Write another part - BOOST_TEST_NOT(mpx.commit_write(20u)); - BOOST_TEST_EQ(mpx.get_write_buffer(), "*2\r\n$9\r\nSUBSCRIBE\r\n$8\r\ncmd-arg\r\n"); + BOOST_TEST_NOT(mpx.commit_write(19u)); + BOOST_TEST_EQ(mpx.get_write_buffer(), "*2\r\n$9\r\nSUBSCRIBE\r\n$7\r\ncmd-arg\r\n"); BOOST_TEST(item1.elem_ptr->is_staged()); BOOST_TEST(item2.elem_ptr->is_staged()); // A zero-size write doesn't cause trouble BOOST_TEST_NOT(mpx.commit_write(0u)); - BOOST_TEST_EQ(mpx.get_write_buffer(), "*2\r\n$9\r\nSUBSCRIBE\r\n$8\r\ncmd-arg\r\n"); + BOOST_TEST_EQ(mpx.get_write_buffer(), "*2\r\n$9\r\nSUBSCRIBE\r\n$7\r\ncmd-arg\r\n"); BOOST_TEST(item1.elem_ptr->is_staged()); BOOST_TEST(item2.elem_ptr->is_staged()); // Write everything except the last byte - BOOST_TEST_NOT(mpx.commit_write(32u)); + BOOST_TEST_NOT(mpx.commit_write(31u)); BOOST_TEST_EQ(mpx.get_write_buffer(), "\n"); BOOST_TEST(item1.elem_ptr->is_staged()); BOOST_TEST(item2.elem_ptr->is_staged()); From 0028adf471ff2fc5505ba1779ef6f02a4d5a000f Mon Sep 17 00:00:00 2001 From: Ruben Perez Date: Fri, 17 Oct 2025 13:40:19 +0200 Subject: [PATCH 40/41] Remove unused function --- include/boost/redis/impl/writer_fsm.ipp | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/include/boost/redis/impl/writer_fsm.ipp b/include/boost/redis/impl/writer_fsm.ipp index ed1518c7..8e129dfd 100644 --- a/include/boost/redis/impl/writer_fsm.ipp +++ b/include/boost/redis/impl/writer_fsm.ipp @@ -26,7 +26,7 @@ namespace boost::redis::detail { -void process_ping_node( +inline void process_ping_node( connection_logger& lgr, resp3::basic_node const& nd, system::error_code& ec) @@ -51,11 +51,6 @@ inline any_adapter make_ping_adapter(connection_logger& lgr) }}; } -inline bool write_done(const connection_state& st, std::size_t bytes_written) -{ - return bytes_written >= st.mpx.get_write_buffer().size(); -} - writer_action writer_fsm::resume( connection_state& st, system::error_code ec, From 0a0d5a6886c815a232eba87a274019049ae323f8 Mon Sep 17 00:00:00 2001 From: Ruben Perez Date: Fri, 17 Oct 2025 14:24:30 +0200 Subject: [PATCH 41/41] Missing includes --- example/cpp20_chat_room.cpp | 1 + example/cpp20_echo_server.cpp | 1 + 2 files changed, 2 insertions(+) diff --git a/example/cpp20_chat_room.cpp b/example/cpp20_chat_room.cpp index a3a960df..e765865e 100644 --- a/example/cpp20_chat_room.cpp +++ b/example/cpp20_chat_room.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include diff --git a/example/cpp20_echo_server.cpp b/example/cpp20_echo_server.cpp index 217c0a9f..f4e27751 100644 --- a/example/cpp20_echo_server.cpp +++ b/example/cpp20_echo_server.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include