diff --git a/example/cpp20_chat_room.cpp b/example/cpp20_chat_room.cpp index a3a960df..e765865e 100644 --- a/example/cpp20_chat_room.cpp +++ b/example/cpp20_chat_room.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include diff --git a/example/cpp20_echo_server.cpp b/example/cpp20_echo_server.cpp index 217c0a9f..f4e27751 100644 --- a/example/cpp20_echo_server.cpp +++ b/example/cpp20_echo_server.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include diff --git a/include/boost/redis/config.hpp b/include/boost/redis/config.hpp index 0a2d666f..5a812141 100644 --- a/include/boost/redis/config.hpp +++ b/include/boost/redis/config.hpp @@ -83,7 +83,7 @@ struct config { */ std::optional database_index = 0; - /// Message used by the health-checker in @ref boost::redis::basic_connection::async_run. + /// Message used by `PING` commands sent by the health checker. std::string health_check_id = "Boost.Redis"; /** @@ -105,7 +105,24 @@ struct config { std::chrono::steady_clock::duration ssl_handshake_timeout = std::chrono::seconds{10}; /** @brief Time span between successive health checks. - * Set to zero to disable health-checks pass zero as duration. + * Set to zero to disable health-checks. + * + * When this value is set to a non-zero duration, @ref basic_connection::async_run + * will issue `PING` commands whenever no command is sent to the server for more + * than `health_check_interval`. You can configure the message passed to the `PING` + * command using @ref health_check_id. + * + * Enabling health checks also sets timeouts to individual network + * operations. The connection is considered dead if: + * + * @li No byte can be written to the server after `health_check_interval`. + * @li No byte is read from the server after `2 * health_check_interval`. + * + * If the health checker finds that the connection is unresponsive, it will be closed, + * and a reconnection will be triggered, as if a network error had occurred. + * + * The exact timeout values are *not* part of the interface, and might change + * in future versions. */ std::chrono::steady_clock::duration health_check_interval = std::chrono::seconds{2}; diff --git a/include/boost/redis/connection.hpp b/include/boost/redis/connection.hpp index fbda48ea..993bc526 100644 --- a/include/boost/redis/connection.hpp +++ b/include/boost/redis/connection.hpp @@ -13,11 +13,9 @@ #include #include #include -#include #include #include #include -#include #include #include #include @@ -36,10 +34,9 @@ #include #include #include -#include +#include #include #include -#include #include #include #include @@ -47,14 +44,10 @@ #include #include #include -#include -#include #include #include -#include #include #include -#include #include #include @@ -66,6 +59,14 @@ namespace boost::redis { namespace detail { +// Given a timeout value, compute the expiry time. A zero timeout is considered to mean "no timeout" +inline std::chrono::steady_clock::time_point compute_expiry( + std::chrono::steady_clock::duration timeout) +{ + return timeout.count() == 0 ? (std::chrono::steady_clock::time_point::max)() + : std::chrono::steady_clock::now() + timeout; +} + template struct connection_impl { using clock_type = std::chrono::steady_clock; @@ -80,11 +81,9 @@ struct connection_impl { void(system::error_code, std::size_t)>; redis_stream stream_; - - // Notice we use a timer to simulate a condition-variable. It is - // also more suitable than a channel and the notify operation does - // not suspend. - timer_type writer_timer_; + timer_type writer_timer_; // timer used for write timeouts + timer_type writer_cv_; // condition variable, cancelled when there is new data to write + timer_type reader_timer_; // timer used for read timeouts timer_type reconnect_timer_; // to wait the reconnection period timer_type ping_timer_; // to wait between pings receive_channel_type receive_channel_; @@ -93,7 +92,7 @@ struct connection_impl { using executor_type = Executor; - executor_type get_executor() noexcept { return writer_timer_.get_executor(); } + executor_type get_executor() noexcept { return writer_cv_.get_executor(); } struct exec_op { connection_impl* obj_ = nullptr; @@ -116,7 +115,7 @@ struct connection_impl { asio::async_immediate(self.get_io_executor(), std::move(self)); return; case exec_action_type::notify_writer: - obj_->writer_timer_.cancel(); + obj_->writer_cv_.cancel(); continue; // this action does not require yielding case exec_action_type::wait_for_response: notifier_->async_receive(std::move(self)); @@ -133,13 +132,15 @@ struct connection_impl { connection_impl(Executor&& ex, asio::ssl::context&& ctx, logger&& lgr) : stream_{ex, std::move(ctx)} , writer_timer_{ex} + , writer_cv_{ex} + , reader_timer_{ex} , reconnect_timer_{ex} , ping_timer_{ex} , receive_channel_{ex, 256} , st_{std::move(lgr)} { set_receive_adapter(any_adapter{ignore}); - writer_timer_.expires_at((std::chrono::steady_clock::time_point::max)()); + writer_cv_.expires_at((std::chrono::steady_clock::time_point::max)()); } void cancel(operation op) @@ -200,7 +201,7 @@ struct connection_impl { return asio::async_compose( exec_op{this, notifier, exec_fsm(st_.mpx, std::move(info))}, token, - writer_timer_); + writer_cv_); } void set_receive_adapter(any_adapter adapter) @@ -216,23 +217,32 @@ struct writer_op { explicit writer_op(connection_impl& conn) noexcept : conn_(&conn) - , fsm_(conn.st_.mpx, conn.st_.logger) { } template void operator()(Self& self, system::error_code ec = {}, std::size_t bytes_written = 0u) { - auto act = fsm_.resume(ec, bytes_written, self.get_cancellation_state().cancelled()); - - switch (act.type) { - case writer_action_type::done: self.complete(act.ec); return; - case writer_action_type::write: - asio::async_write( - conn_->stream_, - asio::buffer(conn_->st_.mpx.get_write_buffer()), - std::move(self)); + auto* conn = conn_; // Prevent potential use-after-move errors with cancel_after + auto act = fsm_.resume( + conn->st_, + ec, + bytes_written, + self.get_cancellation_state().cancelled()); + + switch (act.type()) { + case writer_action_type::done: self.complete(act.error()); return; + case writer_action_type::write_some: + conn->stream_.async_write_some( + asio::buffer(conn->st_.mpx.get_write_buffer()), + asio::cancel_at( + conn->writer_timer_, + compute_expiry(act.timeout()), + std::move(self))); + return; + case writer_action_type::wait: + conn->writer_cv_.expires_at(compute_expiry(act.timeout())); + conn->writer_cv_.async_wait(std::move(self)); return; - case writer_action_type::wait: conn_->writer_timer_.async_wait(std::move(self)); return; } } }; @@ -251,130 +261,34 @@ struct reader_op { void operator()(Self& self, system::error_code ec = {}, std::size_t n = 0) { for (;;) { - auto act = fsm_.resume(conn_->st_, n, ec, self.get_cancellation_state().cancelled()); + auto* conn = conn_; // Prevent potential use-after-move errors with cancel_after + auto act = fsm_.resume(conn->st_, n, ec, self.get_cancellation_state().cancelled()); - switch (act.type_) { + switch (act.get_type()) { case reader_fsm::action::type::read_some: - { - auto const buf = conn_->st_.mpx.get_prepared_read_buffer(); - conn_->stream_.async_read_some(asio::buffer(buf), std::move(self)); - } + conn->stream_.async_read_some( + asio::buffer(conn->st_.mpx.get_prepared_read_buffer()), + asio::cancel_at( + conn->reader_timer_, + compute_expiry(act.timeout()), + std::move(self))); return; case reader_fsm::action::type::notify_push_receiver: - if (conn_->receive_channel_.try_send(ec, act.push_size_)) { + if (conn->receive_channel_.try_send(ec, act.push_size())) { continue; } else { - conn_->receive_channel_.async_send(ec, act.push_size_, std::move(self)); - return; + conn->receive_channel_.async_send(ec, act.push_size(), std::move(self)); } return; - case reader_fsm::action::type::done: self.complete(act.ec_); return; - } - } - } -}; - -template -struct health_checker_op { - connection_impl* conn_; - asio::coroutine coro_{}; - - system::error_code check_errors(system::error_code io_ec, asio::cancellation_type_t cancel_state) - { - // Did we have a cancellation? We might not have an error code here - if ((cancel_state & asio::cancellation_type_t::terminal) != asio::cancellation_type_t::none) { - conn_->st_.logger.log(logger::level::info, "Health checker: cancelled"); - return asio::error::operation_aborted; - } - - // operation_aborted and no cancel state means that asio::cancel_after timed out - if (io_ec == asio::error::operation_aborted) { - conn_->st_.logger.log(logger::level::info, "Health checker: ping timed out"); - return error::pong_timeout; - } - - // Did we have other unknown error? - if (io_ec) { - conn_->st_.logger.log(logger::level::info, "Health checker: ping error", io_ec); - return io_ec; - } - - // Did the server answer with an error? - if (conn_->st_.ping_resp.has_error()) { - auto error = conn_->st_.ping_resp.error(); - conn_->st_.logger.log( - logger::level::info, - "Health checker: server answered ping with an error", - error.diagnostic); - return resp3_type_to_error(error.data_type); - } - - // No error - return system::error_code(); - } - -public: - health_checker_op(connection_impl& conn) noexcept - : conn_{&conn} - { } - - template - void operator()(Self& self, system::error_code ec = {}, std::size_t = {}) - { - BOOST_ASIO_CORO_REENTER(coro_) - { - if (conn_->st_.cfg.health_check_interval == std::chrono::seconds::zero()) { - conn_->st_.logger.trace("ping_op (1): timeout disabled."); - - // Wait until we're cancelled. This simplifies parallel group handling a lot - conn_->ping_timer_.expires_at((std::chrono::steady_clock::time_point::max)()); - BOOST_ASIO_CORO_YIELD conn_->ping_timer_.async_wait(std::move(self)); - self.complete(asio::error::operation_aborted); - return; - } - - for (;;) { - // Clean up any previous leftover - clear_response(conn_->st_.ping_resp); - - // Execute the request - BOOST_ASIO_CORO_YIELD - { - auto* conn = conn_; // avoid use-after-move problems - auto timeout = conn->st_.cfg.health_check_interval; - conn->async_exec( - conn->st_.ping_req, - any_adapter{conn->st_.ping_resp}, - asio::cancel_after(conn->ping_timer_, timeout, std::move(self))); - } - - // Check for cancellations and errors in PING - ec = check_errors(ec, self.get_cancellation_state().cancelled()); - if (ec) { - self.complete(ec); - return; - } - - // Wait before pinging again. - conn_->ping_timer_.expires_after(conn_->st_.cfg.health_check_interval); - - BOOST_ASIO_CORO_YIELD - conn_->ping_timer_.async_wait(std::move(self)); - - if (is_cancelled(self)) { - conn_->st_.logger.trace("ping_op (2): cancelled"); - self.complete(asio::error::operation_aborted); - return; - } + case reader_fsm::action::type::done: self.complete(act.error()); return; } } } }; system::error_code translate_parallel_group_errors( - std::array order, + std::array order, system::error_code setup_ec, - system::error_code health_check_ec, system::error_code reader_ec, system::error_code writer_ec); @@ -420,7 +334,7 @@ class run_op { return asio::async_compose( reader_op{*conn_}, std::forward(token), - conn_->writer_timer_); + conn_->writer_cv_); } template @@ -429,16 +343,7 @@ class run_op { return asio::async_compose( writer_op{*conn_}, std::forward(token), - conn_->writer_timer_); - } - - template - auto health_checker(CompletionToken&& token) - { - return asio::async_compose( - health_checker_op{*conn_}, - std::forward(token), - conn_->writer_timer_); + conn_->writer_cv_); } public: @@ -450,15 +355,12 @@ class run_op { template void operator()( Self& self, - std::array order, + std::array order, system::error_code setup_ec, - system::error_code health_check_ec, system::error_code reader_ec, system::error_code writer_ec) { - (*this)( - self, - translate_parallel_group_errors(order, setup_ec, health_check_ec, reader_ec, writer_ec)); + (*this)(self, translate_parallel_group_errors(order, setup_ec, reader_ec, writer_ec)); } template @@ -483,9 +385,6 @@ class run_op { [this](auto token) { return this->send_setup(token); }, - [this](auto token) { - return this->health_checker(token); - }, [this](auto token) { return this->reader(token); }, @@ -626,7 +525,7 @@ class basic_connection { { } /// Returns the associated executor. - executor_type get_executor() noexcept { return impl_->writer_timer_.get_executor(); } + executor_type get_executor() noexcept { return impl_->writer_cv_.get_executor(); } /** @brief Starts the underlying connection operations. * @@ -637,19 +536,15 @@ class basic_connection { * @ref boost::redis::config::addr. * @li Establishes a physical connection to the server. For TCP connections, * connects to one of the endpoints obtained during name resolution. - * For UNIX domain socket connections, it connects to @ref boost::redis::config::unix_socket. + * For UNIX domain socket connections, it connects to @ref boost::redis::config::unix_sockets. * @li If @ref boost::redis::config::use_ssl is `true`, performs the TLS handshake. * @li Executes the setup request, as defined by the passed @ref config object. * By default, this is a `HELLO` command, but it can contain any other arbitrary - * commands. See the @ref config docs for more info. - * @li Starts a health-check operation where ping commands are sent + * commands. See the @ref config::setup docs for more info. + * @li Starts a health-check operation where `PING` commands are sent * at intervals specified by - * @ref boost::redis::config::health_check_interval. - * The message passed to `PING` will be @ref boost::redis::config::health_check_id. - * Passing an interval with a zero value will disable health-checks. If the Redis - * server does not respond to a health-check within two times the value - * specified here, it will be considered unresponsive and the connection - * will be closed. + * @ref config::health_check_interval when the connection is idle. + * See the documentation of @ref config::health_check_interval for more info. * @li Starts read and write operations. Requests issued using @ref async_exec * before `async_run` is called will be written to the server immediately. * @@ -1046,7 +941,7 @@ class basic_connection { asio::async_compose( detail::run_op{self}, token_with_slot, - self->writer_timer_); + self->writer_cv_); } }; diff --git a/include/boost/redis/detail/connection_logger.hpp b/include/boost/redis/detail/connection_logger.hpp index 2b2800e2..b1cb28e6 100644 --- a/include/boost/redis/detail/connection_logger.hpp +++ b/include/boost/redis/detail/connection_logger.hpp @@ -36,7 +36,7 @@ class connection_logger { void on_connect(system::error_code const& ec, asio::ip::tcp::endpoint const& ep); void on_connect(system::error_code const& ec, std::string_view unix_socket_ep); void on_ssl_handshake(system::error_code const& ec); - void on_write(system::error_code const& ec, std::size_t n); + void on_write(std::size_t n); void on_read(system::error_code const& ec, std::size_t n); void on_setup(system::error_code const& ec, generic_response const& resp); void log(logger::level lvl, std::string_view msg); diff --git a/include/boost/redis/detail/connection_state.hpp b/include/boost/redis/detail/connection_state.hpp index 688c56d5..8e13e43c 100644 --- a/include/boost/redis/detail/connection_state.hpp +++ b/include/boost/redis/detail/connection_state.hpp @@ -25,7 +25,6 @@ struct connection_state { multiplexer mpx{}; generic_response setup_resp{}; request ping_req{}; - generic_response ping_resp{}; }; } // namespace boost::redis::detail diff --git a/include/boost/redis/detail/helper.hpp b/include/boost/redis/detail/helper.hpp deleted file mode 100644 index 58a7fe97..00000000 --- a/include/boost/redis/detail/helper.hpp +++ /dev/null @@ -1,22 +0,0 @@ -/* Copyright (c) 2018-2024 Marcelo Zimbres Silva (mzimbres@gmail.com) - * - * Distributed under the Boost Software License, Version 1.0. (See - * accompanying file LICENSE.txt) - */ - -#ifndef BOOST_REDIS_HELPER_HPP -#define BOOST_REDIS_HELPER_HPP - -#include - -namespace boost::redis::detail { - -template -auto is_cancelled(T const& self) -{ - return self.get_cancellation_state().cancelled() != asio::cancellation_type_t::none; -} - -} // namespace boost::redis::detail - -#endif // BOOST_REDIS_HELPER_HPP diff --git a/include/boost/redis/detail/multiplexer.hpp b/include/boost/redis/detail/multiplexer.hpp index 040ba1d2..14afc6b5 100644 --- a/include/boost/redis/detail/multiplexer.hpp +++ b/include/boost/redis/detail/multiplexer.hpp @@ -18,6 +18,7 @@ #include +#include #include #include #include @@ -136,12 +137,12 @@ class multiplexer { [[nodiscard]] auto prepare_write() -> std::size_t; - // To be called after a successful write operation. - // Returns the number of requests that have been released because - // they don't have a response e.g. SUBSCRIBE. + // To be called after a write operation. + // Returns true once all the bytes in the buffer generated by prepare_write + // have been written. // Must be called before cancel_on_conn_lost() because it might change // request status. - auto commit_write() -> std::size_t; + auto commit_write(std::size_t bytes_written) -> bool; // To be called after a successful read operation. // Must be called before cancel_on_conn_lost() because it might change @@ -175,9 +176,9 @@ class multiplexer { void cancel_on_conn_lost(); [[nodiscard]] - auto get_write_buffer() noexcept -> std::string_view + auto get_write_buffer() const noexcept -> std::string_view { - return std::string_view{write_buffer_}; + return std::string_view{write_buffer_}.substr(write_offset_); } [[nodiscard]] @@ -199,9 +200,6 @@ class multiplexer { return usage_; } - [[nodiscard]] - auto is_writing() const noexcept -> bool; - void set_config(config const& cfg); private: @@ -210,15 +208,15 @@ class multiplexer { [[nodiscard]] auto is_next_push(std::string_view data) const noexcept -> bool; - // Releases the number of requests that have been released. - [[nodiscard]] - auto release_push_requests() -> std::size_t; + // Completes requests that don't expect a response + void release_push_requests(); [[nodiscard]] consume_result consume_impl(system::error_code& ec); read_buffer read_buffer_; std::string write_buffer_; + std::size_t write_offset_{}; // how many bytes of the write buffer have been written? std::deque> reqs_; resp3::parser parser_{}; bool on_push_ = false; diff --git a/include/boost/redis/detail/reader_fsm.hpp b/include/boost/redis/detail/reader_fsm.hpp index e0d7de11..3e7a7032 100644 --- a/include/boost/redis/detail/reader_fsm.hpp +++ b/include/boost/redis/detail/reader_fsm.hpp @@ -13,6 +13,7 @@ #include #include +#include #include namespace boost::redis::detail { @@ -21,7 +22,8 @@ class read_buffer; class reader_fsm { public: - struct action { + class action { + public: enum class type { read_some, @@ -29,24 +31,52 @@ class reader_fsm { done, }; - action(type t, std::size_t push_size = 0u) noexcept - : type_(t) - , push_size_(push_size) - { } - action(system::error_code ec) noexcept : type_(type::done) , ec_(ec) { } - static action notify_push_receiver(std::size_t bytes) + static action read_some(std::chrono::steady_clock::duration timeout) { return {timeout}; } + + static action notify_push_receiver(std::size_t bytes) { return {bytes}; } + + type get_type() const { return type_; } + + system::error_code error() const + { + BOOST_ASSERT(type_ == type::done); + return ec_; + } + + std::chrono::steady_clock::duration timeout() const { - return {type::notify_push_receiver, bytes}; + BOOST_ASSERT(type_ == type::read_some); + return timeout_; } + std::size_t push_size() const + { + BOOST_ASSERT(type_ == type::notify_push_receiver); + return push_size_; + } + + private: + action(std::size_t push_size) noexcept + : type_(type::notify_push_receiver) + , push_size_(push_size) + { } + + action(std::chrono::steady_clock::duration t) noexcept + : type_(type::read_some) + , timeout_(t) + { } + type type_; - std::size_t push_size_{}; - system::error_code ec_; + union { + system::error_code ec_; + std::chrono::steady_clock::duration timeout_; + std::size_t push_size_{}; + }; }; action resume( diff --git a/include/boost/redis/detail/writer_fsm.hpp b/include/boost/redis/detail/writer_fsm.hpp index 86f11cc2..187e6743 100644 --- a/include/boost/redis/detail/writer_fsm.hpp +++ b/include/boost/redis/detail/writer_fsm.hpp @@ -9,9 +9,12 @@ #ifndef BOOST_REDIS_WRITER_FSM_HPP #define BOOST_REDIS_WRITER_FSM_HPP +#include + #include #include +#include #include // Sans-io algorithm for the writer task, as a finite state machine @@ -25,37 +28,62 @@ class multiplexer; // What should we do next? enum class writer_action_type { - done, // Call the final handler - write, // Issue a write on the stream - wait, // Wait until there is data to be written + done, // Call the final handler + write_some, // Issue a write on the stream + wait, // Wait until there is data to be written }; -struct writer_action { - writer_action_type type; - system::error_code ec; +class writer_action { + writer_action_type type_; + union { + system::error_code ec_; + std::chrono::steady_clock::duration timeout_; + }; - writer_action(writer_action_type type) noexcept - : type{type} + writer_action(writer_action_type type, std::chrono::steady_clock::duration t) noexcept + : type_{type} + , timeout_{t} { } +public: + writer_action_type type() const { return type_; } + writer_action(system::error_code ec) noexcept - : type{writer_action_type::done} - , ec{ec} + : type_{writer_action_type::done} + , ec_{ec} { } + + static writer_action write_some(std::chrono::steady_clock::duration timeout) + { + return {writer_action_type::write_some, timeout}; + } + + static writer_action wait(std::chrono::steady_clock::duration timeout) + { + return {writer_action_type::wait, timeout}; + } + + system::error_code error() const + { + BOOST_ASSERT(type_ == writer_action_type::done); + return ec_; + } + + std::chrono::steady_clock::duration timeout() const + { + BOOST_ASSERT(type_ == writer_action_type::write_some || type_ == writer_action_type::wait); + return timeout_; + } }; class writer_fsm { int resume_point_{0}; - multiplexer* mpx_; - connection_logger* logger_; public: - writer_fsm(multiplexer& mpx, connection_logger& logger) noexcept - : mpx_(&mpx) - , logger_(&logger) - { } + writer_fsm() = default; writer_action resume( + connection_state& st, system::error_code ec, std::size_t bytes_written, asio::cancellation_type_t cancel_state); diff --git a/include/boost/redis/error.hpp b/include/boost/redis/error.hpp index a252789f..506d0139 100644 --- a/include/boost/redis/error.hpp +++ b/include/boost/redis/error.hpp @@ -68,7 +68,7 @@ enum class error /// Connect timeout connect_timeout, - /// Connect timeout + /// The server didn't answer the health checks on time and didn't send any data during the health check period. pong_timeout, /// SSL handshake timeout @@ -91,6 +91,9 @@ enum class error /// Reading data from the socket would exceed the maximum size allowed of the read buffer. exceeds_maximum_read_buffer_size, + + /// Timeout while writing data to the server. + write_timeout, }; /** diff --git a/include/boost/redis/impl/connection.ipp b/include/boost/redis/impl/connection.ipp index b23cc8b4..f127a469 100644 --- a/include/boost/redis/impl/connection.ipp +++ b/include/boost/redis/impl/connection.ipp @@ -22,9 +22,8 @@ logger detail::make_stderr_logger(logger::level lvl, std::string prefix) } system::error_code detail::translate_parallel_group_errors( - std::array order, + std::array order, system::error_code setup_ec, - system::error_code health_check_ec, system::error_code reader_ec, system::error_code writer_ec) { @@ -41,9 +40,8 @@ system::error_code detail::translate_parallel_group_errors( // excluding the setup task std::size_t task_number = order[0] == 0u ? order[1] : order[0]; switch (task_number) { - case 1u: return health_check_ec; - case 2u: return reader_ec; - case 3u: return writer_ec; + case 1u: return reader_ec; + case 2u: return writer_ec; default: BOOST_ASSERT(false); return system::error_code(); } } diff --git a/include/boost/redis/impl/connection_logger.ipp b/include/boost/redis/impl/connection_logger.ipp index dc5fa3e5..968f1705 100644 --- a/include/boost/redis/impl/connection_logger.ipp +++ b/include/boost/redis/impl/connection_logger.ipp @@ -114,21 +114,16 @@ void connection_logger::on_ssl_handshake(system::error_code const& ec) logger_.fn(logger::level::info, msg_); } -void connection_logger::on_write(system::error_code const& ec, std::size_t n) +void connection_logger::on_write(std::size_t n) { - if (logger_.lvl < logger::level::info) + if (logger_.lvl < logger::level::debug) return; - if (ec) { - msg_ = "Writer task error: "; - format_error_code(ec, msg_); - } else { - msg_ = "Writer task: "; - msg_ += std::to_string(n); - msg_ += " bytes written."; - } + msg_ = "Writer task: "; + msg_ += std::to_string(n); + msg_ += " bytes written."; - logger_.fn(logger::level::info, msg_); + logger_.fn(logger::level::debug, msg_); } void connection_logger::on_read(system::error_code const& ec, std::size_t bytes_read) diff --git a/include/boost/redis/impl/error.ipp b/include/boost/redis/impl/error.ipp index 5133feb1..dcd7ec67 100644 --- a/include/boost/redis/impl/error.ipp +++ b/include/boost/redis/impl/error.ipp @@ -54,6 +54,8 @@ struct error_category_impl : system::error_category { case error::exceeds_maximum_read_buffer_size: return "Reading data from the socket would exceed the maximum size allowed of the read " "buffer."; + case error::write_timeout: + return "Timeout while writing data to the server."; default: BOOST_ASSERT(false); return "Boost.Redis error."; } } diff --git a/include/boost/redis/impl/multiplexer.ipp b/include/boost/redis/impl/multiplexer.ipp index 176a3cee..b6d118ce 100644 --- a/include/boost/redis/impl/multiplexer.ipp +++ b/include/boost/redis/impl/multiplexer.ipp @@ -11,6 +11,7 @@ #include #include +#include #include namespace boost::redis::detail { @@ -65,13 +66,19 @@ void multiplexer::cancel(std::shared_ptr const& ptr) } } -std::size_t multiplexer::commit_write() +bool multiplexer::commit_write(std::size_t bytes_written) { BOOST_ASSERT(!cancel_run_called_); - usage_.bytes_sent += std::size(write_buffer_); + BOOST_ASSERT(bytes_written + write_offset_ <= write_buffer_.size()); - // We have to clear the payload right after writing it to use it - // as a flag that informs there is no ongoing write. + usage_.bytes_sent += bytes_written; + write_offset_ += bytes_written; + + // Are there still more bytes to write? + if (write_offset_ < write_buffer_.size()) + return false; + + // We've written all the bytes in the write buffer. write_buffer_.clear(); // There is small optimization possible here: traverse only the @@ -83,7 +90,9 @@ std::size_t multiplexer::commit_write() } }); - return release_push_requests(); + release_push_requests(); + + return true; } void multiplexer::add(std::shared_ptr const& info) @@ -151,8 +160,7 @@ consume_result multiplexer::consume_impl(system::error_code& ec) return consume_result::got_response; } -std::pair -multiplexer::consume(system::error_code& ec) +std::pair multiplexer::consume(system::error_code& ec) { BOOST_ASSERT(!cancel_run_called_); @@ -172,20 +180,14 @@ multiplexer::consume(system::error_code& ec) return std::make_pair(consume_result::needs_more, consumed); } -auto multiplexer::prepare_read() noexcept -> system::error_code -{ - return read_buffer_.prepare(); -} +auto multiplexer::prepare_read() noexcept -> system::error_code { return read_buffer_.prepare(); } auto multiplexer::get_prepared_read_buffer() noexcept -> read_buffer::span_type { return read_buffer_.get_prepared(); } -void multiplexer::commit_read(std::size_t bytes_read) -{ - read_buffer_.commit(bytes_read); -} +void multiplexer::commit_read(std::size_t bytes_read) { read_buffer_.commit(bytes_read); } auto multiplexer::get_read_buffer_size() const noexcept -> std::size_t { @@ -196,6 +198,7 @@ void multiplexer::reset() { read_buffer_.clear(); write_buffer_.clear(); + write_offset_ = 0u; parser_.reset(); on_push_ = false; cancel_run_called_ = false; @@ -222,6 +225,8 @@ std::size_t multiplexer::prepare_write() usage_.commands_sent += ri->get_request().get_commands(); }); + write_offset_ = 0u; + auto const d = std::distance(point, std::cend(reqs_)); return static_cast(d); } @@ -331,7 +336,7 @@ bool multiplexer::is_next_push(std::string_view data) const noexcept return reqs_.front()->is_waiting(); } -std::size_t multiplexer::release_push_requests() +void multiplexer::release_push_requests() { auto point = std::stable_partition( std::begin(reqs_), @@ -344,13 +349,9 @@ std::size_t multiplexer::release_push_requests() ptr->notify_done(); }); - auto const d = std::distance(point, std::end(reqs_)); reqs_.erase(point, std::end(reqs_)); - return static_cast(d); } -bool multiplexer::is_writing() const noexcept { return !write_buffer_.empty(); } - void multiplexer::set_receive_adapter(any_adapter adapter) { receive_adapter_ = std::move(adapter); diff --git a/include/boost/redis/impl/reader_fsm.ipp b/include/boost/redis/impl/reader_fsm.ipp index e21fa144..2ae05a6b 100644 --- a/include/boost/redis/impl/reader_fsm.ipp +++ b/include/boost/redis/impl/reader_fsm.ipp @@ -32,14 +32,23 @@ reader_fsm::action reader_fsm::resume( return {ec}; } - // Read + // Read. The connection might spend health_check_interval without writing data. + // Give it another health_check_interval for the response to arrive. + // If we don't get anything in this time, consider the connection as dead st.logger.trace("Reader task: issuing read"); - BOOST_REDIS_YIELD(resume_point_, 1, action::type::read_some) + BOOST_REDIS_YIELD(resume_point_, 1, action::read_some(2 * st.cfg.health_check_interval)) // Check for cancellations if (is_terminal_cancel(cancel_state)) { st.logger.trace("Reader task: cancelled (1)"); - return {asio::error::operation_aborted}; + return system::error_code(asio::error::operation_aborted); + } + + // Translate timeout errors caused by operation_aborted to more legible ones. + // A timeout here means that we didn't receive data in time. + // Note that cancellation is already handled by the above statement. + if (ec == asio::error::operation_aborted) { + ec = error::pong_timeout; } // Log what we read @@ -53,7 +62,7 @@ reader_fsm::action reader_fsm::resume( // TODO: If an error occurred but data was read (i.e. // bytes_read != 0) we should try to process that data and // deliver it to the user before calling cancel_run. - return {ec}; + return ec; } // Process the data that we've read @@ -64,7 +73,7 @@ reader_fsm::action reader_fsm::resume( // TODO: Perhaps log what has not been consumed to aid // debugging. st.logger.trace("Reader task: error processing message", ec); - return {ec}; + return ec; } if (res_.first == consume_result::needs_more) { @@ -77,13 +86,13 @@ reader_fsm::action reader_fsm::resume( // Check for cancellations if (is_terminal_cancel(cancel_state)) { st.logger.trace("Reader task: cancelled (2)"); - return {asio::error::operation_aborted}; + return system::error_code(asio::error::operation_aborted); } // Check for other errors if (ec) { st.logger.trace("Reader task: error notifying push receiver", ec); - return {ec}; + return ec; } } else { // TODO: Here we should notify the exec operation that @@ -98,7 +107,7 @@ reader_fsm::action reader_fsm::resume( } BOOST_ASSERT(false); - return {system::error_code()}; + return system::error_code(); } } // namespace boost::redis::detail diff --git a/include/boost/redis/impl/writer_fsm.ipp b/include/boost/redis/impl/writer_fsm.ipp index bb4304d2..8e129dfd 100644 --- a/include/boost/redis/impl/writer_fsm.ipp +++ b/include/boost/redis/impl/writer_fsm.ipp @@ -9,7 +9,9 @@ #ifndef BOOST_REDIS_WRITER_FSM_IPP #define BOOST_REDIS_WRITER_FSM_IPP +#include #include +#include #include #include #include @@ -24,7 +26,33 @@ namespace boost::redis::detail { +inline void process_ping_node( + connection_logger& lgr, + resp3::basic_node const& nd, + system::error_code& ec) +{ + switch (nd.data_type) { + case resp3::type::simple_error: ec = redis::error::resp3_simple_error; break; + case resp3::type::blob_error: ec = redis::error::resp3_blob_error; break; + default: ; + } + + if (ec) { + lgr.log(logger::level::info, "Health checker: server answered ping with an error", nd.value); + } +} + +inline any_adapter make_ping_adapter(connection_logger& lgr) +{ + return any_adapter{ + [&lgr](any_adapter::parse_event evt, resp3::node_view const& nd, system::error_code& ec) { + if (evt == any_adapter::parse_event::node) + process_ping_node(lgr, nd, ec); + }}; +} + writer_action writer_fsm::resume( + connection_state& st, system::error_code ec, std::size_t bytes_written, asio::cancellation_type_t cancel_state) @@ -34,37 +62,58 @@ writer_action writer_fsm::resume( for (;;) { // Attempt to write while we have requests ready to send - while (mpx_->prepare_write() != 0u) { - // Write - BOOST_REDIS_YIELD(resume_point_, 1, writer_action_type::write) - - // Mark requests as written - if (!ec) - mpx_->commit_write(); - - // Check for cancellations - if (is_terminal_cancel(cancel_state)) { - logger_->trace("Writer task: cancelled (1)."); - return system::error_code(asio::error::operation_aborted); - } - - // Log what we wrote - logger_->on_write(ec, bytes_written); - - // Check for errors - if (ec) { - return ec; + while (st.mpx.prepare_write() != 0u) { + // Write an entire message. We can't use asio::async_write because we want + // to apply timeouts to individual write operations + for (;;) { + // Write what we can. If nothing has been written for the health check + // interval, we consider the connection as failed + BOOST_REDIS_YIELD( + resume_point_, + 1, + writer_action::write_some(st.cfg.health_check_interval)) + + // Commit the received bytes. This accounts for partial success + bool finished = st.mpx.commit_write(bytes_written); + st.logger.on_write(bytes_written); + + // Check for cancellations and translate error codes + if (is_terminal_cancel(cancel_state)) + ec = asio::error::operation_aborted; + else if (ec == asio::error::operation_aborted) + ec = error::write_timeout; + + // Check for errors + if (ec) { + if (ec == asio::error::operation_aborted) { + st.logger.trace("Writer task: cancelled (1)."); + } else { + st.logger.trace("Writer task error", ec); + } + return ec; + } + + // Are we done yet? + if (finished) + break; } } - // No more requests ready to be written. Wait for more - BOOST_REDIS_YIELD(resume_point_, 2, writer_action_type::wait) + // No more requests ready to be written. Wait for more, or until we need to send a PING + BOOST_REDIS_YIELD(resume_point_, 2, writer_action::wait(st.cfg.health_check_interval)) // Check for cancellations if (is_terminal_cancel(cancel_state)) { - logger_->trace("Writer task: cancelled (2)."); + st.logger.trace("Writer task: cancelled (2)."); return system::error_code(asio::error::operation_aborted); } + + // If we weren't notified, it's because there is no data and we should send a health check + if (!ec) { + auto elem = make_elem(st.ping_req, make_ping_adapter(st.logger)); + elem->set_done_callback([] { }); + st.mpx.add(elem); + } } } diff --git a/test/sansio_utils.hpp b/test/sansio_utils.hpp index 7428b116..47a49ff3 100644 --- a/test/sansio_utils.hpp +++ b/test/sansio_utils.hpp @@ -11,6 +11,7 @@ #include +#include #include #include #include @@ -44,6 +45,11 @@ struct log_fixture { logger make_logger(); }; +constexpr auto to_milliseconds(std::chrono::steady_clock::duration d) +{ + return std::chrono::duration_cast(d).count(); +} + } // namespace boost::redis::detail #endif // BOOST_REDIS_TEST_SANSIO_UTILS_HPP diff --git a/test/test_conn_check_health.cpp b/test/test_conn_check_health.cpp index c50cdd9d..78062e3b 100644 --- a/test/test_conn_check_health.cpp +++ b/test/test_conn_check_health.cpp @@ -6,14 +6,19 @@ #include #include +#include #include #include +#include +#include #include #include "common.hpp" +#include #include +#include namespace net = boost::asio; namespace redis = boost::redis; @@ -21,6 +26,7 @@ using error_code = boost::system::error_code; using connection = boost::redis::connection; using boost::redis::request; using boost::redis::ignore; +using boost::redis::generic_response; using namespace std::chrono_literals; namespace { @@ -154,6 +160,98 @@ void test_disabled() BOOST_TEST(exec2_finished); } +// Receiving data is sufficient to consider our connection healthy. +// Sends a blocking request that causes PINGs to not be answered, +// and subscribes to a channel to receive pushes periodically. +// This simulates situations of heavy load, where PINGs may not be answered on time. +class test_flexible { + net::io_context ioc; + connection conn1{ioc}; // The one that simulates a heavy load condition + connection conn2{ioc}; // Publishes messages + net::steady_timer timer{ioc}; + request publish_req; + bool run1_finished = false, run2_finished = false, exec_finished{false}, + publisher_finished{false}; + + // Starts publishing messages to the channel + void start_publish() + { + conn2.async_exec(publish_req, ignore, [this](error_code ec, std::size_t) { + BOOST_TEST_EQ(ec, error_code()); + + if (exec_finished) { + // The blocking request finished, we're done + conn2.cancel(); + publisher_finished = true; + } else { + // Wait for some time and publish again + timer.expires_after(100ms); + timer.async_wait([this](error_code ec) { + BOOST_TEST_EQ(ec, error_code()); + start_publish(); + }); + } + }); + } + + // Generates a sufficiently unique name for channels so + // tests may be run in parallel for different configurations + static std::string make_unique_id() + { + auto t = std::chrono::high_resolution_clock::now(); + return "test-flexible-health-checks-" + std::to_string(t.time_since_epoch().count()); + } + +public: + test_flexible() = default; + + void run() + { + // Setup + auto cfg = make_test_config(); + cfg.health_check_interval = 500ms; + generic_response resp; + + std::string channel_name = make_unique_id(); + publish_req.push("PUBLISH", channel_name, "test_health_check_flexible"); + + // This request will block for much longer than the health check + // interval. If we weren't receiving pushes, the connection would be considered dead. + // If this request finishes successfully, the health checker is doing good + request blocking_req; + blocking_req.push("SUBSCRIBE", channel_name); + blocking_req.push("BLPOP", "any", 2); + blocking_req.get_config().cancel_if_unresponded = true; + blocking_req.get_config().cancel_on_connection_lost = true; + + conn1.async_run(cfg, [&](error_code ec) { + run1_finished = true; + BOOST_TEST_EQ(ec, net::error::operation_aborted); + }); + + conn2.async_run(cfg, [&](error_code ec) { + run2_finished = true; + BOOST_TEST_EQ(ec, net::error::operation_aborted); + }); + + // BLPOP will return NIL, so we can't use ignore + conn1.async_exec(blocking_req, resp, [&](error_code ec, std::size_t) { + exec_finished = true; + BOOST_TEST_EQ(ec, error_code()); + conn1.cancel(); + }); + + start_publish(); + + ioc.run_for(test_timeout); + + BOOST_TEST(run1_finished); + BOOST_TEST(run2_finished); + BOOST_TEST(exec_finished); + BOOST_TEST(publisher_finished); + } +}; + } // namespace int main() @@ -161,6 +259,7 @@ int main() test_reconnection(); test_error_code(); test_disabled(); + test_flexible().run(); return boost::report_errors(); } \ No newline at end of file diff --git a/test/test_conn_echo_stress.cpp b/test/test_conn_echo_stress.cpp index d3db9fe6..5b37489d 100644 --- a/test/test_conn_echo_stress.cpp +++ b/test/test_conn_echo_stress.cpp @@ -95,7 +95,6 @@ BOOST_AUTO_TEST_CASE(echo_stress) net::io_context ctx; connection conn{ctx}; auto cfg = make_test_config(); - cfg.health_check_interval = std::chrono::seconds::zero(); // Number of coroutines that will send pings sharing the same // connection to redis. diff --git a/test/test_conn_exec.cpp b/test/test_conn_exec.cpp index 4ac3bba9..28a23497 100644 --- a/test/test_conn_exec.cpp +++ b/test/test_conn_exec.cpp @@ -134,8 +134,7 @@ BOOST_AUTO_TEST_CASE(large_number_of_concurrent_requests_issue_170) auto conn = std::make_shared(ioc); auto cfg = make_test_config(); - cfg.health_check_interval = std::chrono::seconds(0); - conn->async_run(cfg, {}, net::detached); + conn->async_run(cfg, net::detached); constexpr int repeat = 8000; int remaining = repeat; diff --git a/test/test_exec_fsm.cpp b/test/test_exec_fsm.cpp index ada451b6..f2385355 100644 --- a/test/test_exec_fsm.cpp +++ b/test/test_exec_fsm.cpp @@ -138,7 +138,7 @@ void test_success() // Simulate a successful write BOOST_TEST_EQ(mpx.prepare_write(), 1u); // one request was placed in the packet to write - BOOST_TEST_EQ(mpx.commit_write(), 0u); // all requests expect a response + BOOST_TEST(mpx.commit_write(mpx.get_write_buffer().size())); // Simulate a successful read read(mpx, "$5\r\nhello\r\n"); @@ -177,7 +177,7 @@ void test_parse_error() // Simulate a successful write BOOST_TEST_EQ(mpx.prepare_write(), 1u); // one request was placed in the packet to write - BOOST_TEST_EQ(mpx.commit_write(), 0u); // all requests expect a response + BOOST_TEST(mpx.commit_write(mpx.get_write_buffer().size())); // Simulate a read that will trigger an error. // The second field should be a number (rather than the empty string). @@ -239,7 +239,7 @@ void test_not_connected() // Simulate a successful write BOOST_TEST_EQ(mpx.prepare_write(), 1u); // one request was placed in the packet to write - BOOST_TEST_EQ(mpx.commit_write(), 0u); // all requests expect a response + BOOST_TEST(mpx.commit_write(mpx.get_write_buffer().size())); // Simulate a successful read read(mpx, "$5\r\nhello\r\n"); @@ -329,7 +329,7 @@ void test_cancel_notwaiting_terminal_partial() // The multiplexer starts writing the request BOOST_TEST_EQ_MSG(mpx.prepare_write(), 1u, tc.name); - BOOST_TEST_EQ_MSG(mpx.commit_write(), 0u, tc.name); + BOOST_TEST_EQ_MSG(mpx.commit_write(mpx.get_write_buffer().size()), true, tc.name); // A cancellation arrives act = fsm.resume(true, tc.type); @@ -368,7 +368,7 @@ void test_cancel_notwaiting_total() // Simulate a successful write BOOST_TEST_EQ(mpx.prepare_write(), 1u); - BOOST_TEST_EQ(mpx.commit_write(), 0u); // all requests expect a response + BOOST_TEST(mpx.commit_write(mpx.get_write_buffer().size())); // We got requested a cancellation here, but we can't honor it act = fsm.resume(true, asio::cancellation_type_t::total); diff --git a/test/test_low_level.cpp b/test/test_low_level.cpp index c173d750..dc68e0ec 100644 --- a/test/test_low_level.cpp +++ b/test/test_low_level.cpp @@ -529,6 +529,7 @@ BOOST_AUTO_TEST_CASE(cover_error) check_error("boost.redis", boost::redis::error::incompatible_node_depth); check_error("boost.redis", boost::redis::error::resp3_hello); check_error("boost.redis", boost::redis::error::exceeds_maximum_read_buffer_size); + check_error("boost.redis", boost::redis::error::write_timeout); } std::string get_type_as_str(boost::redis::resp3::type t) diff --git a/test/test_multiplexer.cpp b/test/test_multiplexer.cpp index 96705a18..22b56246 100644 --- a/test/test_multiplexer.cpp +++ b/test/test_multiplexer.cpp @@ -15,14 +15,13 @@ #include #include +#include "sansio_utils.hpp" + #include #include #include -#include #include -#include "sansio_utils.hpp" - using boost::redis::request; using boost::redis::detail::multiplexer; using boost::redis::detail::consume_result; @@ -118,7 +117,7 @@ void test_request_needs_more() // Add the element to the multiplexer and simulate a successful write mpx.add(item1.elem_ptr); BOOST_TEST_EQ(mpx.prepare_write(), 1u); - BOOST_TEST_EQ(mpx.commit_write(), 0u); + BOOST_TEST(mpx.commit_write(item1.req.payload().size())); BOOST_TEST(item1.elem_ptr->is_written()); BOOST_TEST(!item1.done); @@ -161,6 +160,13 @@ void test_several_requests() BOOST_TEST_EQ(mpx.prepare_write(), 3u); BOOST_TEST_EQ(mpx.prepare_write(), 0u); + // The write buffer holds the 3 requests, coalesced + constexpr std::string_view expected_buffer = + "*2\r\n$4\r\nPING\r\n$7\r\ncmd-arg\r\n" + "*2\r\n$4\r\nPING\r\n$7\r\ncmd-arg\r\n" + "*2\r\n$9\r\nSUBSCRIBE\r\n$7\r\ncmd-arg\r\n"; + BOOST_TEST_EQ(mpx.get_write_buffer(), expected_buffer); + // After coalescing the requests for writing their statuses should // be changed to "staged". BOOST_TEST(item1.elem_ptr->is_staged()); @@ -180,7 +186,7 @@ void test_several_requests() // The commit_write call informs the multiplexer the payload was // sent (e.g. written to the socket). This step releases requests // that has no response. - BOOST_TEST_EQ(mpx.commit_write(), 1u); + BOOST_TEST(mpx.commit_write(expected_buffer.size())); // The staged status should now have changed to written. BOOST_TEST(item1.elem_ptr->is_written()); @@ -222,6 +228,61 @@ void test_several_requests() BOOST_TEST(item3.done); } +void test_short_writes() +{ + // Setup + multiplexer mpx; + test_item item1{}; + test_item item2{false}; + + // Add some requests to the multiplexer. + mpx.add(item1.elem_ptr); + mpx.add(item2.elem_ptr); + BOOST_TEST(item1.elem_ptr->is_waiting()); + BOOST_TEST(item2.elem_ptr->is_waiting()); + + // Start writing them + BOOST_TEST_EQ(mpx.prepare_write(), 2u); + BOOST_TEST_EQ( + mpx.get_write_buffer(), + "*2\r\n$4\r\nPING\r\n$7\r\ncmd-arg\r\n" + "*2\r\n$9\r\nSUBSCRIBE\r\n$7\r\ncmd-arg\r\n"); + BOOST_TEST(item1.elem_ptr->is_staged()); + BOOST_TEST(item2.elem_ptr->is_staged()); + + // Write a small part. The write buffer gets updated, but request status is not changed + BOOST_TEST_NOT(mpx.commit_write(8u)); + BOOST_TEST_EQ( + mpx.get_write_buffer(), + "PING\r\n$7\r\ncmd-arg\r\n" + "*2\r\n$9\r\nSUBSCRIBE\r\n$7\r\ncmd-arg\r\n"); + BOOST_TEST(item1.elem_ptr->is_staged()); + BOOST_TEST(item2.elem_ptr->is_staged()); + + // Write another part + BOOST_TEST_NOT(mpx.commit_write(19u)); + BOOST_TEST_EQ(mpx.get_write_buffer(), "*2\r\n$9\r\nSUBSCRIBE\r\n$7\r\ncmd-arg\r\n"); + BOOST_TEST(item1.elem_ptr->is_staged()); + BOOST_TEST(item2.elem_ptr->is_staged()); + + // A zero-size write doesn't cause trouble + BOOST_TEST_NOT(mpx.commit_write(0u)); + BOOST_TEST_EQ(mpx.get_write_buffer(), "*2\r\n$9\r\nSUBSCRIBE\r\n$7\r\ncmd-arg\r\n"); + BOOST_TEST(item1.elem_ptr->is_staged()); + BOOST_TEST(item2.elem_ptr->is_staged()); + + // Write everything except the last byte + BOOST_TEST_NOT(mpx.commit_write(31u)); + BOOST_TEST_EQ(mpx.get_write_buffer(), "\n"); + BOOST_TEST(item1.elem_ptr->is_staged()); + BOOST_TEST(item2.elem_ptr->is_staged()); + + // Write the last byte + BOOST_TEST(mpx.commit_write(1u)); + BOOST_TEST(item1.elem_ptr->is_written()); + BOOST_TEST(item2.elem_ptr->is_done()); +} + // The response to a request is received before its write // confirmation. This might happen on heavy load void test_request_response_before_write() @@ -251,7 +312,7 @@ void test_request_response_before_write() item.reset(); // The write gets confirmed and causes no problem - BOOST_TEST_EQ(mpx.commit_write(), 0u); + BOOST_TEST(mpx.commit_write(mpx.get_write_buffer().size())); } void test_push() @@ -395,7 +456,7 @@ void test_mix_responses_pushes() mpx.add(item1.elem_ptr); mpx.add(item2.elem_ptr); BOOST_TEST_EQ(mpx.prepare_write(), 2u); - BOOST_TEST_EQ(mpx.commit_write(), 0u); + BOOST_TEST(mpx.commit_write(mpx.get_write_buffer().size())); BOOST_TEST(item1.elem_ptr->is_written()); BOOST_TEST(!item1.done); BOOST_TEST(item2.elem_ptr->is_written()); @@ -488,7 +549,7 @@ void test_cancel_waiting() // We can progress the other request normally BOOST_TEST_EQ(mpx.prepare_write(), 1u); - BOOST_TEST_EQ(mpx.commit_write(), 0u); + BOOST_TEST(mpx.commit_write(mpx.get_write_buffer().size())); error_code ec; read(mpx, "$11\r\nHello world\r\n"); auto res = mpx.consume(ec); @@ -518,7 +579,7 @@ void test_cancel_staged() item1.reset(); // Verify we don't reference this item anyhow // The write gets confirmed - BOOST_TEST_EQ(mpx.commit_write(), 0u); + BOOST_TEST(mpx.commit_write(mpx.get_write_buffer().size())); // The cancelled request's response arrives. It gets discarded error_code ec; @@ -556,7 +617,7 @@ void test_cancel_staged_command_without_response() item1.reset(); // Verify we don't reference this item anyhow // The write gets confirmed - BOOST_TEST_EQ(mpx.commit_write(), 1u); + BOOST_TEST(mpx.commit_write(mpx.get_write_buffer().size())); // The 2nd request's response arrives. It gets parsed successfully error_code ec; @@ -582,7 +643,7 @@ void test_cancel_written() // A write succeeds BOOST_TEST_EQ(mpx.prepare_write(), 2u); - BOOST_TEST_EQ(mpx.commit_write(), 0u); + BOOST_TEST(mpx.commit_write(mpx.get_write_buffer().size())); // Cancel the first request mpx.cancel(item1->elem_ptr); @@ -623,7 +684,7 @@ void test_cancel_written_half_parsed_response() // A write succeeds BOOST_TEST_EQ(mpx.prepare_write(), 2u); - BOOST_TEST_EQ(mpx.commit_write(), 0u); + BOOST_TEST(mpx.commit_write(mpx.get_write_buffer().size())); // Get the response for the 1st command in req1 error_code ec; @@ -686,7 +747,7 @@ void test_cancel_written_null_error() // A write succeeds BOOST_TEST_EQ(mpx.prepare_write(), 2u); - BOOST_TEST_EQ(mpx.commit_write(), 0u); + BOOST_TEST(mpx.commit_write(mpx.get_write_buffer().size())); // Cancel the first request mpx.cancel(item1->elem_ptr); @@ -741,7 +802,7 @@ void test_cancel_on_connection_lost() mpx.add(item_written1.elem_ptr); mpx.add(item_written2.elem_ptr); BOOST_TEST_EQ(mpx.prepare_write(), 2u); - BOOST_TEST_EQ(mpx.commit_write(), 0u); + BOOST_TEST(mpx.commit_write(mpx.get_write_buffer().size())); mpx.add(item_staged1.elem_ptr); mpx.add(item_staged2.elem_ptr); @@ -794,7 +855,7 @@ void test_cancel_on_connection_lost_abandoned() mpx.add(item_written1->elem_ptr); mpx.add(item_written2->elem_ptr); BOOST_TEST_EQ(mpx.prepare_write(), 2u); - BOOST_TEST_EQ(mpx.commit_write(), 0u); + BOOST_TEST(mpx.commit_write(mpx.get_write_buffer().size())); mpx.add(item_staged1->elem_ptr); mpx.add(item_staged2->elem_ptr); @@ -898,7 +959,7 @@ void test_reset() // We're able to add write requests and read responses - all state was reset mpx.add(item2.elem_ptr); BOOST_TEST_EQ(mpx.prepare_write(), 1u); - BOOST_TEST_EQ(mpx.commit_write(), 0u); + BOOST_TEST(mpx.commit_write(mpx.get_write_buffer().size())); std::string_view response_buffer = "$11\r\nHello world\r\n"; read(mpx, response_buffer); @@ -919,6 +980,7 @@ int main() test_request_needs_more(); test_several_requests(); test_request_response_before_write(); + test_short_writes(); test_push(); test_push_needs_more(); test_push_heuristics_no_request(); diff --git a/test/test_reader_fsm.cpp b/test/test_reader_fsm.cpp index 1c204131..f215a54d 100644 --- a/test/test_reader_fsm.cpp +++ b/test/test_reader_fsm.cpp @@ -18,6 +18,7 @@ #include "sansio_utils.hpp" +#include #include namespace net = boost::asio; @@ -32,6 +33,7 @@ using redis::config; using redis::detail::connection_state; using action = redis::detail::reader_fsm::action; using redis::logger; +using namespace std::chrono_literals; // Operators static const char* to_string(action::type type) @@ -50,16 +52,29 @@ std::ostream& operator<<(std::ostream& os, action::type type) { return os << to_ bool operator==(const action& lhs, const action& rhs) noexcept { - return lhs.type_ == rhs.type_ && lhs.push_size_ == rhs.push_size_ && lhs.ec_ == rhs.ec_; + if (lhs.get_type() != rhs.get_type()) + return false; + switch (lhs.get_type()) { + case action::type::done: return lhs.error() == rhs.error(); + case action::type::read_some: return lhs.timeout() == rhs.timeout(); + case action::type::notify_push_receiver: return lhs.push_size() == rhs.push_size(); + default: BOOST_ASSERT(false); return false; + } } std::ostream& operator<<(std::ostream& os, const action& act) { - os << "action{ .type=" << act.type_; - if (act.type_ == action::type::done) - os << ", .error=" << act.ec_; - else if (act.type_ == action::type::notify_push_receiver) - os << ", .push_size=" << act.push_size_; + auto t = act.get_type(); + os << "action{ .type=" << t; + switch (t) { + case action::type::done: os << ", .error=" << act.error(); break; + case action::type::read_some: + os << ", .timeout=" << to_milliseconds(act.timeout()) << "ms"; + break; + case action::type::notify_push_receiver: os << ", .push_size=" << act.push_size(); break; + default: BOOST_ASSERT(false); + } + return os << " }"; } @@ -84,7 +99,11 @@ struct fixture : redis::detail::log_fixture { connection_state st{make_logger()}; generic_response resp; - fixture() { st.mpx.set_receive_adapter(any_adapter{resp}); } + fixture() + { + st.mpx.set_receive_adapter(any_adapter{resp}); + st.cfg.health_check_interval = 3s; + } }; void test_push() @@ -94,7 +113,7 @@ void test_push() // Initiate auto act = fsm.resume(fix.st, 0, error_code(), cancellation_type_t::none); - BOOST_TEST_EQ(act.type_, action::type::read_some); + BOOST_TEST_EQ(act, action::read_some(6s)); // The fsm is asking for data. std::string const payload = @@ -118,7 +137,7 @@ void test_push() // All pushes were delivered so the fsm should demand more data act = fsm.resume(fix.st, 0, error_code(), cancellation_type_t::none); - BOOST_TEST_EQ(act, action::type::read_some); + BOOST_TEST_EQ(act, action::read_some(6s)); // Check logging fix.check_log({ @@ -135,7 +154,7 @@ void test_read_needs_more() // Initiate auto act = fsm.resume(fix.st, 0, error_code(), cancellation_type_t::none); - BOOST_TEST_EQ(act.type_, action::type::read_some); + BOOST_TEST_EQ(act, action::read_some(6s)); // Split the incoming message in three random parts and deliver // them to the reader individually. @@ -144,12 +163,12 @@ void test_read_needs_more() // Passes the first part to the fsm. copy_to(fix.st.mpx, msg[0]); act = fsm.resume(fix.st, msg[0].size(), error_code(), cancellation_type_t::none); - BOOST_TEST_EQ(act, action::type::read_some); + BOOST_TEST_EQ(act, action::read_some(6s)); // Passes the second part to the fsm. copy_to(fix.st.mpx, msg[1]); act = fsm.resume(fix.st, msg[1].size(), error_code(), cancellation_type_t::none); - BOOST_TEST_EQ(act, action::type::read_some); + BOOST_TEST_EQ(act, action::read_some(6s)); // Passes the third and last part to the fsm, next it should ask us // to deliver the message. @@ -159,7 +178,7 @@ void test_read_needs_more() // All pushes were delivered so the fsm should demand more data act = fsm.resume(fix.st, 0, error_code(), cancellation_type_t::none); - BOOST_TEST_EQ(act, action::type::read_some); + BOOST_TEST_EQ(act, action::read_some(6s)); // Check logging fix.check_log({ @@ -175,6 +194,44 @@ void test_read_needs_more() }); } +void test_health_checks_disabled() +{ + fixture fix; + reader_fsm fsm; + fix.st.cfg.health_check_interval = 0s; + + // Initiate + auto act = fsm.resume(fix.st, 0, error_code(), cancellation_type_t::none); + BOOST_TEST_EQ(act, action::read_some(0s)); + + // Split the message into two so we cover both the regular read and the needs more branch + constexpr std::string_view msg[] = {">3\r\n+msg1\r\n+ms", "g2\r\n+msg3\r\n"}; + + // Passes the first part to the fsm. + copy_to(fix.st.mpx, msg[0]); + act = fsm.resume(fix.st, msg[0].size(), error_code(), cancellation_type_t::none); + BOOST_TEST_EQ(act, action::read_some(0s)); + + // Push delivery complete + copy_to(fix.st.mpx, msg[1]); + act = fsm.resume(fix.st, msg[1].size(), error_code(), cancellation_type_t::none); + BOOST_TEST_EQ(act, action::notify_push_receiver(25u)); + + // All pushes were delivered so the fsm should demand more data + act = fsm.resume(fix.st, 0, error_code(), cancellation_type_t::none); + BOOST_TEST_EQ(act, action::read_some(0s)); + + // Check logging + fix.check_log({ + {logger::level::debug, "Reader task: issuing read" }, + {logger::level::debug, "Reader task: 14 bytes read" }, + {logger::level::debug, "Reader task: incomplete message received"}, + {logger::level::debug, "Reader task: issuing read" }, + {logger::level::debug, "Reader task: 11 bytes read" }, + {logger::level::debug, "Reader task: issuing read" }, + }); +} + void test_read_error() { fixture fix; @@ -182,7 +239,7 @@ void test_read_error() // Initiate auto act = fsm.resume(fix.st, 0, error_code(), cancellation_type_t::none); - BOOST_TEST_EQ(act.type_, action::type::read_some); + BOOST_TEST_EQ(act, action::read_some(6s)); // The fsm is asking for data. std::string const payload = ">1\r\n+msg1\r\n"; @@ -201,6 +258,29 @@ void test_read_error() }); } +// A timeout in a read means that the connection is unhealthy (i.e. a PING timed out) +void test_read_timeout() +{ + fixture fix; + reader_fsm fsm; + + // Initiate + auto act = fsm.resume(fix.st, 0, error_code(), cancellation_type_t::none); + BOOST_TEST_EQ(act, action::read_some(6s)); + + // Timeout + act = fsm.resume(fix.st, 0, {net::error::operation_aborted}, cancellation_type_t::none); + BOOST_TEST_EQ(act, error_code{redis::error::pong_timeout}); + + // Check logging + fix.check_log({ + // clang-format off + {logger::level::debug, "Reader task: issuing read" }, + {logger::level::debug, "Reader task: 0 bytes read, error: Pong timeout. [boost.redis:19]"}, + // clang-format on + }); +} + void test_parse_error() { fixture fix; @@ -208,7 +288,7 @@ void test_parse_error() // Initiate auto act = fsm.resume(fix.st, 0, error_code(), cancellation_type_t::none); - BOOST_TEST_EQ(act, action::type::read_some); + BOOST_TEST_EQ(act, action::read_some(6s)); // The fsm is asking for data. std::string const payload = ">a\r\n"; @@ -235,7 +315,7 @@ void test_push_deliver_error() // Initiate auto act = fsm.resume(fix.st, 0, error_code(), cancellation_type_t::none); - BOOST_TEST_EQ(act, action::type::read_some); + BOOST_TEST_EQ(act, action::read_some(6s)); // The fsm is asking for data. std::string const payload = ">1\r\n+msg1\r\n"; @@ -269,7 +349,7 @@ void test_max_read_buffer_size() // Initiate auto act = fsm.resume(fix.st, 0, error_code(), cancellation_type_t::none); - BOOST_TEST_EQ(act, action::type::read_some); + BOOST_TEST_EQ(act, action::read_some(6s)); // Passes the first part to the fsm. std::string const part1 = ">3\r\n"; @@ -296,7 +376,7 @@ void test_cancel_read() // Initiate auto act = fsm.resume(fix.st, 0, error_code(), cancellation_type_t::none); - BOOST_TEST_EQ(act, action::type::read_some); + BOOST_TEST_EQ(act, action::read_some(6s)); // The read was cancelled (maybe after delivering some bytes) constexpr std::string_view payload = ">1\r\n"; @@ -322,7 +402,7 @@ void test_cancel_read_edge() // Initiate auto act = fsm.resume(fix.st, 0, error_code(), cancellation_type_t::none); - BOOST_TEST_EQ(act, action::type::read_some); + BOOST_TEST_EQ(act, action::read_some(6s)); // Deliver a push, and notify a cancellation. // This can happen if the cancellation signal arrives before the read handler runs @@ -345,7 +425,7 @@ void test_cancel_push_delivery() // Initiate auto act = fsm.resume(fix.st, 0, error_code(), cancellation_type_t::none); - BOOST_TEST_EQ(act.type_, action::type::read_some); + BOOST_TEST_EQ(act, action::read_some(6s)); // The fsm is asking for data. constexpr std::string_view payload = @@ -377,7 +457,7 @@ void test_cancel_push_delivery_edge() // Initiate auto act = fsm.resume(fix.st, 0, error_code(), cancellation_type_t::none); - BOOST_TEST_EQ(act.type_, action::type::read_some); + BOOST_TEST_EQ(act, action::read_some(6s)); // The fsm is asking for data. constexpr std::string_view payload = @@ -409,8 +489,10 @@ int main() { test_push(); test_read_needs_more(); + test_health_checks_disabled(); test_read_error(); + test_read_timeout(); test_parse_error(); test_push_deliver_error(); test_max_read_buffer_size(); diff --git a/test/test_writer_fsm.cpp b/test/test_writer_fsm.cpp index 61674693..f206b2e4 100644 --- a/test/test_writer_fsm.cpp +++ b/test/test_writer_fsm.cpp @@ -7,6 +7,7 @@ // #include +#include #include #include #include @@ -20,8 +21,10 @@ #include "sansio_utils.hpp" +#include #include #include +#include using namespace boost::redis; namespace asio = boost::asio; @@ -30,18 +33,20 @@ using detail::multiplexer; using detail::writer_action_type; using detail::consume_result; using detail::writer_action; +using detail::connection_state; using boost::system::error_code; using boost::asio::cancellation_type_t; using detail::connection_logger; +using namespace std::chrono_literals; // Operators static const char* to_string(writer_action_type value) { switch (value) { - case writer_action_type::done: return "writer_action_type::done"; - case writer_action_type::write: return "writer_action_type::write"; - case writer_action_type::wait: return "writer_action_type::wait"; - default: return ""; + case writer_action_type::done: return "writer_action_type::done"; + case writer_action_type::write_some: return "writer_action_type::write"; + case writer_action_type::wait: return "writer_action_type::wait"; + default: return ""; } } @@ -55,14 +60,30 @@ std::ostream& operator<<(std::ostream& os, writer_action_type type) bool operator==(const writer_action& lhs, const writer_action& rhs) noexcept { - return lhs.type == rhs.type && lhs.ec == rhs.ec; + if (lhs.type() != rhs.type()) + return false; + switch (lhs.type()) { + case writer_action_type::done: return lhs.error() == rhs.error(); + case writer_action_type::write_some: + case writer_action_type::wait: return lhs.timeout() == rhs.timeout(); + default: BOOST_ASSERT(false); + } + return false; } std::ostream& operator<<(std::ostream& os, const writer_action& act) { - os << "writer_action{ .type=" << act.type; - if (act.type == writer_action_type::done) - os << ", .error=" << act.ec; + auto t = act.type(); + os << "writer_action{ .type=" << t; + switch (t) { + case writer_action_type::done: os << ", .error=" << act.error(); break; + case writer_action_type::write_some: + case writer_action_type::wait: + os << ", .timeout=" << to_milliseconds(act.timeout()) << "ms"; + break; + default: BOOST_ASSERT(false); + } + return os << " }"; } @@ -89,9 +110,14 @@ struct test_elem { }; struct fixture : detail::log_fixture { - multiplexer mpx; - connection_logger lgr{make_logger()}; - writer_fsm fsm{mpx, lgr}; + connection_state st{make_logger()}; + writer_fsm fsm; + + fixture() + { + st.ping_req.push("PING", "ping_msg"); // would be set up by the runner + st.cfg.health_check_interval = 4s; + } }; // A single request is written, then we wait and repeat @@ -102,35 +128,37 @@ void test_single_request() test_elem item1, item2; // A request arrives before the writer starts - fix.mpx.add(item1.elm); + fix.st.mpx.add(item1.elm); // Start. A write is triggered, and the request is marked as staged - auto act = fix.fsm.resume(error_code(), 0u, cancellation_type_t::none); - BOOST_TEST_EQ(act, writer_action_type::write); + auto act = fix.fsm.resume(fix.st, error_code(), 0u, cancellation_type_t::none); + BOOST_TEST_EQ(act, writer_action::write_some(4s)); BOOST_TEST(item1.elm->is_staged()); // The write completes successfully. The request is written, and we go back to sleep. - act = fix.fsm.resume(error_code(), item1.req.payload().size(), cancellation_type_t::none); - BOOST_TEST_EQ(act, writer_action_type::wait); + act = fix.fsm + .resume(fix.st, error_code(), item1.req.payload().size(), cancellation_type_t::none); + BOOST_TEST_EQ(act, writer_action::wait(4s)); BOOST_TEST(item1.elm->is_written()); // Another request arrives - fix.mpx.add(item2.elm); + fix.st.mpx.add(item2.elm); // The wait is cancelled to signal we've got a new request - act = fix.fsm.resume(asio::error::operation_aborted, 0u, cancellation_type_t::none); - BOOST_TEST_EQ(act, writer_action_type::write); + act = fix.fsm.resume(fix.st, asio::error::operation_aborted, 0u, cancellation_type_t::none); + BOOST_TEST_EQ(act, writer_action::write_some(4s)); BOOST_TEST(item2.elm->is_staged()); // Write successful - act = fix.fsm.resume(error_code(), item2.req.payload().size(), cancellation_type_t::none); - BOOST_TEST_EQ(act, writer_action_type::wait); + act = fix.fsm + .resume(fix.st, error_code(), item2.req.payload().size(), cancellation_type_t::none); + BOOST_TEST_EQ(act, writer_action::wait(4s)); BOOST_TEST(item2.elm->is_written()); // Logs fix.check_log({ - {logger::level::info, "Writer task: 24 bytes written."}, - {logger::level::info, "Writer task: 24 bytes written."}, + {logger::level::debug, "Writer task: 24 bytes written."}, + {logger::level::debug, "Writer task: 24 bytes written."}, }); } @@ -142,32 +170,34 @@ void test_request_arrives_while_writing() test_elem item1, item2; // A request arrives before the writer starts - fix.mpx.add(item1.elm); + fix.st.mpx.add(item1.elm); // Start. A write is triggered, and the request is marked as staged - auto act = fix.fsm.resume(error_code(), 0u, cancellation_type_t::none); - BOOST_TEST_EQ(act, writer_action_type::write); + auto act = fix.fsm.resume(fix.st, error_code(), 0u, cancellation_type_t::none); + BOOST_TEST_EQ(act, writer_action::write_some(4s)); BOOST_TEST(item1.elm->is_staged()); // While the write is outstanding, a new request arrives - fix.mpx.add(item2.elm); + fix.st.mpx.add(item2.elm); // The write completes successfully. The request is written, // and we start writing the new one - act = fix.fsm.resume(error_code(), item1.req.payload().size(), cancellation_type_t::none); - BOOST_TEST_EQ(act, writer_action_type::write); + act = fix.fsm + .resume(fix.st, error_code(), item1.req.payload().size(), cancellation_type_t::none); + BOOST_TEST_EQ(act, writer_action::write_some(4s)); BOOST_TEST(item1.elm->is_written()); BOOST_TEST(item2.elm->is_staged()); // Write successful - act = fix.fsm.resume(error_code(), item2.req.payload().size(), cancellation_type_t::none); - BOOST_TEST_EQ(act, writer_action_type::wait); + act = fix.fsm + .resume(fix.st, error_code(), item2.req.payload().size(), cancellation_type_t::none); + BOOST_TEST_EQ(act, writer_action::wait(4s)); BOOST_TEST(item2.elm->is_written()); // Logs fix.check_log({ - {logger::level::info, "Writer task: 24 bytes written."}, - {logger::level::info, "Writer task: 24 bytes written."}, + {logger::level::debug, "Writer task: 24 bytes written."}, + {logger::level::debug, "Writer task: 24 bytes written."}, }); } @@ -179,25 +209,166 @@ void test_no_request_at_startup() test_elem item; // Start. There is no request, so we wait - auto act = fix.fsm.resume(error_code(), 0u, cancellation_type_t::none); - BOOST_TEST_EQ(act, writer_action_type::wait); + auto act = fix.fsm.resume(fix.st, error_code(), 0u, cancellation_type_t::none); + BOOST_TEST_EQ(act, writer_action::wait(4s)); // A request arrives - fix.mpx.add(item.elm); + fix.st.mpx.add(item.elm); // The wait is cancelled to signal we've got a new request - act = fix.fsm.resume(asio::error::operation_aborted, 0u, cancellation_type_t::none); - BOOST_TEST_EQ(act, writer_action_type::write); + act = fix.fsm.resume(fix.st, asio::error::operation_aborted, 0u, cancellation_type_t::none); + BOOST_TEST_EQ(act, writer_action::write_some(4s)); BOOST_TEST(item.elm->is_staged()); // Write successful - act = fix.fsm.resume(error_code(), item.req.payload().size(), cancellation_type_t::none); - BOOST_TEST_EQ(act, writer_action_type::wait); + act = fix.fsm.resume(fix.st, error_code(), item.req.payload().size(), cancellation_type_t::none); + BOOST_TEST_EQ(act, writer_action::wait(4s)); BOOST_TEST(item.elm->is_written()); // Logs fix.check_log({ - {logger::level::info, "Writer task: 24 bytes written."}, + {logger::level::debug, "Writer task: 24 bytes written."}, + }); +} + +// We correctly handle short writes +void test_short_writes() +{ + // Setup + fixture fix; + test_elem item1; + + // A request arrives before the writer starts + fix.st.mpx.add(item1.elm); + + // Start. A write is triggered, and the request is marked as staged + auto act = fix.fsm.resume(fix.st, error_code(), 0u, cancellation_type_t::none); + BOOST_TEST_EQ(act, writer_action::write_some(4s)); + BOOST_TEST(item1.elm->is_staged()); + + // We write a few bytes. It's not the entire message, so we write again + act = fix.fsm.resume(fix.st, error_code(), 2u, cancellation_type_t::none); + BOOST_TEST_EQ(act, writer_action::write_some(4s)); + BOOST_TEST(item1.elm->is_staged()); + + // We write some more bytes, but still not the entire message. + act = fix.fsm.resume(fix.st, error_code(), 5u, cancellation_type_t::none); + BOOST_TEST_EQ(act, writer_action::write_some(4s)); + BOOST_TEST(item1.elm->is_staged()); + + // A zero size write doesn't cause trouble + act = fix.fsm.resume(fix.st, error_code(), 0u, cancellation_type_t::none); + BOOST_TEST_EQ(act, writer_action::write_some(4s)); + BOOST_TEST(item1.elm->is_staged()); + + // Complete writing the message (the entire payload is 24 bytes long) + act = fix.fsm.resume(fix.st, error_code(), 17u, cancellation_type_t::none); + BOOST_TEST_EQ(act, writer_action::wait(4s)); + BOOST_TEST(item1.elm->is_written()); + + // Logs + fix.check_log({ + {logger::level::debug, "Writer task: 2 bytes written." }, + {logger::level::debug, "Writer task: 5 bytes written." }, + {logger::level::debug, "Writer task: 0 bytes written." }, + {logger::level::debug, "Writer task: 17 bytes written."}, + }); +} + +// If no data arrives during the health check interval, a ping is written +void test_ping() +{ + // Setup + fixture fix; + error_code ec; + constexpr std::string_view ping_payload = "*2\r\n$4\r\nPING\r\n$8\r\nping_msg\r\n"; + + // Start. There is no request, so we wait + auto act = fix.fsm.resume(fix.st, error_code(), 0u, cancellation_type_t::none); + BOOST_TEST_EQ(act, writer_action::wait(4s)); + + // No request arrives during the wait interval so a ping is added + act = fix.fsm.resume(fix.st, error_code(), 0u, cancellation_type_t::none); + BOOST_TEST_EQ(act, writer_action::write_some(4s)); + BOOST_TEST_EQ(fix.st.mpx.get_write_buffer(), ping_payload); + + // Write successful + act = fix.fsm.resume(fix.st, error_code(), ping_payload.size(), cancellation_type_t::none); + BOOST_TEST_EQ(act, writer_action::wait(4s)); + + // Simulate a successful response to the PING + constexpr std::string_view ping_response = "$8\r\nping_msg\r\n"; + read(fix.st.mpx, ping_response); + auto res = fix.st.mpx.consume(ec); + BOOST_TEST_EQ(ec, error_code()); + BOOST_TEST(res.first == consume_result::got_response); + BOOST_TEST_EQ(res.second, ping_response.size()); + + // Logs + fix.check_log({ + {logger::level::debug, "Writer task: 28 bytes written."}, + }); +} + +// Disabled health checks don't cause trouble +void test_health_checks_disabled() +{ + // Setup + fixture fix; + test_elem item; + fix.st.cfg.health_check_interval = 0s; + + // A request arrives before the writer starts + fix.st.mpx.add(item.elm); + + // Start. A write is triggered, and the request is marked as staged + auto act = fix.fsm.resume(fix.st, error_code(), 0u, cancellation_type_t::none); + BOOST_TEST_EQ(act, writer_action::write_some(0s)); + BOOST_TEST(item.elm->is_staged()); + + // The write completes successfully. The request is written, and we go back to sleep. + act = fix.fsm.resume(fix.st, error_code(), item.req.payload().size(), cancellation_type_t::none); + BOOST_TEST_EQ(act, writer_action::wait(0s)); + BOOST_TEST(item.elm->is_written()); + + // Logs + fix.check_log({ + {logger::level::debug, "Writer task: 24 bytes written."}, + }); +} + +// If the server answers with an error in PING, we log it and produce an error +void test_ping_error() +{ + // Setup + fixture fix; + error_code ec; + + // Start. There is no request, so we wait + auto act = fix.fsm.resume(fix.st, error_code(), 0u, cancellation_type_t::none); + BOOST_TEST_EQ(act, writer_action::wait(4s)); + + // No request arrives during the wait interval so a ping is added + act = fix.fsm.resume(fix.st, error_code(), 0u, cancellation_type_t::none); + BOOST_TEST_EQ(act, writer_action::write_some(4s)); + + // Write successful + const auto ping_size = fix.st.mpx.get_write_buffer().size(); + act = fix.fsm.resume(fix.st, error_code(), ping_size, cancellation_type_t::none); + BOOST_TEST_EQ(act, writer_action::wait(4s)); + + // Simulate an error response to the PING + constexpr std::string_view ping_response = "-ERR: bad command\r\n"; + read(fix.st.mpx, ping_response); + auto res = fix.st.mpx.consume(ec); + BOOST_TEST_EQ(ec, error::resp3_simple_error); + BOOST_TEST(res.first == consume_result::got_response); + BOOST_TEST_EQ(res.second, ping_response.size()); + + // Logs + fix.check_log({ + {logger::level::debug, "Writer task: 28 bytes written." }, + {logger::level::info, "Health checker: server answered ping with an error: ERR: bad command"}, }); } @@ -209,23 +380,51 @@ void test_write_error() test_elem item; // A request arrives before the writer starts - fix.mpx.add(item.elm); + fix.st.mpx.add(item.elm); // Start. A write is triggered, and the request is marked as staged - auto act = fix.fsm.resume(error_code(), 0u, cancellation_type_t::none); - BOOST_TEST_EQ(act, writer_action_type::write); + auto act = fix.fsm.resume(fix.st, error_code(), 0u, cancellation_type_t::none); + BOOST_TEST_EQ(act, writer_action::write_some(4s)); BOOST_TEST(item.elm->is_staged()); // The write completes with an error (possibly with partial success). // The request is still staged, and the writer exits. // Use an error we control so we can check logs - act = fix.fsm.resume(error::empty_field, 2u, cancellation_type_t::none); + act = fix.fsm.resume(fix.st, error::empty_field, 2u, cancellation_type_t::none); BOOST_TEST_EQ(act, error_code(error::empty_field)); BOOST_TEST(item.elm->is_staged()); // Logs fix.check_log({ - {logger::level::info, "Writer task error: Expected field value is empty. [boost.redis:5]"}, + {logger::level::debug, "Writer task: 2 bytes written." }, + {logger::level::debug, "Writer task error: Expected field value is empty. [boost.redis:5]"}, + }); +} + +void test_write_timeout() +{ + // Setup + fixture fix; + test_elem item; + + // A request arrives before the writer starts + fix.st.mpx.add(item.elm); + + // Start. A write is triggered, and the request is marked as staged + auto act = fix.fsm.resume(fix.st, error_code(), 0u, cancellation_type_t::none); + BOOST_TEST_EQ(act, writer_action::write_some(4s)); + BOOST_TEST(item.elm->is_staged()); + + // The write times out, so it completes with operation_aborted + act = fix.fsm.resume(fix.st, asio::error::operation_aborted, 0u, cancellation_type_t::none); + BOOST_TEST_EQ(act, error_code(error::write_timeout)); + BOOST_TEST(item.elm->is_staged()); + + // Logs + fix.check_log({ + {logger::level::debug, "Writer task: 0 bytes written." }, + {logger::level::debug, + "Writer task error: Timeout while writing data to the server. [boost.redis:27]"}, }); } @@ -237,21 +436,22 @@ void test_cancel_write() test_elem item; // A request arrives before the writer starts - fix.mpx.add(item.elm); + fix.st.mpx.add(item.elm); // Start. A write is triggered - auto act = fix.fsm.resume(error_code(), 0u, cancellation_type_t::none); - BOOST_TEST_EQ(act, writer_action_type::write); + auto act = fix.fsm.resume(fix.st, error_code(), 0u, cancellation_type_t::none); + BOOST_TEST_EQ(act, writer_action::write_some(4s)); BOOST_TEST(item.elm->is_staged()); // Write cancelled and failed with operation_aborted - act = fix.fsm.resume(asio::error::operation_aborted, 2u, cancellation_type_t::terminal); + act = fix.fsm.resume(fix.st, asio::error::operation_aborted, 2u, cancellation_type_t::terminal); BOOST_TEST_EQ(act, error_code(asio::error::operation_aborted)); BOOST_TEST(item.elm->is_staged()); // Logs fix.check_log({ - {logger::level::debug, "Writer task: cancelled (1)."}, + {logger::level::debug, "Writer task: 2 bytes written."}, + {logger::level::debug, "Writer task: cancelled (1)." }, }); } @@ -263,21 +463,23 @@ void test_cancel_write_edge() test_elem item; // A request arrives before the writer starts - fix.mpx.add(item.elm); + fix.st.mpx.add(item.elm); // Start. A write is triggered - auto act = fix.fsm.resume(error_code(), 0u, cancellation_type_t::none); - BOOST_TEST_EQ(act, writer_action_type::write); + auto act = fix.fsm.resume(fix.st, error_code(), 0u, cancellation_type_t::none); + BOOST_TEST_EQ(act, writer_action::write_some(4s)); BOOST_TEST(item.elm->is_staged()); // Write cancelled but without error - act = fix.fsm.resume(error_code(), item.req.payload().size(), cancellation_type_t::terminal); + act = fix.fsm + .resume(fix.st, error_code(), item.req.payload().size(), cancellation_type_t::terminal); BOOST_TEST_EQ(act, error_code(asio::error::operation_aborted)); BOOST_TEST(item.elm->is_written()); // Logs fix.check_log({ - {logger::level::debug, "Writer task: cancelled (1)."}, + {logger::level::debug, "Writer task: 24 bytes written."}, + {logger::level::debug, "Writer task: cancelled (1)." }, }); } @@ -289,14 +491,18 @@ void test_cancel_wait() test_elem item; // Start. There is no request, so we wait - auto act = fix.fsm.resume(error_code(), 0u, cancellation_type_t::none); - BOOST_TEST_EQ(act, writer_action_type::wait); + auto act = fix.fsm.resume(fix.st, error_code(), 0u, cancellation_type_t::none); + BOOST_TEST_EQ(act, writer_action::wait(4s)); // Sanity check: the writer doesn't touch the multiplexer after a cancellation - fix.mpx.add(item.elm); + fix.st.mpx.add(item.elm); // Cancel the wait, setting the cancellation state - act = fix.fsm.resume(asio::error::operation_aborted, 0u, asio::cancellation_type_t::terminal); + act = fix.fsm.resume( + fix.st, + asio::error::operation_aborted, + 0u, + asio::cancellation_type_t::terminal); BOOST_TEST_EQ(act, error_code(asio::error::operation_aborted)); BOOST_TEST(item.elm->is_waiting()); @@ -313,8 +519,14 @@ int main() test_single_request(); test_request_arrives_while_writing(); test_no_request_at_startup(); + test_short_writes(); + test_health_checks_disabled(); + + test_ping(); + test_ping_error(); test_write_error(); + test_write_timeout(); test_cancel_write(); test_cancel_write_edge();