From fa475f09d3e90d143276ee3478d8f10dbb4f6898 Mon Sep 17 00:00:00 2001 From: "isabel@lotus" Date: Fri, 30 Nov 2018 15:34:42 -0800 Subject: [PATCH] clean up logs --- audit/CMakeLists.txt | 2 +- audit/audit.cpp | 33 ++++++++++++++++++++------------- node/node.cpp | 2 +- node/session.cpp | 4 ++-- options/simple_options.cpp | 5 +---- 5 files changed, 25 insertions(+), 21 deletions(-) diff --git a/audit/CMakeLists.txt b/audit/CMakeLists.txt index 1c1bdef4..d5f6e489 100644 --- a/audit/CMakeLists.txt +++ b/audit/CMakeLists.txt @@ -4,7 +4,7 @@ add_library(audit audit.cpp ) -target_link_libraries(audit proto) +target_link_libraries(audit utils proto) add_dependencies(audit jsoncpp) target_include_directories(audit PRIVATE ${JSONCPP_INCLUDE_DIRS} ${PROTO_INCLUDE_DIR}) add_subdirectory(test) diff --git a/audit/audit.cpp b/audit/audit.cpp index 3f72366a..94947f58 100644 --- a/audit/audit.cpp +++ b/audit/audit.cpp @@ -16,6 +16,8 @@ #include #include #include +#include +#include using namespace bzn; @@ -60,15 +62,16 @@ audit::start() if (this->monitor_endpoint) { - LOG(info) << boost::format("Audit module running, will send stats to %1%:%2%") + LOG(info) << boost::format("audit module running, will send stats to %1%:%2%") % this->monitor_endpoint->address().to_string() % this->monitor_endpoint->port(); } else { - LOG(info) "Audit module running, but not sending stats anywhere because no monitor configured"; + LOG(info) "audit module running, but not sending stats anywhere because no monitor configured"; } + LOG(debug) << "starting primary alive timer"; this->reset_primary_alive_timer(); }); } @@ -76,10 +79,8 @@ audit::start() void audit::reset_primary_alive_timer() { - LOG(debug) << "starting primary alive timer"; this->primary_dead_count = 0; - // We use the leader_alive_timer here because introducing a new timer would make the tests annoyingly complex this->primary_alive_timer->cancel(); this->primary_alive_timer->expires_from_now(this->primary_timeout); this->primary_alive_timer->async_wait(std::bind(&audit::handle_primary_alive_timeout, shared_from_this(), std::placeholders::_1)); @@ -88,11 +89,17 @@ audit::reset_primary_alive_timer() void audit::handle_primary_alive_timeout(const boost::system::error_code& ec) { - if (ec) + if (ec && ec != boost::system::errc::operation_canceled) { - LOG(debug) << "Primary alive timeout canceled " << ec.message(); + LOG(error) << "primary alive timeout canceled " << ec.message(); return; } + if (ec == boost::system::errc::operation_canceled) + { + LOG(trace) << "primary still alive"; + return; + } + this->report_error(bzn::NO_PRIMARY_METRIC_NAME, str(boost::format("No primary alive [%1%]") % ++(this->primary_dead_count))); this->primary_alive_timer->expires_from_now(this->primary_timeout); @@ -120,7 +127,7 @@ audit::send_to_monitor(const std::string& stat) return; } - LOG(debug) << boost::format("Sending stat '%1%' to monitor at %2%:%3%") + LOG(trace) << boost::format("sending stat '%1%' to monitor at %2%:%3%") % stat % this->monitor_endpoint->address().to_string() % this->monitor_endpoint->port(); @@ -146,10 +153,10 @@ audit::handle(const bzn_envelope& env, std::shared_ptr sessio audit_message message; if (!message.ParseFromString(env.audit())) { - LOG(error) << "Failed to parse audit message from " << env.sender(); + LOG(error) << "failed to parse audit message from " << env.sender(); } - LOG(debug) << "Got audit message" << message.DebugString(); + LOG(trace) << "got audit message" << message.DebugString(); if (message.has_pbft_commit()) { @@ -165,7 +172,7 @@ audit::handle(const bzn_envelope& env, std::shared_ptr sessio } else { - LOG(error) << "Got an unknown audit message? "; + LOG(error) << "got an unknown audit message? " << message.DebugString(); } session->close(); @@ -177,7 +184,7 @@ void audit::handle_primary_status(const primary_status& primary_status) if (this->recorded_primaries.count(primary_status.view()) == 0) { - LOG(info) << "audit recording that primary of view " << primary_status.view() << " is '" << primary_status.primary() << "'"; + LOG(info) << "observed primary of view " << primary_status.view() << " to be '" << primary_status.primary() << "'"; this->send_to_monitor(bzn::PRIMARY_HEARD_METRIC_NAME+bzn::STATSD_COUNTER_FORMAT); this->recorded_primaries[primary_status.view()] = primary_status.primary(); this->trim(); @@ -185,7 +192,7 @@ void audit::handle_primary_status(const primary_status& primary_status) else if (this->recorded_primaries[primary_status.view()] != primary_status.primary()) { std::string err = str(boost::format( - "Conflicting primary elected! '%1%' is the recorded primary of view %2%, but '%3%' claims to be the primary of the same view.") + "Conflicting primary detected! '%1%' is the recorded primary of view %2%, but '%3%' claims to be the primary of the same view.") % this->recorded_primaries[primary_status.view()] % primary_status.view() % primary_status.primary()); @@ -204,7 +211,7 @@ audit::handle_pbft_commit(const pbft_commit_notification& commit) if (this->recorded_pbft_commits.count(commit.sequence_number()) == 0) { - LOG(info) << "audit recording that message '" << commit.operation() << "' is committed at sequence " << commit.sequence_number(); + LOG(debug) << "observed that message '" << bytes_to_debug_string(commit.operation()) << "' is committed at sequence " << commit.sequence_number(); this->recorded_pbft_commits[commit.sequence_number()] = commit.operation(); this->trim(); } diff --git a/node/node.cpp b/node/node.cpp index e6f3c71c..1258ca8f 100644 --- a/node/node.cpp +++ b/node/node.cpp @@ -108,7 +108,7 @@ node::do_accept() { auto ep = self->acceptor_socket->remote_endpoint(); - LOG(debug) << "connection from: " << ep.address() << ":" << ep.port(); + LOG(trace) << "connection from: " << ep.address() << ":" << ep.port(); auto ws = self->websocket->make_unique_websocket_stream( self->acceptor_socket->get_tcp_socket()); diff --git a/node/session.cpp b/node/session.cpp index 668893ad..54975dee 100644 --- a/node/session.cpp +++ b/node/session.cpp @@ -78,7 +78,7 @@ session::do_read() if (ec) { // don't log close of websocket... - if (ec != boost::beast::websocket::error::closed) + if (ec != boost::beast::websocket::error::closed && ec != boost::asio::error::eof) { LOG(error) << "websocket read failed: " << ec.message(); } @@ -211,7 +211,7 @@ session::start_idle_timeout() { this->idle_timer->cancel(); - LOG(debug) << "resetting " << this->ws_idle_timeout.count() << "ms idle timer"; + LOG(trace) << "resetting " << this->ws_idle_timeout.count() << "ms idle timer"; this->idle_timer->expires_from_now(this->ws_idle_timeout); diff --git a/options/simple_options.cpp b/options/simple_options.cpp index a8de968f..08dd0564 100644 --- a/options/simple_options.cpp +++ b/options/simple_options.cpp @@ -112,7 +112,7 @@ simple_options::build_options() po::options_description audit("Audit"); audit.add_options() (AUDIT_ENABLED.c_str(), - po::value()->default_value(false), + po::value()->default_value(true), "enable audit module") (AUDIT_MEM_SIZE.c_str(), po::value()->default_value(10000), @@ -127,9 +127,6 @@ simple_options::build_options() po::options_description experimental("Experimental"); experimental.add_options() - (AUDIT_MEM_SIZE.c_str(), - po::value()->default_value(10000), - "max size of audit datastructures") (PBFT_ENABLED.c_str(), po::value()->default_value(false), "use pbft consensus instead of raft (experimental)")