Skip to content
This repository has been archived by the owner on Jun 23, 2022. It is now read-only.

Commit

Permalink
fd: update failure detection log (#256)
Browse files Browse the repository at this point in the history
  • Loading branch information
hycdong authored and neverchanje committed May 30, 2019
1 parent f07999a commit 100afc7
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 22 deletions.
44 changes: 35 additions & 9 deletions src/dist/failure_detector/failure_detector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,14 @@ void failure_detector::check_all_records()
if (record.is_alive &&
now + _check_interval_milliseconds - record.last_send_time_for_beacon_with_ack >
_lease_milliseconds) {
derror("master %s disconnected, now=%" PRId64 ", last_send_time=%" PRId64
", now+check_interval-last_send_time=%" PRId64,
record.node.to_string(),
now,
record.last_send_time_for_beacon_with_ack,
now + _check_interval_milliseconds -
record.last_send_time_for_beacon_with_ack);

expire.push_back(record.node);
record.is_alive = false;

Expand Down Expand Up @@ -264,6 +272,13 @@ void failure_detector::check_all_records()

if (record.is_alive != false &&
now - record.last_beacon_recv_time > _grace_milliseconds) {
derror("worker %s disconnected, now=%" PRId64 ", last_beacon_recv_time=%" PRId64
", now-last_recv=%" PRId64,
record.node.to_string(),
now,
record.last_beacon_recv_time,
now - record.last_beacon_recv_time);

expire.push_back(record.node);
record.is_alive = false;

Expand Down Expand Up @@ -324,12 +339,20 @@ void failure_detector::on_ping_internal(const beacon_msg &beacon, /*out*/ beacon
// update last_beacon_recv_time
itr->second.last_beacon_recv_time = now;

ddebug("master %s update last_beacon_recv_time=%" PRId64,
itr->second.node.to_string(),
itr->second.last_beacon_recv_time);

if (itr->second.is_alive == false) {
itr->second.is_alive = true;

report(node, false, true);
on_worker_connected(node);
}
} else {
ddebug("now[%" PRId64 "] <= last_recv_time[%" PRId64 "]",
now,
itr->second.last_beacon_recv_time);
}
}

Expand Down Expand Up @@ -385,14 +408,13 @@ bool failure_detector::end_ping_internal(::dsn::error_code err, const beacon_ack

if (!is_time_greater_than(beacon_send_time, record.last_send_time_for_beacon_with_ack)) {
// out-dated beacon acks, do nothing
dinfo("ignore out dated beacon acks, send_time(%lld), last_beacon(%lld)",
beacon_send_time,
record.last_send_time_for_beacon_with_ack);
ddebug("ignore out dated beacon acks, send_time(%lld), last_beacon(%lld)",
beacon_send_time,
record.last_send_time_for_beacon_with_ack);
return false;
}

// now the ack is applicable

if (err != ERR_OK) {
return true;
}
Expand All @@ -409,6 +431,10 @@ bool failure_detector::end_ping_internal(::dsn::error_code err, const beacon_ack
record.last_send_time_for_beacon_with_ack = beacon_send_time;
record.rejected = false;

ddebug("worker %s send beacon succeed, update last_send_time=%" PRId64,
record.node.to_string(),
record.last_send_time_for_beacon_with_ack);

if (record.is_alive == false &&
now - record.last_send_time_for_beacon_with_ack <= _lease_milliseconds) {
// report master connected
Expand All @@ -428,7 +454,7 @@ bool failure_detector::unregister_master(::dsn::rpc_address node)
if (it != _masters.end()) {
it->second.send_beacon_timer->cancel(true);
_masters.erase(it);
dinfo("unregister master[%s] successfully", node.to_string());
ddebug("unregister master[%s] successfully", node.to_string());
return true;
} else {
ddebug("unregister master[%s] failed, cannot find it in FD", node.to_string());
Expand Down Expand Up @@ -510,10 +536,10 @@ void failure_detector::send_beacon(::dsn::rpc_address target, uint64_t time)
beacon.to_addr = target;
beacon.__set_start_time(static_cast<int64_t>(dsn::utils::process_start_millis()));

dinfo("send ping message, from[%s], to[%s], time[%" PRId64 "]",
beacon.from_addr.to_string(),
beacon.to_addr.to_string(),
time);
ddebug("send ping message, from[%s], to[%s], time[%" PRId64 "]",
beacon.from_addr.to_string(),
beacon.to_addr.to_string(),
time);

::dsn::rpc::call(target,
RPC_FD_FAILURE_DETECTOR_PING,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,14 +77,14 @@ void slave_failure_detector_with_multimaster::end_ping(::dsn::error_code err,
const fd::beacon_ack &ack,
void *)
{
dinfo("end ping result, error[%s], time[%" PRId64
"], ack.this_node[%s], ack.primary_node[%s], ack.is_master[%s], ack.allowed[%s]",
err.to_string(),
ack.time,
ack.this_node.to_string(),
ack.primary_node.to_string(),
ack.is_master ? "true" : "false",
ack.allowed ? "true" : "false");
ddebug("end ping result, error[%s], time[%" PRId64
"], ack.this_node[%s], ack.primary_node[%s], ack.is_master[%s], ack.allowed[%s]",
err.to_string(),
ack.time,
ack.this_node.to_string(),
ack.primary_node.to_string(),
ack.is_master ? "true" : "false",
ack.allowed ? "true" : "false");

zauto_lock l(failure_detector::_lock);
if (!failure_detector::end_ping_internal(err, ack))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -261,11 +261,14 @@ void meta_server_failure_detector::on_ping(const fd::beacon_msg &beacon,
failure_detector::on_ping_internal(beacon, ack);
}

dinfo("on_ping, is_master(%s), from_node(%s), this_node(%s), primary_node(%s)",
ack.is_master ? "true" : "false",
beacon.from_addr.to_string(),
ack.this_node.to_string(),
ack.primary_node.to_string());
ddebug("on_ping, beacon send time[%ld], is_master(%s), from_node(%s), this_node(%s), "
"primary_node(%s)",
ack.time,
ack.is_master ? "true" : "false",
beacon.from_addr.to_string(),
ack.this_node.to_string(),
ack.primary_node.to_string());

reply(ack);
}

Expand Down

0 comments on commit 100afc7

Please sign in to comment.