Skip to content

Commit

Permalink
mon/OSDMonitor: simplify failure reporters vs reports logic
Browse files Browse the repository at this point in the history
Since each OSD only sends a failure report for a given peer once,
we don't need to count reports vs reporters separately.  (This was
probably a bad idea anyway.)  Remove this logic and the associated
config option.

Reported-by: Greg Farnum <gfarnum@redhat.com>
Signed-off-by: Sage Weil <sage@redhat.com>
  • Loading branch information
liewegas committed Nov 23, 2015
1 parent 53f2c7f commit 0269a0c
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 26 deletions.
1 change: 0 additions & 1 deletion src/common/config_opts.h
Expand Up @@ -277,7 +277,6 @@ OPTION(mon_sync_debug_provider, OPT_INT, -1) // monitor to be used as the sync p
OPTION(mon_sync_debug_provider_fallback, OPT_INT, -1) // monitor to be used as fallback if sync provider fails
OPTION(mon_inject_sync_get_chunk_delay, OPT_DOUBLE, 0) // inject N second delay on each get_chunk request
OPTION(mon_osd_min_down_reporters, OPT_INT, 1) // number of OSDs who need to report a down OSD for it to count
OPTION(mon_osd_min_down_reports, OPT_INT, 3) // number of times a down OSD must be reported for it to count
OPTION(mon_osd_force_trim_to, OPT_INT, 0) // force mon to trim maps to this point, regardless of min_last_epoch_clean (dangerous, use with care)
OPTION(mon_mds_force_trim_to, OPT_INT, 0) // force mon to trim mdsmaps to this point (dangerous, use with care)

Expand Down
29 changes: 16 additions & 13 deletions src/mon/OSDMonitor.cc
Expand Up @@ -1673,9 +1673,9 @@ bool OSDMonitor::check_failure(utime_t now, int target_osd, failure_info_t& fi)
}

dout(10) << " osd." << target_osd << " has "
<< fi.reporters.size() << " reporters and "
<< fi.num_reports << " reports, "
<< grace << " grace (" << orig_grace << " + " << my_grace << " + " << peer_grace << "), max_failed_since " << max_failed_since
<< fi.reporters.size() << " reporters, "
<< grace << " grace (" << orig_grace << " + " << my_grace
<< " + " << peer_grace << "), max_failed_since " << max_failed_since
<< dendl;

// already pending failure?
Expand All @@ -1686,13 +1686,13 @@ bool OSDMonitor::check_failure(utime_t now, int target_osd, failure_info_t& fi)
}

if (failed_for >= grace &&
((int)fi.reporters.size() >= g_conf->mon_osd_min_down_reporters) &&
(fi.num_reports >= g_conf->mon_osd_min_down_reports)) {
dout(1) << " we have enough reports/reporters to mark osd." << target_osd << " down" << dendl;
((int)fi.reporters.size() >= g_conf->mon_osd_min_down_reporters)) {
dout(1) << " we have enough reporters to mark osd." << target_osd
<< " down" << dendl;
pending_inc.new_state[target_osd] = CEPH_OSD_UP;

mon->clog->info() << osdmap.get_inst(target_osd) << " failed ("
<< fi.num_reports << " reports from " << (int)fi.reporters.size() << " peers after "
<< (int)fi.reporters.size() << " reporters after "
<< failed_for << " >= grace " << grace << ")\n";
return true;
}
Expand All @@ -1703,7 +1703,8 @@ bool OSDMonitor::prepare_failure(MonOpRequestRef op)
{
op->mark_osdmon_event(__func__);
MOSDFailure *m = static_cast<MOSDFailure*>(op->get_req());
dout(1) << "prepare_failure " << m->get_target() << " from " << m->get_orig_source_inst()
dout(1) << "prepare_failure " << m->get_target()
<< " from " << m->get_orig_source_inst()
<< " is reporting failure:" << m->if_osd_failed() << dendl;

int target_osd = m->get_target().name.num();
Expand All @@ -1713,7 +1714,9 @@ bool OSDMonitor::prepare_failure(MonOpRequestRef op)

// calculate failure time
utime_t now = ceph_clock_now(g_ceph_context);
utime_t failed_since = m->get_recv_stamp() - utime_t(m->failed_for ? m->failed_for : g_conf->osd_heartbeat_grace, 0);
utime_t failed_since =
m->get_recv_stamp() -
utime_t(m->failed_for ? m->failed_for : g_conf->osd_heartbeat_grace, 0);

if (m->if_osd_failed()) {
// add a report
Expand All @@ -1729,7 +1732,7 @@ bool OSDMonitor::prepare_failure(MonOpRequestRef op)
} else {
// remove the report
mon->clog->debug() << m->get_target() << " failure report canceled by "
<< m->get_orig_source_inst() << "\n";
<< m->get_orig_source_inst() << "\n";
if (failure_info.count(target_osd)) {
failure_info_t& fi = failure_info[target_osd];
list<MonOpRequestRef> ls;
Expand All @@ -1741,12 +1744,12 @@ bool OSDMonitor::prepare_failure(MonOpRequestRef op)
ls.pop_front();
}
if (fi.reporters.empty()) {
dout(10) << " removing last failure_info for osd." << target_osd << dendl;
dout(10) << " removing last failure_info for osd." << target_osd
<< dendl;
failure_info.erase(target_osd);
} else {
dout(10) << " failure_info for osd." << target_osd << " now "
<< fi.reporters.size() << " reporters and "
<< fi.num_reports << " reports" << dendl;
<< fi.reporters.size() << " reporters" << dendl;
}
} else {
dout(10) << " no failure_info for osd." << target_osd << dendl;
Expand Down
18 changes: 6 additions & 12 deletions src/mon/OSDMonitor.h
Expand Up @@ -51,22 +51,20 @@ class PGMap;

/// information about a particular peer's failure reports for one osd
struct failure_reporter_t {
int num_reports; ///< reports from this reporter
utime_t failed_since; ///< when they think it failed
MonOpRequestRef op; ///< most recent failure op request
MonOpRequestRef op; ///< failure op request

failure_reporter_t() : num_reports(0) {}
failure_reporter_t(utime_t s) : num_reports(1), failed_since(s) {}
failure_reporter_t() {}
failure_reporter_t(utime_t s) : failed_since(s) {}
~failure_reporter_t() { }
};

/// information about all failure reports for one osd
struct failure_info_t {
map<int, failure_reporter_t> reporters; ///< reporter -> # reports
map<int, failure_reporter_t> reporters; ///< reporter -> failed_since etc
utime_t max_failed_since; ///< most recent failed_since
int num_reports;

failure_info_t() : num_reports(0) {}
failure_info_t() {}

utime_t get_failed_since() {
if (max_failed_since == utime_t() && !reporters.empty()) {
Expand All @@ -83,18 +81,15 @@ struct failure_info_t {
// set the message for the latest report. return any old op request we had,
// if any, so we can discard it.
MonOpRequestRef add_report(int who, utime_t failed_since,
MonOpRequestRef op) {
MonOpRequestRef op) {
map<int, failure_reporter_t>::iterator p = reporters.find(who);
if (p == reporters.end()) {
if (max_failed_since == utime_t())
max_failed_since = failed_since;
else if (max_failed_since < failed_since)
max_failed_since = failed_since;
p = reporters.insert(map<int, failure_reporter_t>::value_type(who, failure_reporter_t(failed_since))).first;
} else {
p->second.num_reports++;
}
num_reports++;

MonOpRequestRef ret = p->second.op;
p->second.op = op;
Expand All @@ -116,7 +111,6 @@ struct failure_info_t {
map<int, failure_reporter_t>::iterator p = reporters.find(who);
if (p == reporters.end())
return;
num_reports -= p->second.num_reports;
reporters.erase(p);
if (reporters.empty())
max_failed_since = utime_t();
Expand Down

0 comments on commit 0269a0c

Please sign in to comment.