From f90b8bc1a2b3ffa5ba52c0992c1c5ff52e5ea104 Mon Sep 17 00:00:00 2001 From: Joao Eduardo Luis Date: Thu, 7 Jan 2016 11:05:44 +0000 Subject: [PATCH 1/2] mon: Monitor: adaptative clock skew detection interval When in the presence of a clock skew, adjust the checking interval according to how many rounds have gone by since the last clean check. If a skew is detected, instead of waiting an additional 300 seconds we will perform the check more frequently, gradually backing off the frequency if the skew is still in place (up to a maximum of 'mon_timecheck_interval', default: 300s). This will help with transient skews. Signed-off-by: Joao Eduardo Luis (cherry pick from commit 45e16d0d7407cf4e886dde9321bb91c1a087776d) Conflicts: src/common/config_opts.h Merge the change line. src/mon/Monitor.h handle_timecheck_leader(MonOpRequestRef op) was replaced with handle_timecheck_leader(MTimeCheck *m) also for handle_timecheck_peon and handle_timecheck. --- src/common/config_opts.h | 1 + src/mon/Monitor.cc | 71 +++++++++++++++++++++++++++++++++++++--- src/mon/Monitor.h | 22 +++++++++++++ 3 files changed, 89 insertions(+), 5 deletions(-) diff --git a/src/common/config_opts.h b/src/common/config_opts.h index 76763e4ee9bde..f774ca9f77963 100644 --- a/src/common/config_opts.h +++ b/src/common/config_opts.h @@ -204,6 +204,7 @@ OPTION(mon_clock_drift_allowed, OPT_FLOAT, .050) // allowed clock drift between OPTION(mon_clock_drift_warn_backoff, OPT_FLOAT, 5) // exponential backoff for clock drift warnings OPTION(mon_timecheck_interval, OPT_FLOAT, 300.0) // on leader, timecheck (clock drift check) interval (seconds) OPTION(mon_accept_timeout, OPT_FLOAT, 10.0) // on leader, if paxos update isn't accepted +OPTION(mon_timecheck_skew_interval, OPT_FLOAT, 30.0) // on leader, timecheck (clock drift check) interval when in presence of a skew (seconds) OPTION(mon_pg_create_interval, OPT_FLOAT, 30.0) // no more than every 30s OPTION(mon_pg_stuck_threshold, OPT_INT, 300) // number of seconds after which pgs can be considered inactive, unclean, or stale (see doc/control.rst under dump_stuck for more info) OPTION(mon_pg_warn_min_per_osd, OPT_INT, 30) // min # pgs per (in) osd before we warn the admin diff --git a/src/mon/Monitor.cc b/src/mon/Monitor.cc index d62d7cf1e1cc4..7662f07faa157 100644 --- a/src/mon/Monitor.cc +++ b/src/mon/Monitor.cc @@ -178,6 +178,7 @@ Monitor::Monitor(CephContext* cct_, string nm, MonitorDBStore *s, timecheck_round(0), timecheck_acks(0), + timecheck_rounds_since_clean(0), timecheck_event(NULL), probe_timeout_event(NULL), @@ -3685,8 +3686,7 @@ void Monitor::timecheck_start_round() timecheck(); out: dout(10) << __func__ << " setting up next event" << dendl; - timecheck_event = new C_TimeCheck(this); - timer.add_event_after(g_conf->mon_timecheck_interval, timecheck_event); + timecheck_reset_event(); } void Monitor::timecheck_finish_round(bool success) @@ -3700,6 +3700,7 @@ void Monitor::timecheck_finish_round(bool success) assert(timecheck_waiting.empty()); assert(timecheck_acks == quorum.size()); timecheck_report(); + timecheck_check_skews(); return; } @@ -3733,6 +3734,65 @@ void Monitor::timecheck_cleanup() timecheck_waiting.clear(); timecheck_skews.clear(); timecheck_latencies.clear(); + + timecheck_rounds_since_clean = 0; +} + +void Monitor::timecheck_reset_event() +{ + if (timecheck_event) { + timer.cancel_event(timecheck_event); + timecheck_event = NULL; + } + + double delay = + cct->_conf->mon_timecheck_skew_interval * timecheck_rounds_since_clean; + + if (delay <= 0 || delay > cct->_conf->mon_timecheck_interval) { + delay = cct->_conf->mon_timecheck_interval; + } + + dout(10) << __func__ << " delay " << delay + << " rounds_since_clean " << timecheck_rounds_since_clean + << dendl; + + timecheck_event = new C_TimeCheck(this); + timer.add_event_after(delay, timecheck_event); +} + +void Monitor::timecheck_check_skews() +{ + dout(10) << __func__ << dendl; + assert(is_leader()); + assert((timecheck_round % 2) == 0); + if (monmap->size() == 1) { + assert(0 == "We are alone; we shouldn't have gotten here!"); + return; + } + assert(timecheck_latencies.size() == timecheck_skews.size()); + + bool found_skew = false; + for (map::iterator p = timecheck_skews.begin(); + p != timecheck_skews.end(); ++p) { + + double abs_skew; + if (timecheck_has_skew(p->second, &abs_skew)) { + dout(10) << __func__ + << " " << p->first << " skew " << abs_skew << dendl; + found_skew = true; + } + } + + if (found_skew) { + ++timecheck_rounds_since_clean; + timecheck_reset_event(); + } else if (timecheck_rounds_since_clean > 0) { + dout(1) << __func__ + << " no clock skews found after " << timecheck_rounds_since_clean + << " rounds" << dendl; + timecheck_rounds_since_clean = 0; + } + } void Monitor::timecheck_report() @@ -3755,7 +3815,8 @@ void Monitor::timecheck_report() m->epoch = get_epoch(); m->round = timecheck_round; - for (map::iterator it = timecheck_skews.begin(); it != timecheck_skews.end(); ++it) { + for (map::iterator it = timecheck_skews.begin(); + it != timecheck_skews.end(); ++it) { double skew = it->second; double latency = timecheck_latencies[it->first]; @@ -3814,10 +3875,10 @@ health_status_t Monitor::timecheck_status(ostringstream &ss, const double latency) { health_status_t status = HEALTH_OK; - double abs_skew = (skew_bound > 0 ? skew_bound : -skew_bound); assert(latency >= 0); - if (abs_skew > g_conf->mon_clock_drift_allowed) { + double abs_skew; + if (timecheck_has_skew(skew_bound, &abs_skew)) { status = HEALTH_WARN; ss << "clock skew " << abs_skew << "s" << " > max " << g_conf->mon_clock_drift_allowed << "s"; diff --git a/src/mon/Monitor.h b/src/mon/Monitor.h index 0d3978a5e7831..89af554dc52a6 100644 --- a/src/mon/Monitor.h +++ b/src/mon/Monitor.h @@ -54,6 +54,7 @@ #include "include/memory.h" #include "include/str_map.h" #include +#include #define CEPH_MON_PROTOCOL 13 /* cluster internal */ @@ -463,6 +464,15 @@ class Monitor : public Dispatcher, version_t timecheck_round; unsigned int timecheck_acks; utime_t timecheck_round_start; + /* When we hit a skew we will start a new round based off of + * 'mon_timecheck_skew_interval'. Each new round will be backed off + * until we hit 'mon_timecheck_interval' -- which is the typical + * interval when not in the presence of a skew. + * + * This variable tracks the number of rounds with skews since last clean + * so that we can report to the user and properly adjust the backoff. + */ + uint64_t timecheck_rounds_since_clean; /** * Time Check event. */ @@ -482,6 +492,8 @@ class Monitor : public Dispatcher, void timecheck_finish_round(bool success = true); void timecheck_cancel_round(); void timecheck_cleanup(); + void timecheck_reset_event(); + void timecheck_check_skews(); void timecheck_report(); void timecheck(); health_status_t timecheck_status(ostringstream &ss, @@ -490,6 +502,16 @@ class Monitor : public Dispatcher, void handle_timecheck_leader(MTimeCheck *m); void handle_timecheck_peon(MTimeCheck *m); void handle_timecheck(MTimeCheck *m); + + /** + * Returns 'true' if this is considered to be a skew; 'false' otherwise. + */ + bool timecheck_has_skew(const double skew_bound, double *abs) const { + double abs_skew = std::fabs(skew_bound); + if (abs) + *abs = abs_skew; + return (abs_skew > g_conf->mon_clock_drift_allowed); + } /** * @} */ From 01672b4afd6f886d6bb8527427663e062ab9d100 Mon Sep 17 00:00:00 2001 From: Joao Eduardo Luis Date: Thu, 7 Jan 2016 11:20:36 +0000 Subject: [PATCH 2/2] mon: Monitor: get rid of weighted clock skew reports By weighting the reports we were making it really hard to get rid of a clock skew warning once the cause had been fixed. Instead, as soon as we get a clean bill of health, let's run a new round and soon as possible and ascertain whether that was a transient fix or for realsies. That should be better than the alternative of waiting for an hour or something (for a large enough skew) for the warning to go away - and with it, the admin's sanity ("WHAT AM I DOING WRONG???"). Fixes: #14175 Signed-off-by: Joao Eduardo Luis (cherry pick from commit 17d8ff429c7dca8fc1ada6e7cc8a7c4924a22e28) --- src/mon/Monitor.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/mon/Monitor.cc b/src/mon/Monitor.cc index 7662f07faa157..bdaa1c1a864cf 100644 --- a/src/mon/Monitor.cc +++ b/src/mon/Monitor.cc @@ -3790,6 +3790,10 @@ void Monitor::timecheck_check_skews() dout(1) << __func__ << " no clock skews found after " << timecheck_rounds_since_clean << " rounds" << dendl; + // make sure the skews are really gone and not just a transient success + // this will run just once if not in the presence of skews again. + timecheck_rounds_since_clean = 1; + timecheck_reset_event(); timecheck_rounds_since_clean = 0; } @@ -3992,11 +3996,7 @@ void Monitor::handle_timecheck_leader(MTimeCheck *m) << " delta " << delta << " skew_bound " << skew_bound << " latency " << latency << dendl; - if (timecheck_skews.count(other) == 0) { - timecheck_skews[other] = skew_bound; - } else { - timecheck_skews[other] = (timecheck_skews[other]*0.8)+(skew_bound*0.2); - } + timecheck_skews[other] = skew_bound; timecheck_acks++; if (timecheck_acks == quorum.size()) {