From 0f2dee9aa48a00a7f2f809cd4d20e98df771da81 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 14 Feb 2017 15:00:09 -0500 Subject: [PATCH] osd/PG: restrict want_acting to up+acting on recovery completion On recovery completion we recalculate want_acting to see if we should add recently backfilled osds into acting. However, at this point we may have gotten infos from others OSDs outside of up/acting that could be used for want_acting. We currently assert that only up/acting osds are used in PG::RecoveryState::Active::react(const AdvMap&), so we must restrict want_acting to up/acting here. We could remove this restriction, but it would mean 1) checking on every map change that want_acting hasn't been invalidated, and if so, recalculating want_acting and requesting a new pg_temp. Also, presumably 2) on each new info, checking whether we can construct a better want_acting, and if so, doing it. That would be a good thing, but is a more complicated change. In reality this case comes up very rarely, so simply make our post-recovery want_acting calculation limit itself to up+acting. See 1db67c443d84dc5d1ff53cc820fdfd4a2128b680 for the assertion. Signed-off-by: Sage Weil --- src/osd/PG.cc | 49 +++++++++++++++++++++++++++++++++++++------------ src/osd/PG.h | 19 +++++++++++-------- 2 files changed, 48 insertions(+), 20 deletions(-) diff --git a/src/osd/PG.cc b/src/osd/PG.cc index c31cc7bd32625..56a7c43e89d26 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -1005,7 +1005,9 @@ PG::Scrubber::~Scrubber() {} * 3) Prefer current primary */ map::const_iterator PG::find_best_info( - const map &infos, bool *history_les_bound) const + const map &infos, + bool restrict_to_up_acting, + bool *history_les_bound) const { assert(history_les_bound); /* See doc/dev/osd_internals/last_epoch_started.rst before attempting @@ -1045,6 +1047,9 @@ map::const_iterator PG::find_best_info( for (map::const_iterator p = infos.begin(); p != infos.end(); ++p) { + if (restrict_to_up_acting && !is_up(p->first) && + !is_acting(p->first)) + continue; // Only consider peers with last_update >= min_last_update_acceptable if (p->second.last_update < min_last_update_acceptable) continue; @@ -1103,17 +1108,19 @@ void PG::calc_ec_acting( pg_shard_t up_primary, const map &all_info, bool compat_mode, + bool restrict_to_up_acting, vector *_want, set *backfill, set *acting_backfill, pg_shard_t *want_primary, - ostream &ss) { + ostream &ss) +{ vector want(size, CRUSH_ITEM_NONE); map > all_info_by_shard; unsigned usable = 0; - for(map::const_iterator i = all_info.begin(); - i != all_info.end(); - ++i) { + for (map::const_iterator i = all_info.begin(); + i != all_info.end(); + ++i) { all_info_by_shard[i->first.shard].insert(i->first); } for (uint8_t i = 0; i < want.size(); ++i) { @@ -1140,7 +1147,7 @@ void PG::calc_ec_acting( ss << " selecting acting[i]: " << pg_shard_t(acting[i], shard_id_t(i)) << std::endl; want[i] = acting[i]; ++usable; - } else { + } else if (!restrict_to_up_acting) { for (set::iterator j = all_info_by_shard[shard_id_t(i)].begin(); j != all_info_by_shard[shard_id_t(i)].end(); ++j) { @@ -1189,6 +1196,7 @@ void PG::calc_replicated_acting( pg_shard_t up_primary, const map &all_info, bool compat_mode, + bool restrict_to_up_acting, vector *want, set *backfill, set *acting_backfill, @@ -1196,7 +1204,8 @@ void PG::calc_replicated_acting( ostream &ss) { ss << "calc_acting newest update on osd." << auth_log_shard->first - << " with " << auth_log_shard->second << std::endl; + << " with " << auth_log_shard->second + << (restrict_to_up_acting ? " restrict_to_up_acting" : "") << std::endl; pg_shard_t auth_log_shard_id = auth_log_shard->first; // select primary @@ -1287,6 +1296,9 @@ void PG::calc_replicated_acting( } } + if (restrict_to_up_acting) { + return; + } for (map::const_iterator i = all_info.begin(); i != all_info.end(); ++i) { @@ -1323,8 +1335,19 @@ void PG::calc_replicated_acting( * * calculate the desired acting, and request a change with the monitor * if it differs from the current acting. + * + * if restrict_to_up_acting=true, we filter out anything that's not in + * up/acting. in order to lift this restriction, we need to + * 1) check whether it's worth switching the acting set any time we get + * a new pg info (not just here, when recovery finishes) + * 2) check whether anything in want_acting went down on each new map + * (and, if so, calculate a new want_acting) + * 3) remove the assertion in PG::RecoveryState::Active::react(const AdvMap) + * TODO! */ -bool PG::choose_acting(pg_shard_t &auth_log_shard_id, bool *history_les_bound) +bool PG::choose_acting(pg_shard_t &auth_log_shard_id, + bool restrict_to_up_acting, + bool *history_les_bound) { map all_info(peer_info.begin(), peer_info.end()); all_info[pg_whoami] = info; @@ -1336,7 +1359,7 @@ bool PG::choose_acting(pg_shard_t &auth_log_shard_id, bool *history_les_bound) } map::const_iterator auth_log_shard = - find_best_info(all_info, history_les_bound); + find_best_info(all_info, restrict_to_up_acting, history_les_bound); if (auth_log_shard == all_info.end()) { if (up != acting) { @@ -1390,6 +1413,7 @@ bool PG::choose_acting(pg_shard_t &auth_log_shard_id, bool *history_les_bound) up_primary, all_info, compat_mode, + restrict_to_up_acting, &want, &want_backfill, &want_acting_backfill, @@ -1405,6 +1429,7 @@ bool PG::choose_acting(pg_shard_t &auth_log_shard_id, bool *history_les_bound) up_primary, all_info, compat_mode, + restrict_to_up_acting, &want, &want_backfill, &want_acting_backfill, @@ -6773,7 +6798,7 @@ PG::RecoveryState::Recovered::Recovered(my_context ctx) // adjust acting set? (e.g. because backfill completed...) bool history_les_bound = false; if (pg->acting != pg->up && !pg->choose_acting(auth_log_shard, - &history_les_bound)) + true, &history_les_bound)) assert(pg->want_acting.size()); if (context< Active >().all_replicas_activated) @@ -7564,8 +7589,8 @@ PG::RecoveryState::GetLog::GetLog(my_context ctx) PG *pg = context< RecoveryMachine >().pg; // adjust acting? - if (!pg->choose_acting(auth_log_shard, - &context< Peering >().history_les_bound)) { + if (!pg->choose_acting(auth_log_shard, false, + &context< Peering >().history_les_bound)) { if (!pg->want_acting.empty()) { post_event(NeedActingChange()); } else { diff --git a/src/osd/PG.h b/src/osd/PG.h index ab81b9a9696af..9cbb9d93ce7fa 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -870,17 +870,16 @@ class PG : protected DoutPrefixProvider { return actingbackfill.count(osd); } bool is_acting(pg_shard_t osd) const { - if (pool.info.ec_pool()) { - return acting.size() > (unsigned)osd.shard && acting[osd.shard] == osd.osd; - } else { - return std::find(acting.begin(), acting.end(), osd.osd) != acting.end(); - } + return has_shard(pool.info.ec_pool(), acting, osd); } bool is_up(pg_shard_t osd) const { - if (pool.info.ec_pool()) { - return up.size() > (unsigned)osd.shard && up[osd.shard] == osd.osd; + return has_shard(pool.info.ec_pool(), up, osd); + } + static bool has_shard(bool ec, const vector& v, pg_shard_t osd) { + if (ec) { + return v.size() > (unsigned)osd.shard && v[osd.shard] == osd.osd; } else { - return std::find(up.begin(), up.end(), osd.osd) != up.end(); + return std::find(v.begin(), v.end(), osd.osd) != v.end(); } } @@ -983,6 +982,7 @@ class PG : protected DoutPrefixProvider { map::const_iterator find_best_info( const map &infos, + bool restrict_to_up_acting, bool *history_les_bound) const; static void calc_ec_acting( map::const_iterator auth_log_shard, @@ -993,6 +993,7 @@ class PG : protected DoutPrefixProvider { pg_shard_t up_primary, const map &all_info, bool compat_mode, + bool restrict_to_up_acting, vector *want, set *backfill, set *acting_backfill, @@ -1007,12 +1008,14 @@ class PG : protected DoutPrefixProvider { pg_shard_t up_primary, const map &all_info, bool compat_mode, + bool restrict_to_up_acting, vector *want, set *backfill, set *acting_backfill, pg_shard_t *want_primary, ostream &ss); bool choose_acting(pg_shard_t &auth_log_shard, + bool restrict_to_up_acting, bool *history_les_bound); void build_might_have_unfound(); void activate(