Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

mon, osd: fix potential collided *Up Set* after PG remapping #20653

Merged
merged 1 commit into from Mar 2, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
19 changes: 19 additions & 0 deletions src/crush/CrushWrapper.cc
Expand Up @@ -768,6 +768,25 @@ int CrushWrapper::get_children(int id, list<int> *children) const
return b->size;
}

int CrushWrapper::get_rule_failure_domain(int rule_id)
{
crush_rule *rule = get_rule(rule_id);
if (IS_ERR(rule)) {
return -ENOENT;
}
int type = 0; // default to osd-level
for (unsigned s = 0; s < rule->len; ++s) {
if ((rule->steps[s].op == CRUSH_RULE_CHOOSE_FIRSTN ||
rule->steps[s].op == CRUSH_RULE_CHOOSE_INDEP ||
rule->steps[s].op == CRUSH_RULE_CHOOSELEAF_FIRSTN ||
rule->steps[s].op == CRUSH_RULE_CHOOSELEAF_INDEP) &&
rule->steps[s].arg2 > type) {
type = rule->steps[s].arg2;
}
}
return type;
}

int CrushWrapper::_get_leaves(int id, list<int> *leaves) const
{
assert(leaves);
Expand Down
7 changes: 7 additions & 0 deletions src/crush/CrushWrapper.h
Expand Up @@ -729,6 +729,13 @@ class CrushWrapper {
*/
int get_children(int id, list<int> *children) const;

/**
* get failure-domain type of a specific crush rule
* @param rule_id crush rule id
* @return type of failure-domain or a negative errno on error.
*/
int get_rule_failure_domain(int rule_id);

/**
* enumerate leaves(devices) of given node
*
Expand Down
3 changes: 3 additions & 0 deletions src/mon/OSDMonitor.cc
Expand Up @@ -1237,6 +1237,9 @@ void OSDMonitor::encode_pending(MonitorDBStore::TransactionRef t)
}
}

// clean inappropriate pg_upmap/pg_upmap_items (if any)
osdmap.maybe_remove_pg_upmaps(cct, osdmap, &pending_inc);

// features for osdmap and its incremental
uint64_t features = mon->get_quorum_con_features();

Expand Down
106 changes: 106 additions & 0 deletions src/osd/OSDMap.cc
Expand Up @@ -1614,6 +1614,112 @@ void OSDMap::clean_temps(CephContext *cct,
}
}

void OSDMap::maybe_remove_pg_upmaps(CephContext *cct,
const OSDMap& osdmap,
Incremental *pending_inc)
{
ldout(cct, 10) << __func__ << dendl;
OSDMap tmpmap;
tmpmap.deepish_copy_from(osdmap);
tmpmap.apply_incremental(*pending_inc);

for (auto& p : tmpmap.pg_upmap) {
ldout(cct, 10) << __func__ << " pg_upmap entry "
<< "[" << p.first << ":" << p.second << "]"
<< dendl;
auto crush_rule = tmpmap.get_pg_pool_crush_rule(p.first);
if (crush_rule < 0) {
lderr(cct) << __func__ << " unable to load crush-rule of pg "
<< p.first << dendl;
continue;
}
auto type = tmpmap.crush->get_rule_failure_domain(crush_rule);
if (type < 0) {
lderr(cct) << __func__ << " unable to load failure-domain-type of pg "
<< p.first << dendl;
continue;
}
ldout(cct, 10) << __func__ << " pg " << p.first
<< " crush-rule-id " << crush_rule
<< " failure-domain-type " << type
<< dendl;
vector<int> raw;
int primary;
tmpmap.pg_to_raw_up(p.first, &raw, &primary);
set<int> parents;
bool collide = false;
for (auto osd : raw) {
auto parent = tmpmap.crush->get_parent_of_type(osd, type);
auto r = parents.insert(parent);
if (!r.second) {
collide = true;
break;
}
}
if (collide) {
ldout(cct, 10) << __func__ << " removing invalid pg_upmap "
<< "[" << p.first << ":" << p.second << "]"
<< ", final mapping result will be: " << raw
<< dendl;
auto it = pending_inc->new_pg_upmap.find(p.first);
if (it != pending_inc->new_pg_upmap.end()) {
pending_inc->new_pg_upmap.erase(it);
}
if (osdmap.pg_upmap.count(p.first)) {
pending_inc->old_pg_upmap.insert(p.first);
}
}
}
for (auto& p : tmpmap.pg_upmap_items) {
ldout(cct, 10) << __func__ << " pg_upmap_items entry "
<< "[" << p.first << ":" << p.second << "]"
<< dendl;
auto crush_rule = tmpmap.get_pg_pool_crush_rule(p.first);
if (crush_rule < 0) {
lderr(cct) << __func__ << " unable to load crush-rule of pg "
<< p.first << dendl;
continue;
}
auto type = tmpmap.crush->get_rule_failure_domain(crush_rule);
if (type < 0) {
lderr(cct) << __func__ << " unable to load failure-domain-type of pg "
<< p.first << dendl;
continue;
}
ldout(cct, 10) << __func__ << " pg " << p.first
<< " crush_rule_id " << crush_rule
<< " failure_domain_type " << type
<< dendl;
vector<int> raw;
int primary;
tmpmap.pg_to_raw_up(p.first, &raw, &primary);
set<int> parents;
bool collide = false;
for (auto osd : raw) {
auto parent = tmpmap.crush->get_parent_of_type(osd, type);
auto r = parents.insert(parent);
if (!r.second) {
collide = true;
break;
}
}
if (collide) {
ldout(cct, 10) << __func__ << " removing invalid pg_upmap_items "
<< "[" << p.first << ":" << p.second << "]"
<< ", final mapping result will be: " << raw
<< dendl;
// This is overkilling, but simpler..
auto it = pending_inc->new_pg_upmap_items.find(p.first);
if (it != pending_inc->new_pg_upmap_items.end()) {
pending_inc->new_pg_upmap_items.erase(it);
}
if (osdmap.pg_upmap_items.count(p.first)) {
pending_inc->old_pg_upmap_items.insert(p.first);
}
}
}
}

int OSDMap::apply_incremental(const Incremental &inc)
{
new_blacklist_entries = false;
Expand Down
13 changes: 13 additions & 0 deletions src/osd/OSDMap.h
Expand Up @@ -988,6 +988,10 @@ class OSDMap {
*/
uint64_t get_up_osd_features() const;

void maybe_remove_pg_upmaps(CephContext *cct,
const OSDMap& osdmap,
Incremental *pending_inc);

int apply_incremental(const Incremental &inc);

/// try to re-use/reference addrs in oldmap from newmap
Expand Down Expand Up @@ -1068,6 +1072,15 @@ class OSDMap {
return p->get_size();
}

int get_pg_pool_crush_rule(pg_t pgid) const {
if (!pg_exists(pgid)) {
return -ENOENT;
}
const pg_pool_t *p = get_pg_pool(pgid.pool());
assert(p);
return p->get_crush_rule();
}

private:
/// pg -> (raw osd list)
void _pg_to_raw_osds(
Expand Down