Skip to content

Commit

Permalink
Merge pull request #25418 from xiexingguo/wip-luminous-upmap-fixes
Browse files Browse the repository at this point in the history
luminous: osd: backport recent upmap fixes

Reviewed-by: Neha Ojha <nojha@redhat.com>
  • Loading branch information
xiexingguo committed Dec 13, 2018
2 parents 038add1 + f6e8427 commit 2083a34
Show file tree
Hide file tree
Showing 3 changed files with 111 additions and 16 deletions.
35 changes: 20 additions & 15 deletions src/osd/OSDMap.cc
Expand Up @@ -1637,12 +1637,12 @@ void OSDMap::maybe_remove_pg_upmaps(CephContext *cct,
to_check.insert(p.first);
}
for (auto& pg : to_check) {
auto crush_rule = tmpmap.get_pg_pool_crush_rule(pg);
if (crush_rule < 0) {
lderr(cct) << __func__ << " unable to load crush-rule of pg "
<< pg << dendl;
if (!tmpmap.pg_exists(pg)) {
ldout(cct, 0) << __func__ << " pg " << pg << " is gone" << dendl;
to_cancel.insert(pg);
continue;
}
auto crush_rule = tmpmap.get_pg_pool_crush_rule(pg);
map<int, float> weight_map;
auto it = rule_weight_map.find(crush_rule);
if (it == rule_weight_map.end()) {
Expand Down Expand Up @@ -1672,19 +1672,23 @@ void OSDMap::maybe_remove_pg_upmaps(CephContext *cct,
tmpmap.pg_to_raw_up(pg, &raw, &primary);
set<int> parents;
for (auto osd : raw) {
// skip non-existent/down osd for erasure-coded PGs
if (osd == CRUSH_ITEM_NONE)
continue;
if (type > 0) {
auto parent = tmpmap.crush->get_parent_of_type(osd, type, crush_rule);
if (parent >= 0) {
if (parent < 0) {
auto r = parents.insert(parent);
if (!r.second) {
// two up-set osds come from same parent
to_cancel.insert(pg);
break;
}
} else {
lderr(cct) << __func__ << " unable to get parent of raw osd."
<< osd << " of pg " << pg
<< dendl;
break;
}
auto r = parents.insert(parent);
if (!r.second) {
// two up-set osds come from same parent
to_cancel.insert(pg);
break;
// continue to do checks below
}
}
// the above check validates collision only
Expand Down Expand Up @@ -1740,6 +1744,7 @@ void OSDMap::maybe_remove_pg_upmaps(CephContext *cct,
}
}
}
tmpmap.clean_pg_upmaps(cct, pending_inc);
}

int OSDMap::apply_incremental(const Incremental &inc)
Expand Down Expand Up @@ -3905,7 +3910,7 @@ int OSDMap::summarize_mapping_stats(

int OSDMap::clean_pg_upmaps(
CephContext *cct,
Incremental *pending_inc)
Incremental *pending_inc) const
{
ldout(cct, 10) << __func__ << dendl;
int changed = 0;
Expand Down Expand Up @@ -4069,6 +4074,8 @@ int OSDMap::calc_pg_upmaps(
multimap<float,int> deviation_osd; // deviation(pgs), osd
set<int> overfull;
for (auto& i : pgs_by_osd) {
// make sure osd is still there (belongs to this crush-tree)
assert(osd_weight.count(i.first));
float target = osd_weight[i.first] * pgs_per_weight;
float deviation = (float)i.second.size() - target;
ldout(cct, 20) << " osd." << i.first
Expand Down Expand Up @@ -4107,8 +4114,6 @@ int OSDMap::calc_pg_upmaps(
for (auto p = deviation_osd.rbegin(); p != deviation_osd.rend(); ++p) {
int osd = p->second;
float deviation = p->first;
// make sure osd is still there (belongs to this crush-tree)
assert(osd_weight.count(osd));
float target = osd_weight[osd] * pgs_per_weight;
assert(target > 0);
if (deviation/target < max_deviation_ratio) {
Expand Down
7 changes: 6 additions & 1 deletion src/osd/OSDMap.h
Expand Up @@ -1313,7 +1313,7 @@ class OSDMap {

int clean_pg_upmaps(
CephContext *cct,
Incremental *pending_inc);
Incremental *pending_inc) const;

bool try_pg_upmap(
CephContext *cct,
Expand All @@ -1333,6 +1333,11 @@ class OSDMap {

int get_osds_by_bucket_name(const string &name, set<int> *osds) const;

bool have_pg_upmaps(pg_t pg) const {
return pg_upmap.count(pg) ||
pg_upmap_items.count(pg);
}

/*
* handy helpers to build simple maps...
*/
Expand Down
85 changes: 85 additions & 0 deletions src/test/osd/TestOSDMap.cc
Expand Up @@ -598,6 +598,91 @@ TEST_F(OSDMapTest, CleanPGUpmaps) {
ASSERT_TRUE(parent_0 != parent_1);
}

{
// cancel stale upmaps
osdmap.pg_to_raw_up(pgid, &up, &up_primary);
int from = -1;
for (int i = 0; i < (int)get_num_osds(); i++) {
if (std::find(up.begin(), up.end(), i) == up.end()) {
from = i;
break;
}
}
ASSERT_TRUE(from >= 0);
int to = -1;
for (int i = 0; i < (int)get_num_osds(); i++) {
if (std::find(up.begin(), up.end(), i) == up.end() && i != from) {
to = i;
break;
}
}
ASSERT_TRUE(to >= 0);
vector<pair<int32_t,int32_t>> new_pg_upmap_items;
new_pg_upmap_items.push_back(make_pair(from, to));
OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
pending_inc.new_pg_upmap_items[pgid] =
mempool::osdmap::vector<pair<int32_t,int32_t>>(
new_pg_upmap_items.begin(), new_pg_upmap_items.end());
OSDMap nextmap;
nextmap.deepish_copy_from(osdmap);
nextmap.apply_incremental(pending_inc);
ASSERT_TRUE(nextmap.have_pg_upmaps(pgid));
OSDMap::Incremental new_pending_inc(nextmap.get_epoch() + 1);
nextmap.clean_pg_upmaps(g_ceph_context, &new_pending_inc);
nextmap.apply_incremental(new_pending_inc);
ASSERT_TRUE(!nextmap.have_pg_upmaps(pgid));
}

{
// https://tracker.ceph.com/issues/37493
pg_t ec_pg(0, my_ec_pool);
pg_t ec_pgid = osdmap.raw_pg_to_pg(ec_pg);
OSDMap tmpmap; // use a tmpmap here, so we do not dirty origin map..
int from = -1;
int to = -1;
{
// insert a valid pg_upmap_item
vector<int> ec_up;
int ec_up_primary;
osdmap.pg_to_raw_up(ec_pgid, &ec_up, &ec_up_primary);
ASSERT_TRUE(!ec_up.empty());
from = *(ec_up.begin());
ASSERT_TRUE(from >= 0);
for (int i = 0; i < (int)get_num_osds(); i++) {
if (std::find(ec_up.begin(), ec_up.end(), i) == ec_up.end()) {
to = i;
break;
}
}
ASSERT_TRUE(to >= 0);
ASSERT_TRUE(from != to);
vector<pair<int32_t,int32_t>> new_pg_upmap_items;
new_pg_upmap_items.push_back(make_pair(from, to));
OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
pending_inc.new_pg_upmap_items[ec_pgid] =
mempool::osdmap::vector<pair<int32_t,int32_t>>(
new_pg_upmap_items.begin(), new_pg_upmap_items.end());
tmpmap.deepish_copy_from(osdmap);
tmpmap.apply_incremental(pending_inc);
ASSERT_TRUE(tmpmap.have_pg_upmaps(ec_pgid));
}
{
// mark one of the target OSDs of the above pg_upmap_item as down
OSDMap::Incremental pending_inc(tmpmap.get_epoch() + 1);
pending_inc.new_state[to] = CEPH_OSD_UP;
tmpmap.apply_incremental(pending_inc);
ASSERT_TRUE(!tmpmap.is_up(to));
ASSERT_TRUE(tmpmap.have_pg_upmaps(ec_pgid));
}
{
// confirm *maybe_remove_pg_upmaps* won't do anything bad
OSDMap::Incremental pending_inc(tmpmap.get_epoch() + 1);
tmpmap.maybe_remove_pg_upmaps(g_ceph_context, tmpmap, &pending_inc);
tmpmap.apply_incremental(pending_inc);
ASSERT_TRUE(tmpmap.have_pg_upmaps(ec_pgid));
}
}

{
// TEST pg_upmap
{
Expand Down

0 comments on commit 2083a34

Please sign in to comment.