Skip to content

Commit

Permalink
mon/OSDMonitor: Make the pg_num check more accurate
Browse files Browse the repository at this point in the history
  In the check_pg_num function, find the OSDs selected by the current pool's crush rule and calculate whether the
average number of PGs on those OSDs would exceed the value of 'mon_max_pg_per_osd'. Make the pg_num check more
accurate by counting all the PGs on the OSDs used by the new pool.
Fixes: https://tracker.ceph.com/issues/47062

Signed-off-by: Jerry Luo <luojierui@chinatelecom.cn>
  • Loading branch information
fyzard1991 committed Sep 22, 2021
1 parent 70828cb commit 38294a4
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 18 deletions.
60 changes: 44 additions & 16 deletions src/mon/OSDMonitor.cc
Expand Up @@ -7784,22 +7784,56 @@ int OSDMonitor::get_crush_rule(const string &rule_name,
return 0;
}

int OSDMonitor::check_pg_num(int64_t pool, int pg_num, int size, ostream *ss)
int OSDMonitor::check_pg_num(int64_t pool, int pg_num, int size, int crush_rule, ostream *ss)
{
auto max_pgs_per_osd = g_conf().get_val<uint64_t>("mon_max_pg_per_osd");
auto num_osds = std::max(osdmap.get_num_in_osds(), 3u); // assume min cluster size 3
auto max_pgs = max_pgs_per_osd * num_osds;
uint64_t projected = 0;
unsigned osd_num = 0;
// assume min cluster size 3
auto num_osds = std::max(osdmap.get_num_in_osds(), 3u);
if (pool < 0) {
// a new pool
projected += pg_num * size;
}
for (const auto& i : osdmap.get_pools()) {
if (i.first == pool) {
if (mapping.get_epoch() >= osdmap.get_epoch()) {
set<int> roots;
CrushWrapper newcrush = _get_pending_crush();
newcrush.find_takes_by_rule(crush_rule, &roots);
int max_osd = osdmap.get_max_osd();
for (auto root : roots) {
const char *rootname = newcrush.get_item_name(root);
set<int> osd_ids;
newcrush.get_leaves(rootname, &osd_ids);
unsigned out_osd = 0;
for (auto id : osd_ids) {
if (id > max_osd) {
out_osd++;
continue;
}
projected += mapping.get_osd_acting_pgs(id).size();
}
osd_num += osd_ids.size() - out_osd;
}
if (pool >= 0) {
// update an existing pool's pg num
const auto& pg_info = osdmap.get_pools().at(pool);
// already counted the pgs of this `pool` by iterating crush map, so
// subtract them and add the specified pg num instead
projected += pg_num * size;
} else {
projected += i.second.get_pg_num_target() * i.second.get_size();
projected -= pg_info.get_pg_num_target() * pg_info.get_size();
}
num_osds = std::max(osd_num, 3u); // assume min cluster size 3
} else {
// use pg_num target for evaluating the projected pg num
for (const auto& [pool_id, pool_info] : osdmap.get_pools()) {
if (pool_id == pool) {
projected += pg_num * size;
} else {
projected += pool_info.get_pg_num_target() * pool_info.get_size();
}
}
}
auto max_pgs = max_pgs_per_osd * num_osds;
if (projected > max_pgs) {
if (pool >= 0) {
*ss << "pool id " << pool;
Expand Down Expand Up @@ -7895,13 +7929,7 @@ int OSDMonitor::prepare_new_pool(string& name,
<< duration << dendl;
}
unsigned size, min_size;
r = prepare_pool_size(pool_type, erasure_code_profile, repl_size,
&size, &min_size, ss);
if (r) {
dout(10) << "prepare_pool_size returns " << r << dendl;
return r;
}
r = check_pg_num(-1, pg_num, size, ss);
r = check_pg_num(-1, pg_num, size, crush_rule, ss);
if (r) {
dout(10) << "check_pg_num returns " << r << dendl;
return r;
Expand Down Expand Up @@ -8168,7 +8196,7 @@ int OSDMonitor::prepare_command_pool_set(const cmdmap_t& cmdmap,
if (!osdmap.crush->check_crush_rule(p.get_crush_rule(), p.type, n, ss)) {
return -EINVAL;
}
int r = check_pg_num(pool, p.get_pg_num(), n, &ss);
int r = check_pg_num(pool, p.get_pg_num(), n, p.get_crush_rule(), &ss);
if (r < 0) {
return r;
}
Expand Down Expand Up @@ -8274,7 +8302,7 @@ int OSDMonitor::prepare_command_pool_set(const cmdmap_t& cmdmap,
return -ERANGE;
}
if (n > (int)p.get_pg_num_target()) {
int r = check_pg_num(pool, n, p.get_size(), &ss);
int r = check_pg_num(pool, n, p.get_size(), p.get_crush_rule(), &ss);
if (r) {
return r;
}
Expand Down
2 changes: 1 addition & 1 deletion src/mon/OSDMonitor.h
Expand Up @@ -521,7 +521,7 @@ class OSDMonitor : public PaxosService,
const std::string &erasure_code_profile,
unsigned *stripe_width,
std::ostream *ss);
int check_pg_num(int64_t pool, int pg_num, int size, std::ostream* ss);
int check_pg_num(int64_t pool, int pg_num, int size, int crush_rule, std::ostream* ss);
int prepare_new_pool(std::string& name,
int crush_rule,
const std::string &crush_rule_name,
Expand Down
2 changes: 1 addition & 1 deletion src/osd/OSDMapMapping.h
Expand Up @@ -321,7 +321,7 @@ class OSDMapMapping {
}
}

// Returns a const reference to the acting_rmap entry at index `osd`
// (a vector of pg_t). Asserts that `osd` is within the bounds of
// acting_rmap before indexing.
const mempool::osdmap_mapping::vector<pg_t>& get_osd_acting_pgs(unsigned osd) {
const mempool::osdmap_mapping::vector<pg_t>& get_osd_acting_pgs(unsigned osd) {
ceph_assert(osd < acting_rmap.size());
return acting_rmap[osd];
}
Expand Down

0 comments on commit 38294a4

Please sign in to comment.