Skip to content
Permalink
Browse files

Merge pull request #27963 from xiexingguo/wip-mimic-upmap-fixes

mimic: crush: backport recent upmap fixes

Reviewed-by: Neha Ojha <nojha@redhat.com>
Reviewed-by: Jan Fajerski <jfajerski@suse.com>
  • Loading branch information...
yuriw committed May 9, 2019
2 parents 75bb5ee + 3230bd5 commit a2acac389cc782e46d307cd052d90977dc380360
@@ -1952,6 +1952,22 @@ std::vector<Option> get_global_options() {
.set_min(2)
.set_description("Number of striping periods to zero head of MDS journal write position"),

// Tunables sharing the osd_calc_pg_upmaps_ prefix; per their descriptions they
// all steer the PG upmap calculation.

// Boolean switch, enabled by default.
Option("osd_calc_pg_upmaps_aggressively", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
.set_default(true)
.set_description("try to calculate PG upmaps more aggressively, e.g., "
"by doing a fairly exhaustive search of existing PGs "
"that can be unmapped or upmapped"),

// Floating-point threshold, 1.0 by default.
Option("osd_calc_pg_upmaps_max_stddev", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
.set_default(1.0)
.set_description("standard deviation below which there is no attempt made "
"while trying to calculate PG upmaps"),

// Unsigned retry cap, 100 by default.
Option("osd_calc_pg_upmaps_local_fallback_retries", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
.set_default(100)
.set_description("Maximum number of PGs we can attempt to unmap or upmap "
"for a specific overfull or underfull osd per iteration "),

// Timeout, in seconds, for a smartctl run; unsigned, 5 by default.
Option("osd_smart_report_timeout", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
.set_default(5)
.set_description("Timeout (in seconds) for smartctl to run, default is set to 5"),
@@ -890,23 +890,84 @@ void CrushWrapper::get_children_of_type(int id,
}
}

// NOTE(review): this span is a diff hunk rendered without +/- markers, so two
// versions are interleaved. The get_rule_failure_domain() header, the
// "int type = 0" scan over rule->steps and the "return type;" near the end
// look like the pre-change text that verify_upmap() replaces -- confirm
// against the actual commit before treating this as compilable code.
int CrushWrapper::get_rule_failure_domain(int rule_id)
{
crush_rule *rule = get_rule(rule_id);
if (IS_ERR(rule)) {
/**
 * Check an "up" set of OSDs against the choose/chooseleaf steps of a rule.
 *
 * @param cct       context handed to the lderr/ldout logging macros
 * @param rule_id   id of the rule, looked up through get_rule()
 * @param pool_size added to a CHOOSE step's numrep when that numrep is <= 0
 * @param up        OSD ids to validate
 * @return 0 when no step rejects the set; -ENOENT when the rule lookup
 *         fails; -EINVAL when a CHOOSELEAF step finds more than one OSD under
 *         the same parent of the step's type, or a CHOOSE step sees more
 *         distinct parents than numrep.
 */
int CrushWrapper::verify_upmap(CephContext *cct,
int rule_id,
int pool_size,
const vector<int>& up)
{
auto rule = get_rule(rule_id);
// Reject both an error-encoded pointer and a plain null rule.
if (IS_ERR(rule) || !rule) {
lderr(cct) << __func__ << " rule " << rule_id << " does not exist"
<< dendl;
return -ENOENT;
}
int type = 0; // default to osd-level
for (unsigned s = 0; s < rule->len; ++s) {
if ((rule->steps[s].op == CRUSH_RULE_CHOOSE_FIRSTN ||
rule->steps[s].op == CRUSH_RULE_CHOOSE_INDEP ||
rule->steps[s].op == CRUSH_RULE_CHOOSELEAF_FIRSTN ||
rule->steps[s].op == CRUSH_RULE_CHOOSELEAF_INDEP) &&
rule->steps[s].arg2 > type) {
type = rule->steps[s].arg2;
// Walk every step of the rule; only the four choose variants are inspected.
for (unsigned step = 0; step < rule->len; ++step) {
auto curstep = &rule->steps[step];
ldout(cct, 10) << __func__ << " step " << step << dendl;
switch (curstep->op) {
case CRUSH_RULE_CHOOSELEAF_FIRSTN:
case CRUSH_RULE_CHOOSELEAF_INDEP:
{
// arg2 carries the bucket type of the step; type 0 needs no check.
int type = curstep->arg2;
if (type == 0) // osd
break;
map<int, set<int>> osds_by_parent; // parent_of_desired_type -> osds
for (auto osd : up) {
auto parent = get_parent_of_type(osd, type, rule_id);
// NOTE(review): a negative id is treated as "parent found" here --
// presumably bucket ids are negative; confirm against
// get_parent_of_type(). Anything else is logged and left out of the check.
if (parent < 0) {
osds_by_parent[parent].insert(osd);
} else {
ldout(cct, 1) << __func__ << " unable to get parent of osd." << osd
<< ", skipping for now"
<< dendl;
}
}
// A parent that collected more than one OSD fails the verification.
for (auto i : osds_by_parent) {
if (i.second.size() > 1) {
lderr(cct) << __func__ << " multiple osds " << i.second
<< " come from same failure domain " << i.first
<< dendl;
return -EINVAL;
}
}
}
break;

case CRUSH_RULE_CHOOSE_FIRSTN:
case CRUSH_RULE_CHOOSE_INDEP:
{
// arg1 is the requested replica count, arg2 the bucket type of the step.
int numrep = curstep->arg1;
int type = curstep->arg2;
if (type == 0) // osd
break;
// A non-positive numrep is interpreted relative to pool_size.
if (numrep <= 0)
numrep += pool_size;
set<int> parents_of_type;
for (auto osd : up) {
auto parent = get_parent_of_type(osd, type, rule_id);
// Same convention as the chooseleaf case: only negative ids are kept.
if (parent < 0) {
parents_of_type.insert(parent);
} else {
ldout(cct, 1) << __func__ << " unable to get parent of osd." << osd
<< ", skipping for now"
<< dendl;
}
}
// More distinct parents than numrep fails the verification.
if ((int)parents_of_type.size() > numrep) {
lderr(cct) << __func__ << " number of buckets "
<< parents_of_type.size() << " exceeds desired " << numrep
<< dendl;
return -EINVAL;
}
}
break;

default:
// ignore
break;
}
}
return type;
return 0;
}

int CrushWrapper::_get_leaves(int id, list<int> *leaves) const
@@ -742,12 +742,15 @@ class CrushWrapper {
set<int> *children,
bool exclude_shadow = true) const;


/**
* get failure-domain type of a specific crush rule
* @param rule_id crush rule id
* @return type of failure-domain or a negative errno on error.
*/
int get_rule_failure_domain(int rule_id);
* verify upmapping results.
* return 0 on success or a negative errno on error.
*/
// cct:       context used for log output
// rule_id:   id of the crush rule whose steps are checked
// pool_size: added to a choose step's replica count when that count is <= 0
// up:        OSD ids to verify
int verify_upmap(CephContext *cct,
int rule_id,
int pool_size,
const vector<int>& up);

/**
* enumerate leaves(devices) of given node

0 comments on commit a2acac3

Please sign in to comment.
You can’t perform that action at this time.