Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

mimic: crush: backport recent upmap fixes #27963

Merged
merged 6 commits into from May 9, 2019
@@ -1952,6 +1952,22 @@ std::vector<Option> get_global_options() {
.set_min(2)
.set_description("Number of striping periods to zero head of MDS journal write position"),

Option("osd_calc_pg_upmaps_aggressively", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
.set_default(true)
.set_description("try to calculate PG upmaps more aggressively, e.g., "
"by doing a fairly exhaustive search of existing PGs "
"that can be unmapped or upmapped"),

Option("osd_calc_pg_upmaps_max_stddev", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
.set_default(1.0)
.set_description("standard deviation below which there is no attempt made "
"while trying to calculate PG upmaps"),

Option("osd_calc_pg_upmaps_local_fallback_retries", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
.set_default(100)
.set_description("Maximum number of PGs we can attempt to unmap or upmap "
"for a specific overfull or underfull osd per iteration "),

// Time limit, in seconds, for a smartctl run (see description); defaults to 5.
Option("osd_smart_report_timeout", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
.set_default(5)
.set_description("Timeout (in seconds) for smartctl to run, default is set to 5"),
@@ -890,23 +890,84 @@ void CrushWrapper::get_children_of_type(int id,
}
}

int CrushWrapper::get_rule_failure_domain(int rule_id)
{
crush_rule *rule = get_rule(rule_id);
if (IS_ERR(rule)) {
/**
 * Check a candidate "up" set of OSDs against the choose/chooseleaf steps of
 * a CRUSH rule.
 *
 * @param cct        context used only for the ldout/lderr log output below
 * @param rule_id    id of the rule to look up via get_rule()
 * @param pool_size  added to a step's replica count (arg1) when that count
 *                   is <= 0
 * @param up         OSD ids to validate
 * @return 0 when no step is violated, -ENOENT when the rule cannot be found,
 *         -EINVAL when a step's constraint is violated.
 *
 * NOTE(review): this listing looks like a diff whose +/- markers were
 * stripped; some lines below appear to be left over from the replaced
 * get_rule_failure_domain() body. They are marked where they occur.
 */
int CrushWrapper::verify_upmap(CephContext *cct,
int rule_id,
int pool_size,
const vector<int>& up)
{
auto rule = get_rule(rule_id);
// Reject both an error-encoded pointer and a plain null rule.
if (IS_ERR(rule) || !rule) {
lderr(cct) << __func__ << " rule " << rule_id << " does not exist"
<< dendl;
return -ENOENT;
}
// NOTE(review): the following loop, which scans for the largest arg2 among
// the choose steps, presumably belongs to the removed
// get_rule_failure_domain() (it pairs with the stray "return type;" near the
// end) -- confirm against the real file.
int type = 0; // default to osd-level
for (unsigned s = 0; s < rule->len; ++s) {
if ((rule->steps[s].op == CRUSH_RULE_CHOOSE_FIRSTN ||
rule->steps[s].op == CRUSH_RULE_CHOOSE_INDEP ||
rule->steps[s].op == CRUSH_RULE_CHOOSELEAF_FIRSTN ||
rule->steps[s].op == CRUSH_RULE_CHOOSELEAF_INDEP) &&
rule->steps[s].arg2 > type) {
type = rule->steps[s].arg2;
// Walk every step of the rule; only choose/chooseleaf steps are checked.
for (unsigned step = 0; step < rule->len; ++step) {
auto curstep = &rule->steps[step];
ldout(cct, 10) << __func__ << " step " << step << dendl;
switch (curstep->op) {
case CRUSH_RULE_CHOOSELEAF_FIRSTN:
case CRUSH_RULE_CHOOSELEAF_INDEP:
{
// chooseleaf: no two OSDs in "up" may share a parent of the step's type.
int type = curstep->arg2;
if (type == 0) // osd
break;
map<int, set<int>> osds_by_parent; // parent_of_desired_type -> osds
for (auto osd : up) {
auto parent = get_parent_of_type(osd, type, rule_id);
// Only a negative result is accepted as a parent id here (presumably
// because CRUSH bucket ids are negative -- confirm against
// get_parent_of_type); anything else is logged and the OSD is skipped.
if (parent < 0) {
osds_by_parent[parent].insert(osd);
} else {
ldout(cct, 1) << __func__ << " unable to get parent of osd." << osd
<< ", skipping for now"
<< dendl;
}
}
// Any parent holding more than one OSD violates the step.
for (auto i : osds_by_parent) {
if (i.second.size() > 1) {
lderr(cct) << __func__ << " multiple osds " << i.second
<< " come from same failure domain " << i.first
<< dendl;
return -EINVAL;
}
}
}
break;

case CRUSH_RULE_CHOOSE_FIRSTN:
case CRUSH_RULE_CHOOSE_INDEP:
{
// choose: the OSDs in "up" may span at most numrep distinct parents of
// the step's type.
int numrep = curstep->arg1;
int type = curstep->arg2;
if (type == 0) // osd
break;
// A non-positive replica count is taken relative to the pool size.
if (numrep <= 0)
numrep += pool_size;
set<int> parents_of_type;
for (auto osd : up) {
auto parent = get_parent_of_type(osd, type, rule_id);
// Same acceptance rule as in the chooseleaf case: negative means found.
if (parent < 0) {
parents_of_type.insert(parent);
} else {
ldout(cct, 1) << __func__ << " unable to get parent of osd." << osd
<< ", skipping for now"
<< dendl;
}
}
if ((int)parents_of_type.size() > numrep) {
lderr(cct) << __func__ << " number of buckets "
<< parents_of_type.size() << " exceeds desired " << numrep
<< dendl;
return -EINVAL;
}
}
break;

default:
// ignore
break;
}
}
// NOTE(review): "return type;" looks like residue of the removed
// get_rule_failure_domain(); "return 0;" matches the success value given in
// the header comment for verify_upmap -- confirm.
return type;
return 0;
}

int CrushWrapper::_get_leaves(int id, list<int> *leaves) const
@@ -742,12 +742,15 @@ class CrushWrapper {
set<int> *children,
bool exclude_shadow = true) const;


/**
* get failure-domain type of a specific crush rule
* @param rule_id crush rule id
* @return type of failure-domain or a negative errno on error.
*/
int get_rule_failure_domain(int rule_id);
* verify upmapping results.
* return 0 on success or a negative errno on error.
*/
int verify_upmap(CephContext *cct,
int rule_id,
int pool_size,
const vector<int>& up);

/**
* enumerate leaves(devices) of given node
ProTip! Use n and p to navigate between commits in a pull request.
You can’t perform that action at this time.