Skip to content

Commit

Permalink
Merge pull request #17508 from liewegas/wip-crushrule-convert
Browse files Browse the repository at this point in the history
mon: more aggressively convert crush rulesets -> distinct rules

Reviewed-by: Kefu Chai <kchai@redhat.com>
  • Loading branch information
tchaikov committed Sep 19, 2017
2 parents 42bb43b + 8f8f0b2 commit f59dc0a
Show file tree
Hide file tree
Showing 5 changed files with 103 additions and 83 deletions.
48 changes: 7 additions & 41 deletions src/crush/CrushWrapper.cc
Expand Up @@ -13,7 +13,7 @@

#define dout_subsys ceph_subsys_crush

bool CrushWrapper::has_legacy_rulesets() const
bool CrushWrapper::has_legacy_rule_ids() const
{
for (unsigned i=0; i<crush->max_rules; i++) {
crush_rule *r = crush->rules[i];
Expand All @@ -25,51 +25,17 @@ bool CrushWrapper::has_legacy_rulesets() const
return false;
}

int CrushWrapper::renumber_rules_by_ruleset()
std::map<int, int> CrushWrapper::renumber_rules()
{
int max_ruleset = 0;
std::map<int, int> result;
for (unsigned i=0; i<crush->max_rules; i++) {
crush_rule *r = crush->rules[i];
if (r && r->mask.ruleset >= max_ruleset) {
max_ruleset = r->mask.ruleset + 1;
if (r && r->mask.ruleset != i) {
result[r->mask.ruleset] = i;
r->mask.ruleset = i;
}
}
struct crush_rule **newrules =
(crush_rule**)calloc(1, max_ruleset * sizeof(crush_rule*));
for (unsigned i=0; i<crush->max_rules; i++) {
crush_rule *r = crush->rules[i];
if (!r)
continue;
if (newrules[r->mask.ruleset]) {
// collision, we can't do it.
free(newrules);
return -EINVAL;
}
newrules[r->mask.ruleset] = r;
}

// success, swap!
free(crush->rules);
crush->rules = newrules;
crush->max_rules = max_ruleset;
return 0;
}

bool CrushWrapper::has_multirule_rulesets() const
{
for (unsigned i=0; i<crush->max_rules; i++) {
crush_rule *r = crush->rules[i];
if (!r)
continue;
for (unsigned j=i+1; j<crush->max_rules; j++) {
crush_rule *s = crush->rules[j];
if (!s)
continue;
if (r->mask.ruleset == s->mask.ruleset)
return true;
}
}
return false;
return result;
}

bool CrushWrapper::has_non_straw2_buckets() const
Expand Down
29 changes: 20 additions & 9 deletions src/crush/CrushWrapper.h
Expand Up @@ -120,14 +120,25 @@ class CrushWrapper {
set_tunables_default();
}

/// true if any rule has a ruleset != the rule id
bool has_legacy_rulesets() const;

/// fix rules whose ruleid != ruleset
int renumber_rules_by_ruleset();
/**
* true if any rule has a rule id != its position in the array
*
* These indicate "ruleset" IDs that were created by older versions
* of Ceph. They are cleaned up in renumber_rules so that eventually
* we can remove the code for handling them.
*/
bool has_legacy_rule_ids() const;

/// true if any ruleset has more than 1 rule
bool has_multirule_rulesets() const;
/**
* Renumber rules whose ruleset != rule id, setting each such rule's
* ruleset to its rule id (its position in the rule array).
*
* These rules were created in older versions of Ceph. The concept
* of a ruleset no longer exists.
*
* Return a map of old ID -> new ID. Caller must update OSDMap
* to use new IDs.
*/
std::map<int, int> renumber_rules();

/// true if any buckets that aren't straw2
bool has_non_straw2_buckets() const;
Expand Down Expand Up @@ -1238,7 +1249,7 @@ class CrushWrapper {
void finalize() {
assert(crush);
crush_finalize(crush);
have_uniform_rules = !has_legacy_rulesets();
have_uniform_rules = !has_legacy_rule_ids();
}

int update_device_class(int id, const string& class_name, const string& name, ostream *ss);
Expand Down Expand Up @@ -1315,7 +1326,7 @@ class CrushWrapper {
/**
* Return the lowest numbered ruleset of type `type`
*
* @returns a ruleset ID, or -1 if no matching rulesets found.
* @returns a ruleset ID, or -1 if no matching rules found.
*/
int find_first_ruleset(int type) const {
int result = -1;
Expand Down
68 changes: 39 additions & 29 deletions src/mon/OSDMonitor.cc
Expand Up @@ -543,23 +543,6 @@ void OSDMonitor::on_active()
// Restart hook: drops the recorded OSD report times and, on the leader,
// tries to stage a crush map whose rules have been renumbered by ruleset.
void OSDMonitor::on_restart()
{
last_osd_report.clear();

// only the leader attempts the rewrite
if (mon->is_leader()) {
// fix ruleset != ruleid
// Only attempted when some rule has ruleset != rule id and no ruleset is
// shared by more than one rule.
if (osdmap.crush->has_legacy_rulesets() &&
!osdmap.crush->has_multirule_rulesets()) {
// work on a local copy; presumably _get_pending_crush() fills it with the
// pending (or current) crush map -- TODO confirm against its definition
CrushWrapper newcrush;
_get_pending_crush(newcrush);
// returns 0 on success, -EINVAL if two rules collide on one ruleset
int r = newcrush.renumber_rules_by_ruleset();
if (r >= 0) {
dout(1) << __func__ << " crush map has ruleset != rule id; fixing" << dendl;
// replace whatever crush blob was pending with the renumbered map
pending_inc.crush.clear();
newcrush.encode(pending_inc.crush, mon->get_quorum_con_features());
} else {
// renumbering failed: leave pending_inc.crush untouched
dout(10) << __func__ << " unable to renumber rules by ruleset" << dendl;
}
}
}
}

void OSDMonitor::on_shutdown()
Expand Down Expand Up @@ -622,6 +605,40 @@ void OSDMonitor::create_pending()
<< pending_inc.new_nearfull_ratio << dendl;
}
}

// Rewrite CRUSH rule IDs if they are using legacy "ruleset"
// structure.
if (osdmap.crush->has_legacy_rule_ids()) {
CrushWrapper newcrush;
_get_pending_crush(newcrush);

// First, for all pools, work out which rule they really used
// by resolving ruleset to rule.
for (const auto &i : osdmap.get_pools()) {
const auto pool_id = i.first;
const auto &pool = i.second;
int new_rule_id = newcrush.find_rule(pool.crush_rule,
pool.type, pool.size);

dout(1) << __func__ << " rewriting pool "
<< osdmap.get_pool_name(pool_id) << " crush ruleset "
<< pool.crush_rule << " -> rule id " << new_rule_id << dendl;
if (pending_inc.new_pools.count(pool_id) == 0) {
pending_inc.new_pools[pool_id] = pool;
}
pending_inc.new_pools[pool_id].crush_rule = new_rule_id;
}

// Now, go ahead and renumber all the rules so that their
// rule_id field corresponds to their position in the array
auto old_to_new = newcrush.renumber_rules();
dout(1) << __func__ << " Rewrote " << old_to_new << " crush IDs:" << dendl;
for (const auto &i : old_to_new) {
dout(1) << __func__ << " " << i.first << " -> " << i.second << dendl;
}
pending_inc.crush.clear();
newcrush.encode(pending_inc.crush, mon->get_quorum_con_features());
}
}

creating_pgs_t
Expand Down Expand Up @@ -7154,7 +7171,7 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op,
}
}

if (crush.has_legacy_rulesets()) {
if (crush.has_legacy_rule_ids()) {
err = -EINVAL;
ss << "crush maps with ruleset != ruleid are no longer allowed";
goto reply;
Expand All @@ -7164,16 +7181,9 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op,
goto reply;
}

const auto& osdmap_pools = osdmap.get_pools();
for (auto pit = osdmap_pools.begin(); pit != osdmap_pools.end(); ++pit) {
const int64_t pool_id = pit->first;
const pg_pool_t &pool = pit->second;
int ruleno = pool.get_crush_rule();
if (!crush.rule_exists(ruleno)) {
ss << " the crush rule no "<< ruleno << " for pool id " << pool_id << " is in use";
err = -EINVAL;
goto reply;
}
err = osdmap.validate_crush_rules(&crush, &ss);
if (err < 0) {
goto reply;
}

if (g_conf->mon_osd_crush_smoke_test) {
Expand Down Expand Up @@ -8365,7 +8375,7 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op,
// FIXME: this is ok in some situations, but let's not bother with that
// complexity now.
int ruleset = newcrush.get_rule_mask_ruleset(ruleno);
if (osdmap.crush_ruleset_in_use(ruleset)) {
if (osdmap.crush_rule_in_use(ruleset)) {
ss << "crush ruleset " << name << " " << ruleset << " is in use";
err = -EBUSY;
goto reply;
Expand Down
37 changes: 34 additions & 3 deletions src/osd/OSDMap.cc
Expand Up @@ -3236,15 +3236,46 @@ void OSDMap::print_oneline_summary(ostream& out) const
out << " nearfull";
}

bool OSDMap::crush_ruleset_in_use(int ruleset) const
bool OSDMap::crush_rule_in_use(int rule_id) const
{
for (const auto &pool : pools) {
if (pool.second.crush_rule == ruleset)
if (pool.second.crush_rule == rule_id)
return true;
}
return false;
}

/**
 * Check every pool in this map against the rules of a candidate crush map.
 *
 * Pools are visited in map order and validation stops at the first
 * failure.  For each pool the following must hold:
 *  - the pool's crush rule exists in newcrush;
 *  - that rule's mask ruleset equals the rule id;
 *  - that rule's mask type equals the pool type;
 *  - the pool size lies within the rule's [min_size, max_size] mask.
 *
 * @param newcrush crush map the pools are validated against
 * @param ss stream that receives a description of the first failure
 * @returns 0 if all pools pass, -EINVAL on the first failed check
 */
int OSDMap::validate_crush_rules(CrushWrapper *newcrush,
                                 ostream *ss) const
{
  for (const auto& entry : pools) {
    const auto& pool_id = entry.first;
    const auto& pg_pool = entry.second;
    const int rule_id = pg_pool.get_crush_rule();

    // the rule must exist before any of its mask fields can be inspected
    if (!newcrush->rule_exists(rule_id)) {
      *ss << "pool " << pool_id << " references crush_rule " << rule_id
          << " but it is not present";
      return -EINVAL;
    }

    // legacy maps may carry a ruleset that differs from the rule id
    const bool ruleset_is_rule_id =
      newcrush->get_rule_mask_ruleset(rule_id) == rule_id;
    if (!ruleset_is_rule_id) {
      *ss << "rule " << rule_id << " mask ruleset does not match rule id";
      return -EINVAL;
    }

    const bool type_matches =
      newcrush->get_rule_mask_type(rule_id) ==
      static_cast<int>(pg_pool.get_type());
    if (!type_matches) {
      *ss << "pool " << pool_id << " type does not match rule " << rule_id;
      return -EINVAL;
    }

    // short-circuits exactly like "size < min || size > max"
    const bool size_in_range =
      pg_pool.get_size() >=
        static_cast<int>(newcrush->get_rule_mask_min_size(rule_id)) &&
      pg_pool.get_size() <=
        static_cast<int>(newcrush->get_rule_mask_max_size(rule_id));
    if (!size_in_range) {
      *ss << "pool " << pool_id << " size " << pg_pool.get_size() << " does not"
          << " fall within rule " << rule_id
          << " min_size " << newcrush->get_rule_mask_min_size(rule_id)
          << " and max_size " << newcrush->get_rule_mask_max_size(rule_id);
      return -EINVAL;
    }
  }
  return 0;
}

int OSDMap::build_simple_optioned(CephContext *cct, epoch_t e, uuid_d &fsid,
int nosd, int pg_bits, int pgp_bits,
bool default_pool)
Expand Down Expand Up @@ -3962,7 +3993,7 @@ void OSDMap::get_pool_ids_by_osd(CephContext *cct,
set<int> rules;
for (auto &i: raw_rules) {
// exclude any dead rule
if (crush_ruleset_in_use(i)) {
if (crush_rule_in_use(i)) {
rules.insert(i);
}
}
Expand Down
4 changes: 3 additions & 1 deletion src/osd/OSDMap.h
Expand Up @@ -1337,7 +1337,9 @@ class OSDMap {
const string& root,
ostream *ss);

bool crush_ruleset_in_use(int ruleset) const;
bool crush_rule_in_use(int rule_id) const;

int validate_crush_rules(CrushWrapper *crush, ostream *ss) const;

void clear_temp() {
pg_temp->clear();
Expand Down

0 comments on commit f59dc0a

Please sign in to comment.