Skip to content

Commit

Permalink
crush: rm-device-class support
Browse files Browse the repository at this point in the history
Signed-off-by: xie xingguo <xie.xingguo@zte.com.cn>
  • Loading branch information
xiexingguo committed Jul 25, 2017
1 parent 5686037 commit b80c530
Show file tree
Hide file tree
Showing 5 changed files with 295 additions and 3 deletions.
146 changes: 144 additions & 2 deletions src/crush/CrushWrapper.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1327,13 +1327,17 @@ pair<string,string> CrushWrapper::get_immediate_parent(int id, int *_ret)
return pair<string, string>();
}

int CrushWrapper::get_immediate_parent_id(int id, int *parent) const
int CrushWrapper::get_immediate_parent_id(int id,
int *parent,
parent_type_t choice) const
{
for (int bidx = 0; bidx < crush->max_buckets; bidx++) {
crush_bucket *b = crush->buckets[bidx];
if (b == 0)
continue;
if (shadow_item(b->id)) {
if (choice == PARENT_NONSHADOW && shadow_item(b->id)) {
continue;
} else if (choice == PARENT_SHADOW && !shadow_item(b->id)) {
continue;
}
for (unsigned i = 0; i < b->size; i++) {
Expand Down Expand Up @@ -1817,6 +1821,144 @@ int CrushWrapper::update_device_class(int id,
return 1;
}

// Climb from the given item through its shadow parents, one level at a
// time, appending every visited shadow parent id to *parents (nearest
// parent first). The climb continues only while each parent bucket holds
// exactly one child, and ends once _search_item_exists() reports false
// for the node reached.
//
// *parents is cleared on entry; on error it may hold the parents that
// were collected before the failure.
//
// Return 0 on success.
// Return -EBUSY if a parent on the way has more than one child.
// Return -ENOENT if a shadow parent, or its bucket, can not be found.
int CrushWrapper::get_immediate_only_child_shadow_parents(CephContext *cct,
                                                          int id,
                                                          list<int> *parents)
{
  assert(parents);
  parents->clear();

  int cur = id;
  for (;;) {
    if (!_search_item_exists(cur)) {
      // nothing left to climb from
      return 0;
    }

    int shadow_parent;
    if (get_immediate_parent_id(cur, &shadow_parent, PARENT_SHADOW) < 0) {
      ldout(cct, 0) << __func__ << " unable to get immediate shadow parent of"
                    << " item id = " << cur << dendl;
      return -ENOENT;
    }

    // get_bucket() signals failure through an error pointer
    crush_bucket *bucket = get_bucket(shadow_parent);
    if (IS_ERR(bucket)) {
      ldout(cct, 0) << __func__ << " unable to locate parent id = "
                    << shadow_parent << dendl;
      return -ENOENT;
    }

    if (bucket->size != 1) {
      // a parent we found via one of its children can not be empty
      assert(bucket->size > 0);
      ldout(cct, 0) << __func__ << " parent id = " << shadow_parent
                    << " have " << bucket->size << " children,"
                    << " returning" << dendl;
      return -EBUSY;
    }

    parents->push_back(shadow_parent);
    cur = shadow_parent;
  }
}

// Unbind the given osd from its device class and prune the shadow tree
// that existed only because of it.
//
// Steps:
//  1. drop the osd's class binding;
//  2. detach the osd from its direct shadow parent (zeroing its weight in
//     every choose_args weight-set first);
//  3. if every shadow ancestor held a single child, remove those now-empty
//     shadow buckets bottom-up;
//  4. if no shadow root is left for the class, remove the class name.
//
// @param cct  context used for logging
// @param id   osd id whose class is removed
// @param ss   receives a human readable reason on failure (must not be NULL)
// @return 0 on success, or when the osd has no class (a note is written to
//         *ss in that case); -ENOENT if the osd has no name or its shadow
//         parents can not be located.
//
// Note: get_bucket() reports failure through an error pointer rather than
// NULL, so bucket sanity checks use IS_ERR().
int CrushWrapper::remove_device_class(CephContext *cct, int id, ostream *ss)
{
  assert(ss);
  const char *name = get_item_name(id);
  if (!name) {
    *ss << "osd." << id << " does not have a name";
    return -ENOENT;
  }

  const char *class_name = get_item_class(id);
  if (!class_name) {
    *ss << "osd." << id << " has not been bound to a specific class yet";
    return 0;
  }
  class_remove_item(id);

  // see if we have to destroy the whole shadow tree too
  // note that we can delete an empty bucket only, thus we
  // pre-collect the complete tree topology here
  list<int> parents;
  bool remove_child_only = false;
  int r = get_immediate_only_child_shadow_parents(cct, id, &parents);
  if (r == -EBUSY) {
    // parents still have other children.
    remove_child_only = true;
  } else if (r == -ENOENT) {
    *ss << "unable to locate shadow parents of osd." << id;
    return r;
  } else {
    assert(r == 0); // no other errors allowed, for now
  }

  int direct_shadow_parent;
  r = get_immediate_parent_id(id, &direct_shadow_parent, PARENT_SHADOW);
  if (r < 0) {
    *ss << "unable to locate direct shadow parent of osd." << id;
    return -ENOENT;
  }
  crush_bucket *direct = get_bucket(direct_shadow_parent);
  assert(!IS_ERR(direct));
  for (auto& p : choose_args) {
    // weight down each weight-set to 0 before we remove the item
    vector<int> weightv(get_choose_args_positions(p.second), 0);
    choose_args_adjust_item_weight(cct, p.second, id, weightv, ss);
  }
  bucket_remove_item(direct, id);
  adjust_item_weight(cct, direct->id, direct->weight);
  if (remove_child_only) {
    return 0;
  }

  // parents is ordered nearest-first, so the last entry is the shadow root
  for (auto i = parents.begin(); i != parents.end(); ++i) {
    auto parenti = i;
    ++parenti;
    bool is_root = parenti == parents.end();
    if (!is_root) {
      ldout(cct, 5) << __func__ << " removing empty shadow parent:"
                    << " id = " << *i << ","
                    << " name = " << get_item_name(*i) << ";"
                    << " parent id = " << *parenti << ","
                    << " parent name = " << get_item_name(*parenti)
                    << dendl;
      // remove myself from parent first
      crush_bucket *parentb = get_bucket(*parenti);
      assert(!IS_ERR(parentb));
      for (auto& p : choose_args) {
        // weight down each weight-set to 0 before we remove the item
        vector<int> weightv(get_choose_args_positions(p.second), 0);
        choose_args_adjust_item_weight(cct, p.second, *i, weightv, ss);
      }
      bucket_remove_item(parentb, *i);
      adjust_item_weight(cct, parentb->id, parentb->weight);
    }

    crush_bucket *emptyb = get_bucket(*i);
    assert(!IS_ERR(emptyb));
    assert(emptyb->size == 0); // shall be empty now
    ldout(cct, 5) << __func__ << " removing empty shadow parent:"
                  << " id = " << *i << ","
                  << " name = " << get_item_name(*i)
                  << dendl;
    crush_remove_bucket(crush, emptyb);
    // erase() is a no-op when the key is absent
    class_bucket.erase(*i);
    class_remove_item(*i);
    // remove from name_map too
    name_map.erase(*i);
    have_rmaps = false;
  }

  set<int> shadow_roots;
  find_shadow_roots_by_class(class_name, shadow_roots);
  if (shadow_roots.empty()) {
    // no more shadow roots bound to this class
    remove_class_name(class_name);
  }
  return 0;
}

int CrushWrapper::device_class_clone(int original_id, int device_class, int *clone)
{
const char *item_name = get_item_name(original_id);
Expand Down
46 changes: 45 additions & 1 deletion src/crush/CrushWrapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -617,6 +617,37 @@ class CrushWrapper {
}
}

/**
 * collect the tree roots that are shadow (device class) items
 *
 * Every root reported by find_roots() for which shadow_item() holds
 * is inserted into @p roots. Entries already present in @p roots are
 * left in place.
 */
void find_shadow_roots(set<int>& roots) const {
  set<int> candidates;
  find_roots(candidates);
  for (set<int>::const_iterator it = candidates.begin();
       it != candidates.end();
       ++it) {
    if (!shadow_item(*it)) {
      continue;
    }
    roots.insert(*it);
  }
}

/**
* find specific tree roots belonging to the given device class.
*/
void find_shadow_roots_by_class(const string& class_name,
set<int>& roots) const {
set<int> all;
find_shadow_roots(all);
for (auto& p: all) {
string root_name = get_item_name(p);
if (root_name.find(string("~") + class_name) != string::npos) {
roots.insert(p);
}
}
}

/**
* see if an item is contained within a subtree
*
Expand Down Expand Up @@ -669,7 +700,16 @@ class CrushWrapper {
* FIXME: ambiguous for items that occur multiple times in the map
*/
pair<string,string> get_immediate_parent(int id, int *ret = NULL);
int get_immediate_parent_id(int id, int *parent) const;

// Selects which kind of parent bucket get_immediate_parent_id() considers.
typedef enum {
// skip buckets for which shadow_item() holds
PARENT_NONSHADOW,
// skip buckets for which shadow_item() does not hold
PARENT_SHADOW,
// apply neither filter
PARENT_ALL,
} parent_type_t;

// Look up the id of a bucket that directly contains `id`, restricted by
// `choice`, and store it in *parent. Callers in CrushWrapper.cc treat a
// negative return value as "no such parent".
// The default keeps the behaviour existing callers relied on (shadow
// buckets are skipped).
int get_immediate_parent_id(int id,
int *parent,
parent_type_t choice = PARENT_NONSHADOW) const;

/**
* return ancestor of the given type, or 0 if none
Expand Down Expand Up @@ -1214,6 +1254,10 @@ class CrushWrapper {
}

int update_device_class(int id, const string& class_name, const string& name, ostream *ss);
// Collect into *parents the shadow parents above `id`, nearest first, as
// long as each of them has exactly one child.
// Returns 0 on success, -EBUSY if a parent has more than one child,
// -ENOENT if a shadow parent can not be found.
int get_immediate_only_child_shadow_parents(CephContext *cct,
int id,
list<int> *parents);
// Unbind osd `id` from its device class and remove the shadow buckets
// that become empty as a result. A reason is written to *ss on failure.
// Returns 0 on success (also when the osd has no class), -ENOENT if the
// osd has no name or its shadow parents can not be located.
int remove_device_class(CephContext *cct, int id, ostream *ss);
int device_class_clone(int original, int device_class, int *clone);
bool class_is_in_use(int class_id, ostream *ss = nullptr);
int populate_classes();
Expand Down
5 changes: 5 additions & 0 deletions src/mon/MonCommands.h
Original file line number Diff line number Diff line change
Expand Up @@ -546,6 +546,11 @@ COMMAND("osd crush set-device-class " \
"set the <class> of the osd(s) <id> [<id>...]," \
"or use <all|any|*> to set all.", \
"osd", "rw", "cli,rest")
// Remove the device class from one or more osds; takes a list of osd ids,
// or one of the wildcards all/any/* as the first argument.
COMMAND("osd crush rm-device-class " \
"name=ids,type=CephString,n=N", \
"remove class of the osd(s) <id> [<id>...]," \
"or use <all|any|*> to remove all.", \
"osd", "rw", "cli,rest")
COMMAND("osd crush create-or-move " \
"name=id,type=CephOsdName " \
"name=weight,type=CephFloat,range=0.0 " \
Expand Down
63 changes: 63 additions & 0 deletions src/mon/OSDMonitor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7453,6 +7453,69 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op,
return true;
}

} else if (prefix == "osd crush rm-device-class") {
bool stop = false;
vector<string> idvec;
cmd_getval(g_ceph_context, cmdmap, "ids", idvec);
CrushWrapper newcrush;
_get_pending_crush(newcrush);
set<int> updated;

for (unsigned j = 0; j < idvec.size() && !stop; j++) {
set<int> osds;

// wildcard?
if (j == 0 &&
(idvec[0] == "any" || idvec[0] == "all" || idvec[0] == "*")) {
osdmap.get_all_osds(osds);
stop = true;
} else {
// try traditional single osd way
long osd = parse_osd_id(idvec[j].c_str(), &ss);
if (osd < 0) {
// ss has reason for failure
ss << ", unable to parse osd id:\"" << idvec[j] << "\". ";
err = -EINVAL;
goto reply;
}
osds.insert(osd);
}

for (auto &osd : osds) {
if (!osdmap.exists(osd)) {
ss << "osd." << osd << " does not exist. ";
continue;
}

auto class_name = newcrush.get_item_class(osd);
stringstream ts;
if (!class_name) {
ss << "osd." << osd << " belongs to no class, ";
continue;
}
// note that we do not verify if class_is_in_use here
// in case the device is misclassified and user wants
// to overridely reset...

err = newcrush.remove_device_class(g_ceph_context, osd, &ss);
if (err < 0) {
// ss has reason for failure
goto reply;
}
updated.insert(osd);
}
}

if (!updated.empty()) {
pending_inc.crush.clear();
newcrush.encode(pending_inc.crush, mon->get_quorum_con_features());
ss << "done removing class of osd(s): " << updated;
getline(ss, rs);
wait_for_finished_proposal(op,
new Monitor::C_Command(mon,op, 0, rs, get_last_committed() + 1));
return true;
}

} else if (prefix == "osd crush add-bucket") {
// osd crush add-bucket <name> <type>
string name, typestr;
Expand Down
38 changes: 38 additions & 0 deletions src/test/crush/crush-classes.sh
Original file line number Diff line number Diff line change
Expand Up @@ -156,13 +156,51 @@ function TEST_mon_classes() {
ceph osd crush class rm CLASS || return 1
expect_failure $dir ENOENT ceph osd crush class rm CLASS || return 1

# test rm-device-class
ceph osd crush set-device-class aaa osd.0 || return 1
ceph osd tree | grep -q 'aaa' || return 1
ceph osd crush dump | grep -q '~aaa' || return 1
ceph osd crush set-device-class bbb osd.1 || return 1
ceph osd tree | grep -q 'bbb' || return 1
ceph osd crush dump | grep -q '~bbb' || return 1
ceph osd crush set-device-class ccc osd.2 || return 1
ceph osd tree | grep -q 'ccc' || return 1
ceph osd crush dump | grep -q '~ccc' || return 1
ceph osd crush rm-device-class 0 || return 1
ceph osd tree | grep -q 'aaa' && return 1
ceph osd crush dump | grep -q '~aaa' && return 1
ceph osd crush class ls | grep -q 'aaa' && return 1
ceph osd crush rm-device-class 1 || return 1
ceph osd tree | grep -q 'bbb' && return 1
ceph osd crush dump | grep -q '~bbb' && return 1
ceph osd crush class ls | grep -q 'bbb' && return 1
ceph osd crush rm-device-class 2 || return 1
ceph osd tree | grep -q 'ccc' && return 1
ceph osd crush dump | grep -q '~ccc' && return 1
ceph osd crush class ls | grep -q 'ccc' && return 1
ceph osd crush set-device-class asdf all || return 1
ceph osd tree | grep -q 'asdf' || return 1
ceph osd crush dump | grep -q '~asdf' || return 1
ceph osd crush rm-device-class all || return 1
ceph osd tree | grep -q 'asdf' && return 1
ceph osd crush dump | grep -q '~asdf' && return 1

ceph osd crush set-device-class abc osd.2 || return 1
ceph osd crush move osd.2 root=foo rack=foo-rack host=foo-host || return 1
out=`ceph osd tree |awk '$1 == 2 && $2 == "abc" {print $0}'`
if [ "$out" == "" ]; then
return 1
fi

# verify 'crush move' too
ceph osd crush dump | grep -q 'foo~abc' || return 1
ceph osd crush dump | grep -q 'foo-rack~abc' || return 1
ceph osd crush dump | grep -q 'foo-host~abc' || return 1
ceph osd crush rm-device-class osd.2 || return 1
ceph osd crush dump | grep -q 'foo~abc' && return 1
ceph osd crush dump | grep -q 'foo-rack~abc' && return 1
ceph osd crush dump | grep -q 'foo-host~abc' && return 1

ceph osd crush rule create-replicated foo-rule foo host abc || return 1

# test class_is_in_use
Expand Down

0 comments on commit b80c530

Please sign in to comment.