Skip to content

Commit

Permalink
osd: add no{up,down,in,out} flags by device class
Browse files Browse the repository at this point in the history
This works as a good supplement to #27563.

Signed-off-by: xie xingguo <xie.xingguo@zte.com.cn>
  • Loading branch information
xiexingguo committed Apr 24, 2019
1 parent 78bcae1 commit 21e1faa
Show file tree
Hide file tree
Showing 6 changed files with 169 additions and 71 deletions.
9 changes: 5 additions & 4 deletions doc/rados/operations/health-checks.rst
Expand Up @@ -201,7 +201,7 @@ ____________

One or more cluster flags of interest has been set. These flags include:

* *full* - the cluster is flagged as full and cannot service writes
* *full* - the cluster is flagged as full and cannot serve writes
* *pauserd*, *pausewr* - paused reads or writes
* *noup* - OSDs are not allowed to start
* *nodown* - OSD failure reports are being ignored, such that the
Expand All @@ -223,7 +223,7 @@ With the exception of *full*, these flags can be set or cleared with::
OSD_FLAGS
_________

One or more OSDs or CRUSH nodes has a flag of interest set. These flags include:
One or more OSDs, CRUSH nodes, or CRUSH device classes have a flag of interest set. These flags include:

* *noup*: these OSDs are not allowed to start
* *nodown*: failure reports for these OSDs will be ignored
Expand All @@ -234,13 +234,14 @@ One or more OSDs or CRUSH nodes has a flag of interest set. These flags include

These flags can be set and cleared with::

ceph osd add-<flag> <osd-id-or-crush-node-name>
ceph osd rm-<flag> <osd-id-or-crush-node-name>
ceph osd add-<flag> <osd-id-or-crush-node-name-or-device-class-name>
ceph osd rm-<flag> <osd-id-or-crush-node-name-or-device-class-name>

For example, ::

ceph osd rm-nodown osd.123
ceph osd rm-noout hostfoo
ceph osd rm-noout hdd

OLD_CRUSH_TUNABLES
__________________
Expand Down
13 changes: 13 additions & 0 deletions qa/workunits/cephtool/test.sh
Expand Up @@ -1597,6 +1597,19 @@ function test_mon_osd()
ceph osd crush rm foo
ceph osd dump -f json-pretty | jq ".crush_node_flags" | expect_false grep foo

# test device class flags
osd_0_device_class=$(ceph osd crush get-device-class osd.0)
ceph osd add-noup $osd_0_device_class
ceph osd add-nodown $osd_0_device_class
ceph osd add-noin $osd_0_device_class
ceph osd add-noout $osd_0_device_class
ceph osd dump -f json-pretty | jq ".device_class_flags" | grep $osd_0_device_class
ceph osd rm-noup $osd_0_device_class
ceph osd rm-nodown $osd_0_device_class
ceph osd rm-noin $osd_0_device_class
ceph osd rm-noout $osd_0_device_class
ceph osd dump -f json-pretty | jq ".device_class_flags" | expect_false grep $osd_0_device_class

# make sure mark out preserves weight
ceph osd reweight osd.0 .5
ceph osd dump | grep ^osd.0 | grep 'weight 0.5'
Expand Down
6 changes: 6 additions & 0 deletions src/crush/CrushWrapper.h
Expand Up @@ -505,6 +505,12 @@ class CrushWrapper {
return 0;
return get_class_name(p->second);
}
/**
 * Look up the class id recorded in class_map for item t.
 *
 * @param t key to look up in class_map
 * @return the mapped class id, or -ENOENT when class_map has no entry for t
 */
int get_item_class_id(int t) const {
  const auto entry = class_map.find(t);
  if (entry != class_map.end()) {
    return entry->second;
  }
  return -ENOENT;
}
int set_item_class(int i, const std::string& name) {
if (!is_valid_crush_name(name))
return -EINVAL;
Expand Down
117 changes: 58 additions & 59 deletions src/mon/OSDMonitor.cc
Expand Up @@ -2442,25 +2442,12 @@ bool OSDMonitor::prepare_mark_me_down(MonOpRequestRef op)

bool OSDMonitor::can_mark_down(int i)
{
if (osdmap.test_flag(CEPH_OSDMAP_NODOWN)) {
dout(5) << __func__ << " NODOWN flag set, will not mark osd." << i
<< " down" << dendl;
return false;
}

if (osdmap.is_nodown(i)) {
dout(5) << __func__ << " osd." << i << " is marked as nodown, "
<< "will not mark it down" << dendl;
return false;
}

if (osdmap.get_crush_node_flags(i) & CEPH_OSD_NODOWN) {
dout(5) << __func__ << " osd." << i
<< " is marked as nodown via a crush node flag, "
<< "will not mark it down" << dendl;
return false;
}

int num_osds = osdmap.get_num_osds();
if (num_osds == 0) {
dout(5) << __func__ << " no osds" << dendl;
Expand All @@ -2479,25 +2466,12 @@ bool OSDMonitor::can_mark_down(int i)

/**
 * Decide whether osd.i may be marked up.
 *
 * Three "noup" sources are consulted in order: the osdmap-wide NOUP flag,
 * the per-OSD noup state, and the crush node flags reported for this OSD.
 * The first source that is set vetoes the transition and is the only one
 * logged (at debug level 5).
 *
 * @param i OSD id being considered
 * @return true when none of the noup sources is set, false otherwise
 */
bool OSDMonitor::can_mark_up(int i)
{
  bool allowed = false;

  if (osdmap.test_flag(CEPH_OSDMAP_NOUP)) {
    dout(5) << __func__ << " NOUP flag set, will not mark osd." << i << " up"
            << dendl;
  } else if (osdmap.is_noup(i)) {
    dout(5) << __func__ << " osd." << i << " is marked as noup, "
            << "will not mark it up" << dendl;
  } else if (osdmap.get_crush_node_flags(i) & CEPH_OSD_NOUP) {
    dout(5) << __func__ << " osd." << i
            << " is marked as noup via a crush node flag, "
            << "will not mark it up" << dendl;
  } else {
    // nothing objects to the transition
    allowed = true;
  }

  return allowed;
}

Expand All @@ -2507,24 +2481,12 @@ bool OSDMonitor::can_mark_up(int i)
*/
bool OSDMonitor::can_mark_out(int i)
{
if (osdmap.test_flag(CEPH_OSDMAP_NOOUT)) {
dout(5) << __func__ << " NOOUT flag set, will not mark osds out" << dendl;
return false;
}

if (osdmap.is_noout(i)) {
dout(5) << __func__ << " osd." << i << " is marked as noout, "
<< "will not mark it out" << dendl;
return false;
}

if (osdmap.get_crush_node_flags(i) & CEPH_OSD_NOOUT) {
dout(5) << __func__ << " osd." << i
<< " is marked as noout via a crush node flag, "
<< "will not mark it out" << dendl;
return false;
}

int num_osds = osdmap.get_num_osds();
if (num_osds == 0) {
dout(5) << __func__ << " no osds" << dendl;
Expand All @@ -2549,25 +2511,12 @@ bool OSDMonitor::can_mark_out(int i)

/**
 * Decide whether osd.i may be marked in.
 *
 * Three "noin" sources are consulted in order: the osdmap-wide NOIN flag,
 * the per-OSD noin state, and the crush node flags reported for this OSD.
 * The first source that is set vetoes the transition and is the only one
 * logged (at debug level 5).
 *
 * @param i OSD id being considered
 * @return true when none of the noin sources is set, false otherwise
 */
bool OSDMonitor::can_mark_in(int i)
{
  bool allowed = false;

  if (osdmap.test_flag(CEPH_OSDMAP_NOIN)) {
    dout(5) << __func__ << " NOIN flag set, will not mark osd." << i << " in"
            << dendl;
  } else if (osdmap.is_noin(i)) {
    dout(5) << __func__ << " osd." << i << " is marked as noin, "
            << "will not mark it in" << dendl;
  } else if (osdmap.get_crush_node_flags(i) & CEPH_OSD_NOIN) {
    dout(5) << __func__ << " osd." << i
            << " is marked as noin via a crush node flag, "
            << "will not mark it in" << dendl;
  } else {
    // nothing objects to the transition
    allowed = true;
  }

  return allowed;
}

Expand Down Expand Up @@ -10591,22 +10540,25 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op,
for (unsigned j = 0; j < idvec.size() && !stop; j++) {
set<int> osds;
set<int> crush_nodes;
set<int> device_classes;

// wildcard?
if (j == 0 &&
(idvec[0] == "any" || idvec[0] == "all" || idvec[0] == "*")) {
osdmap.get_all_osds(osds);
stop = true;
} else {
if (long osd = parse_osd_id(idvec[j].c_str(), &ss); osd >= 0) {
std::stringstream ts;
if (long osd = parse_osd_id(idvec[j].c_str(), &ts); osd >= 0) {
osds.insert(osd);
} else if (osdmap.crush->name_exists(idvec[j])) {
std::stringstream().swap(ss);
crush_nodes.insert(osdmap.crush->get_item_id(idvec[j]));
} else if (osdmap.crush->class_exists(idvec[j])) {
device_classes.insert(osdmap.crush->get_class_id(idvec[j]));
} else {
// ss has reason for failure
ss << ", unable to parse osd id or crush node:\"" << idvec[j]
<< "\". ";
ss << "unable to parse osd id or crush node or device class:\""
<< idvec[j] << "\". ";
err = -EINVAL;
continue;
}
Expand Down Expand Up @@ -10634,6 +10586,28 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op,
}
any = true;
}
for (auto &i : device_classes) {
auto q = osdmap.device_class_flags.find(i);
if (pending_inc.new_device_class_flags.count(i) == 0 &&
q != osdmap.device_class_flags.end()) {
pending_inc.new_device_class_flags[i] = q->second;
}
switch (option) {
case OP_NOUP:
pending_inc.new_device_class_flags[i] |= CEPH_OSD_NOUP;
break;
case OP_NODOWN:
pending_inc.new_device_class_flags[i] |= CEPH_OSD_NODOWN;
break;
case OP_NOIN:
pending_inc.new_device_class_flags[i] |= CEPH_OSD_NOIN;
break;
case OP_NOOUT:
pending_inc.new_device_class_flags[i] |= CEPH_OSD_NOOUT;
break;
}
any = true;
}
for (auto &osd : osds) {
if (!osdmap.exists(osd)) {
ss << "osd." << osd << " does not exist. ";
Expand Down Expand Up @@ -10728,6 +10702,7 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op,
for (unsigned j = 0; j < idvec.size() && !stop; j++) {
vector<int> osds;
set<int> crush_nodes;
set<int> device_classes;

// wildcard?
if (j == 0 &&
Expand Down Expand Up @@ -10792,15 +10767,17 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op,

stop = true;
} else {
if (long osd = parse_osd_id(idvec[j].c_str(), &ss); osd >= 0) {
std::stringstream ts;
if (long osd = parse_osd_id(idvec[j].c_str(), &ts); osd >= 0) {
osds.push_back(osd);
} else if (osdmap.crush->name_exists(idvec[j])) {
std::stringstream().swap(ss);
crush_nodes.insert(osdmap.crush->get_item_id(idvec[j]));
} else if (osdmap.crush->class_exists(idvec[j])) {
device_classes.insert(osdmap.crush->get_class_id(idvec[j]));
} else {
// ss has reason for failure
ss << ", unable to parse osd id or crush node:\"" << idvec[j]
<< "\". ";
ss << "unable to parse osd id or crush node or device class:\""
<< idvec[j] << "\". ";
err = -EINVAL;
continue;
}
Expand Down Expand Up @@ -10828,6 +10805,28 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op,
}
any = true;
}
for (auto &i : device_classes) {
auto q = osdmap.device_class_flags.find(i);
if (pending_inc.new_device_class_flags.count(i) == 0 &&
q != osdmap.device_class_flags.end()) {
pending_inc.new_device_class_flags[i] = q->second;
}
switch (option) {
case OP_NOUP:
pending_inc.new_device_class_flags[i] &= ~CEPH_OSD_NOUP;
break;
case OP_NODOWN:
pending_inc.new_device_class_flags[i] &= ~CEPH_OSD_NODOWN;
break;
case OP_NOIN:
pending_inc.new_device_class_flags[i] &= ~CEPH_OSD_NOIN;
break;
case OP_NOOUT:
pending_inc.new_device_class_flags[i] &= ~CEPH_OSD_NOOUT;
break;
}
any = true;
}
for (auto &osd : osds) {
if (!osdmap.exists(osd)) {
ss << "osd." << osd << " does not exist. ";
Expand Down

0 comments on commit 21e1faa

Please sign in to comment.