Skip to content

Commit

Permalink
Merge pull request #4658 from ceph/wip-11481
Browse files (browse the repository at this point in the history)
#11481: MDS resilience to weird mdsmaps
  • Loading branch information
ukernel committed May 19, 2015
2 parents 1b758c9 + f11de85 commit e585ddf
Show file tree
Hide file tree
Showing 4 changed files with 113 additions and 40 deletions.
120 changes: 82 additions & 38 deletions src/mds/MDS.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1073,7 +1073,7 @@ int MDS::init(MDSMap::DaemonState wanted_state)
standby_for_rank = MDSMap::MDS_MATCHED_ACTIVE;

beacon.init(mdsmap, want_state, standby_for_rank, standby_for_name);
whoami = -1;
whoami = MDS_RANK_NONE;
messenger->set_myname(entity_name_t::MDS(whoami));

// schedule tick
Expand Down Expand Up @@ -1566,12 +1566,54 @@ void MDS::handle_mds_map(MMDSMap *m)

// see who i am
addr = messenger->get_myaddr();
whoami = mdsmap->get_rank_gid(mds_gid_t(monc->get_global_id()));
state = mdsmap->get_state_gid(mds_gid_t(monc->get_global_id()));
incarnation = mdsmap->get_inc_gid(mds_gid_t(monc->get_global_id()));
whoami = mdsmap->get_rank_gid(mds_gid_t(monc->get_global_id()));
if (whoami == MDS_RANK_NONE && (
state == MDSMap::STATE_STANDBY_REPLAY || state == MDSMap::STATE_ONESHOT_REPLAY)) {
whoami = mdsmap->get_mds_info_gid(mds_gid_t(monc->get_global_id())).standby_for_rank;
}

dout(10) << "map says i am " << addr << " mds." << whoami << "." << incarnation
<< " state " << ceph_mds_state_name(state) << dendl;

// Once I hold a rank it can't be taken away without
// restarting this daemon
if (whoami != oldwhoami && oldwhoami != MDS_RANK_NONE) {
derr << "Invalid rank transition " << oldwhoami << "->" << whoami << dendl;
respawn();
}

// Validate state transitions while I hold a rank
{
bool state_valid = true;
if (whoami != MDS_RANK_NONE && state != oldstate) {
if (oldstate == MDSMap::STATE_REPLAY) {
if (state != MDSMap::STATE_RESOLVE && state != MDSMap::STATE_RECONNECT) {
state_valid = false;
}
} else if (oldstate == MDSMap::STATE_REJOIN) {
if (state != MDSMap::STATE_ACTIVE
&& state != MDSMap::STATE_CLIENTREPLAY
&& state != MDSMap::STATE_STOPPED) {
state_valid = false;
}
} else if (oldstate >= MDSMap::STATE_RECONNECT && oldstate < MDSMap::STATE_ACTIVE) {
// Once I have entered replay, the only allowable transitions are to
// the next state along in the sequence.
if (state != oldstate + 1) {
state_valid = false;
}
}
}

if (!state_valid) {
derr << "Invalid state transition " << ceph_mds_state_name(oldstate)
<< "->" << ceph_mds_state_name(state) << dendl;
respawn();
}
}

// mark down any failed peers
for (map<mds_gid_t,MDSMap::mds_info_t>::const_iterator p = oldmap->get_mds_info().begin();
p != oldmap->get_mds_info().end();
Expand Down Expand Up @@ -1603,46 +1645,39 @@ void MDS::handle_mds_map(MMDSMap *m)
}
}

if (whoami < 0) {
if (state == MDSMap::STATE_STANDBY_REPLAY ||
state == MDSMap::STATE_ONESHOT_REPLAY) {
// fill in whoami from standby-for-rank. If we let this be changed
// the logic used to set it here will need to be adjusted.
whoami = mdsmap->get_mds_info_gid(mds_gid_t(monc->get_global_id())).standby_for_rank;
if (whoami == MDS_RANK_NONE) {
if (want_state == MDSMap::STATE_STANDBY) {
dout(10) << "dropped out of mdsmap, try to re-add myself" << dendl;
state = MDSMap::STATE_BOOT;
set_want_state(state);
goto out;
} else if (want_state == MDSMap::STATE_BOOT) {
dout(10) << "not in map yet" << dendl;
} else {
if (want_state == MDSMap::STATE_STANDBY) {
dout(10) << "dropped out of mdsmap, try to re-add myself" << dendl;
state = MDSMap::STATE_BOOT;
set_want_state(state);
goto out;
// did i get kicked by someone else?
if (g_conf->mds_enforce_unique_name) {
if (mds_gid_t existing = mdsmap->find_mds_gid_by_name(name)) {
MDSMap::mds_info_t& i = mdsmap->get_info_gid(existing);
if (i.global_id > monc->get_global_id()) {
dout(1) << "handle_mds_map i (" << addr
<< ") dne in the mdsmap, new instance has larger gid " << i.global_id
<< ", suicide" << dendl;
// Call suicide() rather than respawn() because if someone else
// has taken our ID, we don't want to keep restarting and
// fighting them for the ID.
suicide();
goto out;
}
}
}
if (want_state == MDSMap::STATE_BOOT) {
dout(10) << "not in map yet" << dendl;
} else {
// did i get kicked by someone else?
if (g_conf->mds_enforce_unique_name) {
if (mds_gid_t existing = mdsmap->find_mds_gid_by_name(name)) {
MDSMap::mds_info_t& i = mdsmap->get_info_gid(existing);
if (i.global_id > monc->get_global_id()) {
dout(1) << "handle_mds_map i (" << addr
<< ") dne in the mdsmap, new instance has larger gid " << i.global_id
<< ", suicide" << dendl;
suicide();
goto out;
}
}
}

dout(1) << "handle_mds_map i (" << addr
<< ") dne in the mdsmap, respawning myself" << dendl;
respawn();
}
goto out;
dout(1) << "handle_mds_map i (" << addr
<< ") dne in the mdsmap, respawning myself" << dendl;
respawn();
}
goto out;
}

// ??

if (oldwhoami != whoami || oldstate != state) {
// update messenger.
if (state == MDSMap::STATE_STANDBY_REPLAY || state == MDSMap::STATE_ONESHOT_REPLAY) {
Expand Down Expand Up @@ -2385,6 +2420,8 @@ void MDS::handle_signal(int signum)

void MDS::damaged()
{
assert(whoami != MDS_RANK_NONE);

set_want_state(MDSMap::STATE_DAMAGED);
monc->flush_log(); // Flush any clog error from before we were called
beacon.notify_health(this); // Include latest status in our swan song
Expand All @@ -2397,11 +2434,18 @@ void MDS::damaged()
respawn(); // Respawn into standby in case mon has other work for us
}

void MDS::suicide()
void MDS::suicide(bool fast)
{
assert(mds_lock.is_locked());
set_want_state(MDSMap::STATE_DNE); // whatever.

if (!fast && !mdsmap->is_dne_gid(mds_gid_t(monc->get_global_id()))) {
// Notify the MDSMonitor that we're dying, so that it doesn't have to
// wait for us to go laggy. Only do this if we're actually in the
// MDSMap, because otherwise the MDSMonitor will drop our message.
beacon.send_and_wait(1);
}

dout(1) << "suicide. wanted " << ceph_mds_state_name(want_state)
<< ", now " << ceph_mds_state_name(state) << dendl;

Expand Down Expand Up @@ -2482,7 +2526,7 @@ void MDS::respawn()

dout(0) << "respawn execv " << orig_argv[0]
<< " failed with " << cpp_strerror(errno) << dendl;
suicide();
suicide(true);
}

void MDS::handle_write_error(int err)
Expand Down
14 changes: 13 additions & 1 deletion src/mds/MDS.h
Original file line number Diff line number Diff line change
Expand Up @@ -451,7 +451,19 @@ class MDS : public Dispatcher, public md_config_obs_t {
* through cleaner scrub/repair mechanisms.
*/
void damaged();
void suicide();

/**
* Terminate this daemon process.
*
* @param fast: if true, do not send a message to the mon before shutting
* down
*/
void suicide(bool fast = false);

/**
* Re-execute this daemon in place (via execv) with the same command line
* parameters that this process was run with; if the exec fails, fall back
* to suicide()
*/
void respawn();
void handle_write_error(int err);

Expand Down
2 changes: 1 addition & 1 deletion src/mds/RecoveryQueue.cc
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ void RecoveryQueue::_recovered(CInode *in, int r, uint64_t size, utime_t mtime)
if (r != 0) {
dout(0) << "recovery error! " << r << dendl;
if (r == -EBLACKLISTED) {
mds->suicide();
mds->respawn();
return;
}
assert(0 == "unexpected error from osd during recovery");
Expand Down
17 changes: 17 additions & 0 deletions src/mon/MDSMonitor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -579,6 +579,21 @@ bool MDSMonitor::prepare_beacon(MMDSBeacon *m)
// after we remove the instance from the map.
pending_mdsmap.mds_info.erase(gid);

// Respond to MDS, so that it knows it can continue to shut down
mon->send_reply(m, new MMDSBeacon(mon->monmap->fsid, m->get_global_id(),
m->get_name(), mdsmap.get_epoch(), state, seq));
} else if (state == MDSMap::STATE_DNE) {
if (!mon->osdmon()->is_writeable()) {
dout(4) << __func__ << ": DNE from rank " << info.rank
<< " waiting for osdmon writeable to blacklist it" << dendl;
mon->osdmon()->wait_for_writeable(new C_RetryMessage(this, m));
return false;
}

fail_mds_gid(gid);
assert(mon->osdmon()->is_writeable());
request_proposal(mon->osdmon());

// Respond to MDS, so that it knows it can continue to shut down
mon->send_reply(m, new MMDSBeacon(mon->monmap->fsid, m->get_global_id(),
m->get_name(), mdsmap.get_epoch(), state, seq));
Expand Down Expand Up @@ -940,6 +955,8 @@ void MDSMonitor::fail_mds_gid(mds_gid_t gid)
}

pending_mdsmap.mds_info.erase(gid);

last_beacon.erase(gid);
}

mds_gid_t MDSMonitor::gid_from_arg(const std::string& arg, std::ostream &ss)
Expand Down

0 comments on commit e585ddf

Please sign in to comment.