Skip to content

Commit

Permalink
mon: MMonProbe: direct MMonJoin messages to the leader, instead of the first mon
Browse files Browse the repository at this point in the history

When monitors are joining a cluster, they may send an MMonJoin message to place
themselves correctly in the map in either handle_probe_reply() or
finish_election(). These messages must be sent to the leader -- monitors do not
forward each other's messages.

Unfortunately, this scenario was missed when converting the monitors to support
connectivity-based elections, and they are still sending these messages to
quorum.begin(), which is no longer guaranteed to be the leader. Fix this by
including an explicit leader in MMonProbe (which the new monitor may reference
in handle_probe_reply()) and using the leader value in both locations.

Fixes: https://tracker.ceph.com/issues/50345

Signed-off-by: Greg Farnum <gfarnum@redhat.com>
  • Loading branch information
gregsfortytwo committed Apr 13, 2021
1 parent 05861ca commit ffa7ff3
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 3 deletions.
10 changes: 9 additions & 1 deletion src/messages/MMonProbe.h
Expand Up @@ -23,7 +23,7 @@

class MMonProbe final : public Message {
public:
static constexpr int HEAD_VERSION = 7;
static constexpr int HEAD_VERSION = 8;
static constexpr int COMPAT_VERSION = 5;

enum {
Expand Down Expand Up @@ -51,6 +51,7 @@ class MMonProbe final : public Message {
int32_t op = 0;
std::string name;
std::set<int32_t> quorum;
int leader = -1;
ceph::buffer::list monmap_bl;
version_t paxos_first_version = 0;
version_t paxos_last_version = 0;
Expand Down Expand Up @@ -79,6 +80,7 @@ class MMonProbe final : public Message {
out << "mon_probe(" << get_opname(op) << " " << fsid << " name " << name;
if (quorum.size())
out << " quorum " << quorum;
out << " leader " << leader;
if (op == OP_REPLY) {
out << " paxos("
<< " fc " << paxos_first_version
Expand Down Expand Up @@ -116,6 +118,7 @@ class MMonProbe final : public Message {
encode(paxos_last_version, payload);
encode(required_features, payload);
encode(mon_release, payload);
encode(leader, payload);
}
void decode_payload() override {
using ceph::decode;
Expand All @@ -136,6 +139,11 @@ class MMonProbe final : public Message {
decode(mon_release, p);
else
mon_release = ceph_release_t::unknown;
if (header.version >= 8) {
decode(leader, p);
} else if (quorum.size()) {
leader = *quorum.begin();
}
}
private:
template<class T, typename... Args>
Expand Down
5 changes: 3 additions & 2 deletions src/mon/Monitor.cc
Expand Up @@ -1954,6 +1954,7 @@ void Monitor::handle_probe_probe(MonOpRequestRef op)
ceph_release());
r->name = name;
r->quorum = quorum;
r->leader = leader;
monmap->encode(r->monmap_bl, m->get_connection()->get_features());
r->paxos_first_version = paxos->get_first_committed();
r->paxos_last_version = paxos->get_version();
Expand Down Expand Up @@ -2121,7 +2122,7 @@ void Monitor::handle_probe_reply(MonOpRequestRef op)
send_mon_message(new MMonJoin(monmap->fsid, name,
messenger->get_myaddrs(), crush_loc,
need_set_crush_loc),
*m->quorum.begin());
m->leader);
}
} else {
if (monmap->contains(m->name)) {
Expand Down Expand Up @@ -2396,7 +2397,7 @@ void Monitor::finish_election()
<< map_crush_loc <<" -> " << name << "/" << crush_loc << dendl;
send_mon_message(new MMonJoin(monmap->fsid, name, messenger->get_myaddrs(),
crush_loc, need_set_crush_loc),
*quorum.begin());
leader);
return;
}
do_stretch_mode_election_work();
Expand Down

0 comments on commit ffa7ff3

Please sign in to comment.