From 0023cff188df7654bf2e4e8980cc83307e93ec71 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E7=82=8E=E6=B3=BC?= Date: Sun, 6 Nov 2022 23:57:11 +0800 Subject: [PATCH] Fix: delay leader step down When a membership that removes the leader is committed, the leader continue to work for a short while before reverting to a learner. This way, let the leader replicate the `membership-log-is-committed` message to followers. Otherwise, if the leader step down at once, the follower might have to re-commit the membership log again. After committing the membership log that does not contain the leader, the leader will step down in the next `tick`. --- openraft/src/core/raft_core.rs | 48 ++++++++----------- openraft/src/engine/engine_impl.rs | 33 +++++++++++-- .../tests/membership/t30_remove_leader.rs | 7 ++- 3 files changed, 55 insertions(+), 33 deletions(-) diff --git a/openraft/src/core/raft_core.rs b/openraft/src/core/raft_core.rs index 423b77c9a..94b6e8633 100644 --- a/openraft/src/core/raft_core.rs +++ b/openraft/src/core/raft_core.rs @@ -237,7 +237,7 @@ impl, S: RaftStorage> RaftCore) -> Result<(), Fatal> { tracing::debug!("raft node is initializing"); @@ -734,7 +734,7 @@ impl, S: RaftStorage> RaftCore, S: RaftStorage> RaftCore, S: RaftStorage> RaftCore, S: RaftStorage> RaftCore { @@ -1092,7 +1074,7 @@ impl, S: RaftStorage> RaftCore) -> Result<(), Fatal> { loop { self.flush_metrics(); @@ -1125,7 +1107,7 @@ impl, S: RaftStorage> RaftCore) { let members = self.engine.state.membership_state.effective.voter_ids(); @@ -1353,6 +1335,14 @@ impl, S: RaftStorage> RaftCore>(&[]).await?; } RaftMsg::HigherVote { @@ -1443,8 +1433,10 @@ impl, S: RaftStorage> RaftCore, S: RaftStorage> RaftRuntime ref upto, } => { self.apply_to_state_machine(committed.next_index(), upto.index).await?; - // Stepping down should be controlled by Engine. - self.leader_step_down(); } Command::FollowerCommit { already_committed: ref committed, diff --git a/openraft/src/engine/engine_impl.rs b/openraft/src/engine/engine_impl.rs index de220bd01..95177ef99 100644 --- a/openraft/src/engine/engine_impl.rs +++ b/openraft/src/engine/engine_impl.rs @@ -734,6 +734,33 @@ where } } + /// Leader steps down(convert to learner) once the membership not containing it is committed. + /// + /// This is only called by leader. + #[tracing::instrument(level = "debug", skip_all)] + pub(crate) fn leader_step_down(&mut self) { + tracing::debug!("leader_step_down: node_id:{}", self.id); + + // Step down: + // Keep acting as leader until a membership without this node is committed. + let em = &self.state.membership_state.effective; + + tracing::debug!( + "membership: {}, committed: {}, is_leading: {}", + em.summary(), + self.state.committed.summary(), + self.is_leading(), + ); + + #[allow(clippy::collapsible_if)] + if em.log_id <= self.state.committed { + if !em.is_voter(&self.id) && self.is_leading() { + tracing::debug!("leader {} is stepping down", self.id); + self.enter_following(); + } + } + } + /// Follower/Learner handles install-snapshot. #[tracing::instrument(level = "debug", skip_all)] pub(crate) fn install_snapshot(&mut self, meta: SnapshotMeta) { @@ -1154,13 +1181,13 @@ where tracing::debug!( is_member = display(self.is_voter()), is_leader = display(self.is_leader()), - is_becoming_leader = display(self.is_becoming_leader()), + is_leading = display(self.is_leading()), "states" ); if self.is_voter() { if self.is_leader() { ServerState::Leader - } else if self.is_becoming_leader() { + } else if self.is_leading() { ServerState::Candidate } else { ServerState::Follower @@ -1175,7 +1202,7 @@ where } /// The node is candidate or leader - fn is_becoming_leader(&self) -> bool { + fn is_leading(&self) -> bool { self.state.internal_server_state.is_leading() } diff --git a/openraft/tests/membership/t30_remove_leader.rs b/openraft/tests/membership/t30_remove_leader.rs index b51ec1e5d..75925a805 100644 --- a/openraft/tests/membership/t30_remove_leader.rs +++ b/openraft/tests/membership/t30_remove_leader.rs @@ -137,7 +137,12 @@ async fn remove_leader_access_new_cluster() -> Result<()> { router .wait(&2, timeout()) - .log(Some(log_index), "new leader node-2 commits 2 membership log") + // The last_applied may not be updated on follower nodes: + // because leader node-0 will steps down at once when the second membership log is committed. + .metrics( + |x| x.last_log_index == Some(log_index), + "new leader node-2 commits 2 membership log", + ) .await?; }