From 1b4bdd59f16544cdba262a5c3f1b6bf22b796f72 Mon Sep 17 00:00:00 2001 From: Benjamin Wang Date: Tue, 24 Oct 2023 19:48:43 +0100 Subject: [PATCH] ignore old leader's leases revoking request Signed-off-by: Benjamin Wang --- server/etcdserver/server.go | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/server/etcdserver/server.go b/server/etcdserver/server.go index 4b40e32bada2..41ecd5f02fcb 100644 --- a/server/etcdserver/server.go +++ b/server/etcdserver/server.go @@ -859,7 +859,18 @@ func (s *EtcdServer) run() { func (s *EtcdServer) revokeExpiredLeases(leases []*lease.Lease) { s.GoAttach(func() { + // We shouldn't revoke any leases if current member isn't a leader, + // because the operation should only be performed by the leader. When + // the leader gets blocked on the raft loop, such as writing WAL entries, + // it can't process any events or messages from raft. It may think it + // is still the leader even the leader has already changed. + // Refer to https://github.com/etcd-io/etcd/issues/15247 lg := s.Logger() + if !s.ensureLeadership() { + lg.Warn("Ignore the lease revoking request because current member isn't a leader") + return + } + // Increases throughput of expired leases deletion process through parallelization c := make(chan struct{}, maxPendingRevokes) for _, curLease := range leases { @@ -892,6 +903,26 @@ func (s *EtcdServer) revokeExpiredLeases(leases []*lease.Lease) { }) } +// ensureLeadership checks whether current member is still the leader. +func (s *EtcdServer) ensureLeadership() bool { + lg := s.Logger() + if err := s.linearizableReadNotify(s.ctx); err != nil { + lg.Warn("Failed to check current member's leadership", + zap.Error(err)) + return false + } + + newLeaderId := s.raftStatus().Lead + if newLeaderId != uint64(s.MemberId()) { + lg.Warn("Current member isn't a leader", + zap.Uint64("local-member-id", uint64(s.MemberId())), + zap.Uint64("new-lead", newLeaderId)) + return false + } + + return true +} + // Cleanup removes allocated objects by EtcdServer.NewServer in // situation that EtcdServer::Start was not called (that takes care of cleanup). func (s *EtcdServer) Cleanup() {