From 8b3eef0373f5c94ad3e4e357d3085a6c655708de Mon Sep 17 00:00:00 2001 From: Jason Yellick Date: Tue, 14 Jul 2020 15:28:19 -0400 Subject: [PATCH] Raft: Check suspect info once per suspect interval Today's existing suspect logic has a periodic checker, which checks every 10s if the Raft cluster still has quorum. If the cluster has lost quorum, it marks the time this event begins, then, every 10s checks to see if 'enough' time has elapsed since the quorum was lost to suspect that the OSN has been evicted. If the OSN has not been evicted, or cannot determine its eviction status, then every 10s the OSN attempts to re-check its suspicion status, which can lead to large volumes of network traffic, especially in significiantly multichannel environments. This commit modifies the logic to track the number of times that the suspect checking logic has actually executed, to ensure that we check no more than once every suspect interval (by default every 10m, instead of every 10s). Signed-off-by: Jason Yellick --- orderer/consensus/etcdraft/chain.go | 1 + orderer/consensus/etcdraft/eviction.go | 14 +++++++++++++- orderer/consensus/etcdraft/eviction_test.go | 19 +++++++++++++++++++ 3 files changed, 33 insertions(+), 1 deletion(-) diff --git a/orderer/consensus/etcdraft/chain.go b/orderer/consensus/etcdraft/chain.go index 15e02cd2c8c..7c14a238fc3 100644 --- a/orderer/consensus/etcdraft/chain.go +++ b/orderer/consensus/etcdraft/chain.go @@ -369,6 +369,7 @@ func (c *Chain) Start() { c.periodicChecker = &PeriodicCheck{ Logger: c.logger, Report: es.confirmSuspicion, + ReportCleared: es.clearSuspicion, CheckInterval: interval, Condition: c.suspectEviction, } diff --git a/orderer/consensus/etcdraft/eviction.go b/orderer/consensus/etcdraft/eviction.go index 5fc2fb6610c..7b007372e74 100644 --- a/orderer/consensus/etcdraft/eviction.go +++ b/orderer/consensus/etcdraft/eviction.go @@ -26,6 +26,7 @@ type PeriodicCheck struct { CheckInterval time.Duration Condition func() bool Report func(cumulativePeriod time.Duration) + ReportCleared func() conditionHoldsSince time.Time once sync.Once // Used to prevent double initialization stopped uint32 @@ -60,6 +61,9 @@ func (pc *PeriodicCheck) check() { } func (pc *PeriodicCheck) conditionNotFulfilled() { + if pc.ReportCleared != nil && !pc.conditionHoldsSince.IsZero() { + pc.ReportCleared() + } pc.conditionHoldsSince = time.Time{} } @@ -81,12 +85,20 @@ type evictionSuspector struct { writeBlock func(block *common.Block) error triggerCatchUp func(sn *raftpb.Snapshot) halted bool + timesTriggered int +} + +func (es *evictionSuspector) clearSuspicion() { + es.timesTriggered = 0 } func (es *evictionSuspector) confirmSuspicion(cumulativeSuspicion time.Duration) { - if es.evictionSuspicionThreshold > cumulativeSuspicion || es.halted { + // The goal here is to only execute the body of the function once every es.evictionSuspicionThreshold + if es.evictionSuspicionThreshold*time.Duration(es.timesTriggered+1) > cumulativeSuspicion || es.halted { return } + es.timesTriggered++ + es.logger.Infof("Suspecting our own eviction from the channel for %v", cumulativeSuspicion) puller, err := es.createPuller() if err != nil { diff --git a/orderer/consensus/etcdraft/eviction_test.go b/orderer/consensus/etcdraft/eviction_test.go index 9a304b5cab1..048810e1263 100644 --- a/orderer/consensus/etcdraft/eviction_test.go +++ b/orderer/consensus/etcdraft/eviction_test.go @@ -48,11 +48,18 @@ func TestPeriodicCheck(t *testing.T) { reports <- duration } + clears := make(chan struct{}, 1000) + + reportCleared := func() { + clears <- struct{}{} + } + check := &PeriodicCheck{ Logger: flogging.MustGetLogger("test"), Condition: condition, CheckInterval: time.Millisecond, Report: report, + ReportCleared: reportCleared, } go check.Run() @@ -85,6 +92,9 @@ func TestPeriodicCheck(t *testing.T) { } } + g.Eventually(clears).Should(gomega.Receive()) + g.Consistently(clears).ShouldNot(gomega.Receive()) + // ensure the checks have been made checksDoneSoFar := atomic.LoadUint32(&checkNum) g.Consistently(reports, time.Second*2, time.Millisecond).Should(gomega.BeEmpty()) @@ -107,6 +117,7 @@ func TestPeriodicCheck(t *testing.T) { time.Sleep(check.CheckInterval * 50) // Ensure that we cease checking the condition, hence the PeriodicCheck is stopped. g.Expect(atomic.LoadUint32(&checkNum)).To(gomega.BeNumerically("<", checkCountAfterStop+2)) + g.Consistently(clears).ShouldNot(gomega.Receive()) } func TestEvictionSuspector(t *testing.T) { @@ -137,6 +148,7 @@ func TestEvictionSuspector(t *testing.T) { blockPuller BlockPuller blockPullerErr error height uint64 + timesTriggered int halt func() }{ { @@ -144,6 +156,12 @@ func TestEvictionSuspector(t *testing.T) { evictionSuspicionThreshold: 11 * time.Minute, halt: t.Fail, }, + { + description: "timesTriggered multiplier prevents threshold", + evictionSuspicionThreshold: 6 * time.Minute, + timesTriggered: 1, + halt: t.Fail, + }, { description: "puller creation fails", evictionSuspicionThreshold: 10*time.Minute - time.Second, @@ -219,6 +237,7 @@ func TestEvictionSuspector(t *testing.T) { }, logger: flogging.MustGetLogger("test"), triggerCatchUp: func(sn *raftpb.Snapshot) {}, + timesTriggered: testCase.timesTriggered, } foundExpectedLog := testCase.expectedLog == ""