Skip to content

Commit 45df2fb

Browse files
committed
FAB-15210 fix Raft UT flake
Recall that Raft reconfiguration is done with two rounds of consensus: 1) the config block first, 2) then the Raft config entry. To precisely mimic leader failover between 1) and 2) in the Raft UT, we monitor the number of Raft messages being disseminated by the leader and disconnect the network at a certain point in time. However, in some cases, the leader might optimistically replicate the Raft config entry along with the 2nd round of MsgApp for the config block (recall two-phase commit in Raft). In such a case, we need to pick the follower with the higher index to be elected as the new leader; otherwise a node may never be able to claim leadership and the test would fail. In addition, this CR also reduces LeaderCheckInterval to 500ms to speed up the test run. It was set to 10s by default. Change-Id: I7e735d5280928152420c174bfd2b1d54cbae4f25 Signed-off-by: Jay Guo <guojiannan1101@gmail.com>
1 parent e2fc1b4 commit 45df2fb

File tree

1 file changed

+45
-30
lines changed

1 file changed

+45
-30
lines changed

orderer/consensus/etcdraft/chain_test.go

Lines changed: 45 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -2302,7 +2302,16 @@ var _ = Describe("Chain", func() {
23022302
Eventually(c.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1))
23032303
})
23042304
c1.setStepFunc(step1)
2305-
network.elect(2)
2305+
2306+
// elect node with higher index
2307+
i2, _ := c2.storage.LastIndex() // err is always nil
2308+
i3, _ := c3.storage.LastIndex()
2309+
candidate := uint64(2)
2310+
if i3 > i2 {
2311+
candidate = 3
2312+
}
2313+
network.chains[candidate].cutter.CutNext = true
2314+
network.elect(candidate)
23062315

23072316
_, raftmetabytes := c1.support.WriteConfigBlockArgsForCall(0)
23082317
meta := &common.Metadata{Value: raftmetabytes}
@@ -2325,7 +2334,6 @@ var _ = Describe("Chain", func() {
23252334
Eventually(c4.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1))
23262335

23272336
By("submitting new transaction to follower")
2328-
c2.cutter.CutNext = true
23292337
err = c4.Order(env, 0)
23302338
Expect(err).NotTo(HaveOccurred())
23312339

@@ -2337,12 +2345,7 @@ var _ = Describe("Chain", func() {
23372345
// node 1 has been stopped should not write any block
23382346
Consistently(c1.support.WriteBlockCallCount).Should(Equal(1))
23392347

2340-
network.connect(1)
2341-
2342-
c2.clock.Increment(interval)
2343-
// check that former leader didn't get stuck and actually got resign signal,
2344-
// and once connected capable of communicating with rest of the replicas set
2345-
Eventually(c1.observe, LongEventualTimeout).Should(Receive(Equal(raft.SoftState{Lead: 2, RaftState: raft.StateFollower})))
2348+
network.join(1, true)
23462349
Eventually(c1.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
23472350
})
23482351

@@ -2400,8 +2403,15 @@ var _ = Describe("Chain", func() {
24002403
network.connect(i)
24012404
}
24022405

2403-
By("re-elect node 2 to be a leader")
2404-
network.elect(2)
2406+
// elect node with higher index
2407+
i2, _ := c2.storage.LastIndex() // err is always nil
2408+
i3, _ := c3.storage.LastIndex()
2409+
candidate := uint64(2)
2410+
if i3 > i2 {
2411+
candidate = 3
2412+
}
2413+
network.chains[candidate].cutter.CutNext = true
2414+
network.elect(candidate)
24052415

24062416
c4.start()
24072417
Expect(c4.WaitReady()).To(Succeed())
@@ -2411,12 +2421,9 @@ var _ = Describe("Chain", func() {
24112421
Eventually(c4.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1))
24122422

24132423
By("submitting new transaction to follower")
2414-
c2.cutter.CutNext = true
24152424
err = c4.Order(env, 0)
24162425
Expect(err).NotTo(HaveOccurred())
24172426

2418-
c2.clock.Increment(interval)
2419-
24202427
// rest nodes are alive include a newly added, hence should write 2 blocks
24212428
Eventually(c1.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
24222429
Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
@@ -2456,11 +2463,17 @@ var _ = Describe("Chain", func() {
24562463
Eventually(c.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1))
24572464
})
24582465

2459-
// electing new leader
2460-
network.elect(2)
2466+
// elect node with higher index
2467+
i2, _ := c2.storage.LastIndex() // err is always nil
2468+
i3, _ := c3.storage.LastIndex()
2469+
candidate := uint64(2)
2470+
if i3 > i2 {
2471+
candidate = 3
2472+
}
2473+
network.chains[candidate].cutter.CutNext = true
2474+
network.elect(candidate)
24612475

24622476
By("submitting new transaction to follower")
2463-
c2.cutter.CutNext = true
24642477
err = c3.Order(env, 0)
24652478
Expect(err).NotTo(HaveOccurred())
24662479

@@ -3459,20 +3472,21 @@ func newChain(timeout time.Duration, channel string, dataDir string, id uint64,
34593472
fakeFields := newFakeMetricsFields()
34603473

34613474
opts := etcdraft.Options{
3462-
RaftID: uint64(id),
3463-
Clock: clock,
3464-
TickInterval: interval,
3465-
ElectionTick: ELECTION_TICK,
3466-
HeartbeatTick: HEARTBEAT_TICK,
3467-
MaxSizePerMsg: 1024 * 1024,
3468-
MaxInflightBlocks: 256,
3469-
BlockMetadata: raftMetadata,
3470-
Consenters: consenters,
3471-
Logger: flogging.NewFabricLogger(zap.NewExample()),
3472-
MemoryStorage: storage,
3473-
WALDir: path.Join(dataDir, "wal"),
3474-
SnapDir: path.Join(dataDir, "snapshot"),
3475-
Metrics: newFakeMetrics(fakeFields),
3475+
RaftID: uint64(id),
3476+
Clock: clock,
3477+
TickInterval: interval,
3478+
ElectionTick: ELECTION_TICK,
3479+
HeartbeatTick: HEARTBEAT_TICK,
3480+
MaxSizePerMsg: 1024 * 1024,
3481+
MaxInflightBlocks: 256,
3482+
BlockMetadata: raftMetadata,
3483+
LeaderCheckInterval: 500 * time.Millisecond,
3484+
Consenters: consenters,
3485+
Logger: flogging.NewFabricLogger(zap.NewExample()),
3486+
MemoryStorage: storage,
3487+
WALDir: path.Join(dataDir, "wal"),
3488+
SnapDir: path.Join(dataDir, "snapshot"),
3489+
Metrics: newFakeMetrics(fakeFields),
34763490
}
34773491

34783492
support := &consensusmocks.FakeConsenterSupport{}
@@ -3901,6 +3915,7 @@ func (n *network) elect(id uint64) {
39013915
n.RUnlock()
39023916

39033917
// Send node an artificial MsgTimeoutNow to emulate leadership transfer.
3918+
fmt.Fprintf(GinkgoWriter, "Send artificial MsgTimeoutNow to elect node %d\n", id)
39043919
candidate.Consensus(&orderer.ConsensusRequest{Payload: protoutil.MarshalOrPanic(&raftpb.Message{Type: raftpb.MsgTimeoutNow})}, 0)
39053920
Eventually(candidate.observe, LongEventualTimeout).Should(Receive(StateEqual(id, raft.StateLeader)))
39063921

0 commit comments

Comments
 (0)