@@ -10,12 +10,16 @@ import (
 	"context"
 	"encoding/pem"
 	"fmt"
+	"os"
+	"reflect"
 	"sync/atomic"
 	"time"
 
 	"code.cloudfoundry.org/clock"
 	"github.com/coreos/etcd/raft"
 	"github.com/coreos/etcd/raft/raftpb"
+	"github.com/coreos/etcd/wal"
+	"github.com/coreos/etcd/wal/walpb"
 	"github.com/golang/protobuf/proto"
 	"github.com/hyperledger/fabric/common/configtx"
 	"github.com/hyperledger/fabric/common/flogging"
@@ -35,6 +39,7 @@ import (
 type Storage interface {
 	raft.Storage
 	Append(entries []raftpb.Entry) error
+	SetHardState(st raftpb.HardState) error
 }
 
 //go:generate mockery -dir . -name Configurator -case underscore -output ./mocks/
@@ -53,12 +58,22 @@ type RPC interface {
 	SendSubmit(dest uint64, request *orderer.SubmitRequest) error
 }
 
+type block struct {
+	b *common.Block
+
+	// i is the etcd/raft entry Index associated with this block.
+	// It is persisted as block metadata so we know where to
+	// continue rafting upon reboot.
+	i uint64
+}
+
 // Options contains all the configurations relevant to the chain.
 type Options struct {
 	RaftID uint64
 
 	Clock clock.Clock
 
+	WALDir  string
 	Storage Storage
 	Logger  *flogging.FabricLogger
 
@@ -67,7 +82,8 @@ type Options struct {
 	HeartbeatTick   int
 	MaxSizePerMsg   uint64
 	MaxInflightMsgs int
-	RaftMetadata    *etcdraft.RaftMetadata
+
+	RaftMetadata *etcdraft.RaftMetadata
 }
 
 // Chain implements consensus.Chain interface.
@@ -79,7 +95,7 @@ type Chain struct {
 	channelID string
 
 	submitC  chan *orderer.SubmitRequest
-	commitC  chan *common.Block
+	commitC  chan block
 	observeC chan<- uint64 // Notifies external observer on leader change (passed in optionally as an argument for tests)
 	haltC    chan struct{} // Signals to goroutines that the chain is halting
 	doneC    chan struct{} // Closes when the chain halts
@@ -93,8 +109,11 @@ type Chain struct {
 	leader       uint64
 	appliedIndex uint64
 
+	hasWAL bool // indicates whether existing WAL data was found; false means this is a fresh raft node
+
 	node    raft.Node
 	storage Storage
+	wal     *wal.WAL
 	opts    Options
 
 	logger *flogging.FabricLogger
@@ -107,29 +126,44 @@ func NewChain(
 	conf Configurator,
 	rpc RPC,
 	observeC chan<- uint64) (*Chain, error) {
+
+	lg := opts.Logger.With("channel", support.ChainID(), "node", opts.RaftID)
+
+	applied := opts.RaftMetadata.RaftIndex
+	w, hasWAL, err := replayWAL(lg, applied, opts.WALDir, opts.Storage)
+	if err != nil {
+		return nil, errors.Errorf("failed to create chain: %s", err)
+	}
+
 	return &Chain{
 		configurator: conf,
 		rpc:          rpc,
 		channelID:    support.ChainID(),
 		raftID:       opts.RaftID,
 		submitC:      make(chan *orderer.SubmitRequest),
-		commitC:      make(chan *common.Block),
+		commitC:      make(chan block),
 		haltC:        make(chan struct{}),
 		doneC:        make(chan struct{}),
 		resignC:      make(chan struct{}),
 		startC:       make(chan struct{}),
 		observeC:     observeC,
 		support:      support,
+		hasWAL:       hasWAL,
+		appliedIndex: applied,
 		clock:        opts.Clock,
-		logger:       opts.Logger.With("channel", support.ChainID(), "node", opts.RaftID),
+		logger:       lg,
 		storage:      opts.Storage,
+		wal:          w,
 		opts:         opts,
 	}, nil
 }
 
 // Start instructs the orderer to begin serving the chain and keep it current.
 func (c *Chain) Start() {
 	c.logger.Infof("Starting Raft node")
+
+	// DO NOT use Applied option in config, see https://github.com/etcd-io/etcd/issues/10217
+	// We guard against replay of written blocks in `apply` instead.
 	config := &raft.Config{
 		ID:              c.raftID,
 		ElectionTick:    c.opts.ElectionTick,
@@ -149,7 +183,13 @@ func (c *Chain) Start() {
 
 	raftPeers := RaftPeers(c.opts.RaftMetadata.Consenters)
 
-	c.node = raft.StartNode(config, raftPeers)
+	if !c.hasWAL {
+		c.logger.Infof("starting new raft node %d", c.raftID)
+		c.node = raft.StartNode(config, raftPeers)
+	} else {
+		c.logger.Infof("restarting raft node %d", c.raftID)
+		c.node = raft.RestartNode(config)
+	}
 
 	close(c.startC)
 
@@ -372,14 +412,16 @@ func (c *Chain) serveRequest() {
 	}
 }
 
-func (c *Chain) writeBlock(b *common.Block) {
-	metadata := utils.MarshalOrPanic(c.opts.RaftMetadata)
-	if utils.IsConfigBlock(b) {
-		c.support.WriteConfigBlock(b, metadata)
+func (c *Chain) writeBlock(b block) {
+	c.opts.RaftMetadata.RaftIndex = b.i
+	m := utils.MarshalOrPanic(c.opts.RaftMetadata)
+
+	if utils.IsConfigBlock(b.b) {
+		c.support.WriteConfigBlock(b.b, m)
 		return
 	}
 
-	c.support.WriteBlock(b, metadata)
+	c.support.WriteBlock(b.b, m)
 }
 
 // Orders the envelope in the `msg` content. SubmitRequest.
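Editor's note: `writeBlock` now stamps `RaftMetadata.RaftIndex` into the consenter metadata stored with every block. The sketch below shows how that index could be read back from the last block on reboot to seed `appliedIndex`. `readRaftIndex` is a hypothetical helper (the real wiring lives in the consenter, not in this diff), and it assumes the usual orderer metadata layout: the ORDERER slot holds a marshaled `common.Metadata` whose `Value` is the consenter metadata bytes.

```go
package sketch

import (
	"github.com/golang/protobuf/proto"
	"github.com/hyperledger/fabric/protos/common"
	"github.com/hyperledger/fabric/protos/orderer/etcdraft"
	"github.com/pkg/errors"
)

// readRaftIndex extracts the raft entry index persisted alongside the last
// written block; that value would become Options.RaftMetadata.RaftIndex and
// therefore the chain's initial appliedIndex after a restart.
func readRaftIndex(b *common.Block) (uint64, error) {
	if b.Metadata == nil || len(b.Metadata.Metadata) <= int(common.BlockMetadataIndex_ORDERER) {
		return 0, errors.New("block carries no orderer metadata")
	}

	md := &common.Metadata{}
	if err := proto.Unmarshal(b.Metadata.Metadata[common.BlockMetadataIndex_ORDERER], md); err != nil {
		return 0, errors.Errorf("failed to unmarshal orderer metadata: %s", err)
	}

	rm := &etcdraft.RaftMetadata{}
	if err := proto.Unmarshal(md.Value, rm); err != nil {
		return 0, errors.Errorf("failed to unmarshal etcdraft metadata: %s", err)
	}

	return rm.RaftIndex, nil
}
```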
@@ -450,7 +492,10 @@ func (c *Chain) serveRaft() {
 
 		case rd := <-c.node.Ready():
 			c.storage.Append(rd.Entries)
-			c.apply(c.entriesToApply(rd.CommittedEntries))
+			if err := c.wal.Save(rd.HardState, rd.Entries); err != nil {
+				c.logger.Panicf("failed to persist hardstate and entries to wal: %s", err)
+			}
+			c.apply(rd.CommittedEntries)
 			c.node.Advance()
 			c.send(rd.Messages)
 
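Editor's note: the hunk above adds the WAL write to the Ready loop. Below is a minimal sketch of the etcd/raft Ready-handling contract it follows, assuming a plain `raft.MemoryStorage` and a caller-supplied `apply` function; it omits message sending and snapshot handling, and in the actual change the in-memory `Append` happens before the WAL `Save`.

```go
package sketch

import (
	"log"

	"github.com/coreos/etcd/raft"
	"github.com/coreos/etcd/raft/raftpb"
	"github.com/coreos/etcd/wal"
)

// serveReady persists HardState and Entries to stable storage (the WAL)
// before acting on a Ready batch, then applies committed entries, and only
// then calls Advance so raft hands out the next batch.
func serveReady(node raft.Node, w *wal.WAL, storage *raft.MemoryStorage,
	apply func([]raftpb.Entry), done <-chan struct{}) {
	for {
		select {
		case rd := <-node.Ready():
			// Durability first: entries lost after being acknowledged would
			// break raft's guarantees across restarts.
			if err := w.Save(rd.HardState, rd.Entries); err != nil {
				log.Panicf("failed to persist hardstate and entries to wal: %s", err)
			}
			storage.Append(rd.Entries) // mirror into the raft.Storage used for reads
			apply(rd.CommittedEntries) // e.g. unmarshal and write blocks
			node.Advance()             // done with this batch
		case <-done:
			return
		}
	}
}
```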
@@ -474,24 +519,35 @@ func (c *Chain) serveRaft() {
 			}
 
 		case <-c.haltC:
-			close(c.doneC)
 			ticker.Stop()
 			c.node.Stop()
+			c.wal.Close()
 			c.logger.Infof("Raft node %x stopped", c.raftID)
+			close(c.doneC) // close after all the artifacts are closed
 			return
 		}
 	}
 }
 
 func (c *Chain) apply(ents []raftpb.Entry) {
+	if len(ents) == 0 {
+		return
+	}
+
+	if ents[0].Index > c.appliedIndex+1 {
+		c.logger.Panicf("first index of committed entry[%d] should <= appliedIndex[%d]+1", ents[0].Index, c.appliedIndex)
+	}
+
 	for i := range ents {
 		switch ents[i].Type {
 		case raftpb.EntryNormal:
-			if len(ents[i].Data) == 0 {
+			// We need to strictly avoid re-applying normal entries,
+			// otherwise we are writing the same block twice.
+			if len(ents[i].Data) == 0 || ents[i].Index <= c.appliedIndex {
 				break
 			}
 
-			c.commitC <- utils.UnmarshalBlockOrPanic(ents[i].Data)
+			c.commitC <- block{utils.UnmarshalBlockOrPanic(ents[i].Data), ents[i].Index}
 
 		case raftpb.EntryConfChange:
 			var cc raftpb.ConfChange
@@ -503,7 +559,9 @@ func (c *Chain) apply(ents []raftpb.Entry) {
 			c.node.ApplyConfChange(cc)
 		}
 
-		c.appliedIndex = ents[i].Index
+		if ents[i].Index > c.appliedIndex {
+			c.appliedIndex = ents[i].Index
+		}
 	}
 }
 
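Editor's note: together, the two guards above (skip normal entries with `Index <= appliedIndex`, and only move `appliedIndex` forward) make block writes idempotent across a restart, because raft re-delivers the committed entries it finds in the WAL. A small illustrative sketch of the same filtering logic, with hypothetical names, follows.

```go
package sketch

import "github.com/coreos/etcd/raft/raftpb"

// filterReplayed drops normal entries that were already turned into blocks
// before a restart: appliedIndex is recovered from the RaftIndex persisted
// with the last written block, so anything at or below it is a replay.
func filterReplayed(ents []raftpb.Entry, appliedIndex uint64) []raftpb.Entry {
	var fresh []raftpb.Entry
	for _, e := range ents {
		if e.Type == raftpb.EntryNormal && (len(e.Data) == 0 || e.Index <= appliedIndex) {
			continue // empty entry, or block already written before the restart
		}
		fresh = append(fresh, e)
	}
	return fresh
}
```

For example, with `appliedIndex = 9` and re-delivered entries at indexes 9 and 10, only the entry at index 10 survives and is written as a new block.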
@@ -522,27 +580,6 @@ func (c *Chain) send(msgs []raftpb.Message) {
 	}
 }
 
-// this is taken from coreos/contrib/raftexample/raft.go
-func (c *Chain) entriesToApply(ents []raftpb.Entry) (nents []raftpb.Entry) {
-	if len(ents) == 0 {
-		return
-	}
-
-	firstIdx := ents[0].Index
-	if firstIdx > c.appliedIndex+1 {
-		c.logger.Panicf("first index of committed entry[%d] should <= progress.appliedIndex[%d]+1", firstIdx, c.appliedIndex)
-	}
-
-	// If we do have unapplied entries in nents.
-	//  | applied | unapplied |
-	// |----------------|----------------------|
-	// firstIdx    appliedIndex         last
-	if c.appliedIndex-firstIdx+1 < uint64(len(ents)) {
-		nents = ents[c.appliedIndex-firstIdx+1:]
-	}
-	return nents
-}
-
 func (c *Chain) isConfig(env *common.Envelope) bool {
 	h, err := utils.ChannelHeader(env)
 	if err != nil {
@@ -613,3 +650,88 @@ func (c *Chain) checkConsentersSet(configValue *common.ConfigValue) error {
 
 	return nil
 }
+
+func (c *Chain) consentersChanged(newConsenters []*etcdraft.Consenter) bool {
+	if len(c.opts.RaftMetadata.Consenters) != len(newConsenters) {
+		return false
+	}
+
+	consentersSet1 := c.membershipByCert()
+	consentersSet2 := c.consentersToMap(newConsenters)
+
+	return reflect.DeepEqual(consentersSet1, consentersSet2)
+}
+
+func (c *Chain) membershipByCert() map[string]struct{} {
+	set := map[string]struct{}{}
+	for _, c := range c.opts.RaftMetadata.Consenters {
+		set[string(c.ClientTlsCert)] = struct{}{}
+	}
+	return set
+}
+
+func (c *Chain) consentersToMap(consenters []*etcdraft.Consenter) map[string]struct{} {
+	set := map[string]struct{}{}
+	for _, c := range consenters {
+		set[string(c.ClientTlsCert)] = struct{}{}
+	}
+	return set
+}
+
+func (c *Chain) membershipToRaftPeers() []raft.Peer {
+	var peers []raft.Peer
+
+	for raftID := range c.opts.RaftMetadata.Consenters {
+		peers = append(peers, raft.Peer{ID: raftID})
+	}
+	return peers
+}
+
+func replayWAL(lg *flogging.FabricLogger, applied uint64, walDir string, storage Storage) (*wal.WAL, bool, error) {
+	hasWAL := wal.Exist(walDir)
+	if !hasWAL && applied != 0 {
+		return nil, hasWAL, errors.Errorf("applied index is not zero but no WAL data found")
+	}
+
+	if !hasWAL {
+		// wal.Create takes care of the following cases by creating a temp dir and atomically renaming it:
+		// - wal dir is a file
+		// - wal dir is not readable/writeable
+		//
+		// TODO(jay_guo) store channel-related information in metadata when needed.
+		// A potential use case could be data dump and restore.
+		lg.Infof("No WAL data found, creating new WAL at path '%s'", walDir)
+		w, err := wal.Create(walDir, nil)
+		if err == os.ErrExist {
+			lg.Fatalf("programming error, we've just checked that WAL does not exist")
+		}
+
+		if err != nil {
+			return nil, hasWAL, errors.Errorf("failed to initialize WAL: %s", err)
+		}
+
+		if err = w.Close(); err != nil {
+			return nil, hasWAL, errors.Errorf("failed to close the WAL just created: %s", err)
+		}
+	} else {
+		lg.Infof("Found WAL data at path '%s', replaying it", walDir)
+	}
+
+	w, err := wal.Open(walDir, walpb.Snapshot{})
+	if err != nil {
+		return nil, hasWAL, errors.Errorf("failed to open existing WAL: %s", err)
+	}
+
+	_, st, ents, err := w.ReadAll()
+	if err != nil {
+		return nil, hasWAL, errors.Errorf("failed to read WAL: %s", err)
+	}
+
+	lg.Debugf("Setting HardState to {Term: %d, Commit: %d}", st.Term, st.Commit)
+	storage.SetHardState(st) // MemoryStorage.SetHardState always returns nil
+
+	lg.Debugf("Appending %d entries to memory storage", len(ents))
+	storage.Append(ents) // MemoryStorage.Append always returns nil
+
+	return w, hasWAL, nil
+}
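Editor's note: a compact sketch of how `replayWAL` and `Start()` fit together, kept separate from the diff. The name `startOrRestart` is illustrative; it assumes a bare `raft.MemoryStorage` rather than Fabric's `Storage` interface and skips the logging and error wrapping shown above. The change itself closes the freshly created WAL and reopens it, so both the fresh and the existing path go through `wal.Open`/`ReadAll` uniformly.

```go
package sketch

import (
	"github.com/coreos/etcd/raft"
	"github.com/coreos/etcd/wal"
	"github.com/coreos/etcd/wal/walpb"
)

// startOrRestart replays any existing WAL into the raft MemoryStorage, then
// either bootstraps a brand-new node (StartNode, with the peer list) or
// resumes an existing one (RestartNode, which recovers term, vote and
// membership from the replayed state).
func startOrRestart(walDir string, cfg *raft.Config, peers []raft.Peer) (raft.Node, *wal.WAL, error) {
	storage := raft.NewMemoryStorage()
	cfg.Storage = storage

	if !wal.Exist(walDir) {
		// Fresh node: create an empty WAL so future Ready batches can be persisted.
		w, err := wal.Create(walDir, nil)
		if err != nil {
			return nil, nil, err
		}
		return raft.StartNode(cfg, peers), w, nil
	}

	// Existing node: feed HardState and entries back into storage before restarting.
	w, err := wal.Open(walDir, walpb.Snapshot{})
	if err != nil {
		return nil, nil, err
	}
	_, st, ents, err := w.ReadAll()
	if err != nil {
		return nil, nil, err
	}
	storage.SetHardState(st)
	storage.Append(ents)
	return raft.RestartNode(cfg), w, nil
}
```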