Skip to content

Commit 858aaa9

Browse files
guoger authored and C0rWin committed
[FAB-11918] Add WAL to persist raft log
This CR adds a WAL to etcdraft to persist log entries, so that an etcdraft-based chain can resume from where it left off. The index of each log entry is persisted with the block as its metadata, so that already-written blocks are not replayed. Change-Id: I7e4131541d90b256d767a9421ab6ae2a47a88bcd Signed-off-by: Jay Guo <guojiannan1101@gmail.com>
1 parent cf49094 commit 858aaa9

File tree

7 files changed

+497
-66
lines changed

7 files changed

+497
-66
lines changed

Gopkg.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Gopkg.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ required = [
66
"golang.org/x/lint/golint",
77
"golang.org/x/tools/cmd/goimports",
88
"github.com/golang/protobuf/protoc-gen-go",
9-
"github.com/coreos/etcd/wal"
109
]
1110

1211
ignored = [

integration/nwo/orderer_template.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,5 +89,7 @@ Kafka:
8989
Debug:
9090
BroadcastTraceDir:
9191
DeliverTraceDir:
92+
Consensus:
93+
WALDir: {{ .OrdererDir Orderer }}/etcdraft/wal
9294
{{- end }}
9395
`

orderer/consensus/etcdraft/chain.go

Lines changed: 158 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,16 @@ import (
1010
"context"
1111
"encoding/pem"
1212
"fmt"
13+
"os"
14+
"reflect"
1315
"sync/atomic"
1416
"time"
1517

1618
"code.cloudfoundry.org/clock"
1719
"github.com/coreos/etcd/raft"
1820
"github.com/coreos/etcd/raft/raftpb"
21+
"github.com/coreos/etcd/wal"
22+
"github.com/coreos/etcd/wal/walpb"
1923
"github.com/golang/protobuf/proto"
2024
"github.com/hyperledger/fabric/common/configtx"
2125
"github.com/hyperledger/fabric/common/flogging"
@@ -35,6 +39,7 @@ import (
3539
type Storage interface {
3640
raft.Storage
3741
Append(entries []raftpb.Entry) error
42+
SetHardState(st raftpb.HardState) error
3843
}
3944

4045
//go:generate mockery -dir . -name Configurator -case underscore -output ./mocks/
@@ -53,12 +58,22 @@ type RPC interface {
5358
SendSubmit(dest uint64, request *orderer.SubmitRequest) error
5459
}
5560

61+
// block pairs a block with the index of the raft entry it was carried in,
// so that the index can be recorded in the block metadata when it is written.
type block struct {
62+
b *common.Block
63+
64+
// i is the etcd/raft entry Index associated with block.
65+
// It is persisted as block metadata so we know where
66+
// to continue rafting upon reboot.
67+
i uint64
68+
}
69+
5670
// Options contains all the configurations relevant to the chain.
5771
type Options struct {
5872
RaftID uint64
5973

6074
Clock clock.Clock
6175

76+
WALDir string
6277
Storage Storage
6378
Logger *flogging.FabricLogger
6479

@@ -67,7 +82,8 @@ type Options struct {
6782
HeartbeatTick int
6883
MaxSizePerMsg uint64
6984
MaxInflightMsgs int
70-
RaftMetadata *etcdraft.RaftMetadata
85+
86+
RaftMetadata *etcdraft.RaftMetadata
7187
}
7288

7389
// Chain implements consensus.Chain interface.
@@ -79,7 +95,7 @@ type Chain struct {
7995
channelID string
8096

8197
submitC chan *orderer.SubmitRequest
82-
commitC chan *common.Block
98+
commitC chan block
8399
observeC chan<- uint64 // Notifies external observer on leader change (passed in optionally as an argument for tests)
84100
haltC chan struct{} // Signals to goroutines that the chain is halting
85101
doneC chan struct{} // Closes when the chain halts
@@ -93,8 +109,11 @@ type Chain struct {
93109
leader uint64
94110
appliedIndex uint64
95111

112+
hasWAL bool // indicates whether WAL data already existed at startup; false means this is a fresh raft node
113+
96114
node raft.Node
97115
storage Storage
116+
wal *wal.WAL
98117
opts Options
99118

100119
logger *flogging.FabricLogger
@@ -107,29 +126,44 @@ func NewChain(
107126
conf Configurator,
108127
rpc RPC,
109128
observeC chan<- uint64) (*Chain, error) {
129+
130+
lg := opts.Logger.With("channel", support.ChainID(), "node", opts.RaftID)
131+
132+
applied := opts.RaftMetadata.RaftIndex
133+
w, hasWAL, err := replayWAL(lg, applied, opts.WALDir, opts.Storage)
134+
if err != nil {
135+
return nil, errors.Errorf("failed to create chain: %s", err)
136+
}
137+
110138
return &Chain{
111139
configurator: conf,
112140
rpc: rpc,
113141
channelID: support.ChainID(),
114142
raftID: opts.RaftID,
115143
submitC: make(chan *orderer.SubmitRequest),
116-
commitC: make(chan *common.Block),
144+
commitC: make(chan block),
117145
haltC: make(chan struct{}),
118146
doneC: make(chan struct{}),
119147
resignC: make(chan struct{}),
120148
startC: make(chan struct{}),
121149
observeC: observeC,
122150
support: support,
151+
hasWAL: hasWAL,
152+
appliedIndex: applied,
123153
clock: opts.Clock,
124-
logger: opts.Logger.With("channel", support.ChainID(), "node", opts.RaftID),
154+
logger: lg,
125155
storage: opts.Storage,
156+
wal: w,
126157
opts: opts,
127158
}, nil
128159
}
129160

130161
// Start instructs the orderer to begin serving the chain and keep it current.
131162
func (c *Chain) Start() {
132163
c.logger.Infof("Starting Raft node")
164+
165+
// DO NOT use Applied option in config, see https://github.com/etcd-io/etcd/issues/10217
166+
// We guard against replay of written blocks in `apply` instead.
133167
config := &raft.Config{
134168
ID: c.raftID,
135169
ElectionTick: c.opts.ElectionTick,
@@ -149,7 +183,13 @@ func (c *Chain) Start() {
149183

150184
raftPeers := RaftPeers(c.opts.RaftMetadata.Consenters)
151185

152-
c.node = raft.StartNode(config, raftPeers)
186+
if !c.hasWAL {
187+
c.logger.Infof("starting new raft node %d", c.raftID)
188+
c.node = raft.StartNode(config, raftPeers)
189+
} else {
190+
c.logger.Infof("restarting raft node %d", c.raftID)
191+
c.node = raft.RestartNode(config)
192+
}
153193

154194
close(c.startC)
155195

@@ -372,14 +412,16 @@ func (c *Chain) serveRequest() {
372412
}
373413
}
374414

375-
func (c *Chain) writeBlock(b *common.Block) {
376-
metadata := utils.MarshalOrPanic(c.opts.RaftMetadata)
377-
if utils.IsConfigBlock(b) {
378-
c.support.WriteConfigBlock(b, metadata)
415+
func (c *Chain) writeBlock(b block) {
416+
c.opts.RaftMetadata.RaftIndex = b.i
417+
m := utils.MarshalOrPanic(c.opts.RaftMetadata)
418+
419+
if utils.IsConfigBlock(b.b) {
420+
c.support.WriteConfigBlock(b.b, m)
379421
return
380422
}
381423

382-
c.support.WriteBlock(b, metadata)
424+
c.support.WriteBlock(b.b, m)
383425
}
384426

385427
// Orders the envelope in the `msg` content. SubmitRequest.
@@ -450,7 +492,10 @@ func (c *Chain) serveRaft() {
450492

451493
case rd := <-c.node.Ready():
452494
c.storage.Append(rd.Entries)
453-
c.apply(c.entriesToApply(rd.CommittedEntries))
495+
if err := c.wal.Save(rd.HardState, rd.Entries); err != nil {
496+
c.logger.Panicf("failed to persist hardstate and entries to wal: %s", err)
497+
}
498+
c.apply(rd.CommittedEntries)
454499
c.node.Advance()
455500
c.send(rd.Messages)
456501

@@ -474,24 +519,35 @@ func (c *Chain) serveRaft() {
474519
}
475520

476521
case <-c.haltC:
477-
close(c.doneC)
478522
ticker.Stop()
479523
c.node.Stop()
524+
c.wal.Close()
480525
c.logger.Infof("Raft node %x stopped", c.raftID)
526+
close(c.doneC) // close after all the artifacts are closed
481527
return
482528
}
483529
}
484530
}
485531

486532
func (c *Chain) apply(ents []raftpb.Entry) {
533+
if len(ents) == 0 {
534+
return
535+
}
536+
537+
if ents[0].Index > c.appliedIndex+1 {
538+
c.logger.Panicf("first index of committed entry[%d] should <= appliedIndex[%d]+1", ents[0].Index, c.appliedIndex)
539+
}
540+
487541
for i := range ents {
488542
switch ents[i].Type {
489543
case raftpb.EntryNormal:
490-
if len(ents[i].Data) == 0 {
544+
// We need to strictly avoid re-applying normal entries,
545+
// otherwise we are writing the same block twice.
546+
if len(ents[i].Data) == 0 || ents[i].Index <= c.appliedIndex {
491547
break
492548
}
493549

494-
c.commitC <- utils.UnmarshalBlockOrPanic(ents[i].Data)
550+
c.commitC <- block{utils.UnmarshalBlockOrPanic(ents[i].Data), ents[i].Index}
495551

496552
case raftpb.EntryConfChange:
497553
var cc raftpb.ConfChange
@@ -503,7 +559,9 @@ func (c *Chain) apply(ents []raftpb.Entry) {
503559
c.node.ApplyConfChange(cc)
504560
}
505561

506-
c.appliedIndex = ents[i].Index
562+
if ents[i].Index > c.appliedIndex {
563+
c.appliedIndex = ents[i].Index
564+
}
507565
}
508566
}
509567

@@ -522,27 +580,6 @@ func (c *Chain) send(msgs []raftpb.Message) {
522580
}
523581
}
524582

525-
// this is taken from coreos/contrib/raftexample/raft.go
526-
func (c *Chain) entriesToApply(ents []raftpb.Entry) (nents []raftpb.Entry) {
527-
if len(ents) == 0 {
528-
return
529-
}
530-
531-
firstIdx := ents[0].Index
532-
if firstIdx > c.appliedIndex+1 {
533-
c.logger.Panicf("first index of committed entry[%d] should <= progress.appliedIndex[%d]+1", firstIdx, c.appliedIndex)
534-
}
535-
536-
// If we do have unapplied entries in nents.
537-
// | applied | unapplied |
538-
// |----------------|----------------------|
539-
// firstIdx appliedIndex last
540-
if c.appliedIndex-firstIdx+1 < uint64(len(ents)) {
541-
nents = ents[c.appliedIndex-firstIdx+1:]
542-
}
543-
return nents
544-
}
545-
546583
func (c *Chain) isConfig(env *common.Envelope) bool {
547584
h, err := utils.ChannelHeader(env)
548585
if err != nil {
@@ -613,3 +650,88 @@ func (c *Chain) checkConsentersSet(configValue *common.ConfigValue) error {
613650

614651
return nil
615652
}
653+
654+
func (c *Chain) consentersChanged(newConsenters []*etcdraft.Consenter) bool {
655+
if len(c.opts.RaftMetadata.Consenters) != len(newConsenters) {
656+
return false
657+
}
658+
659+
consentersSet1 := c.membershipByCert()
660+
consentersSet2 := c.consentersToMap(newConsenters)
661+
662+
return reflect.DeepEqual(consentersSet1, consentersSet2)
663+
}
664+
665+
func (c *Chain) membershipByCert() map[string]struct{} {
666+
set := map[string]struct{}{}
667+
for _, c := range c.opts.RaftMetadata.Consenters {
668+
set[string(c.ClientTlsCert)] = struct{}{}
669+
}
670+
return set
671+
}
672+
673+
func (c *Chain) consentersToMap(consenters []*etcdraft.Consenter) map[string]struct{} {
674+
set := map[string]struct{}{}
675+
for _, c := range consenters {
676+
set[string(c.ClientTlsCert)] = struct{}{}
677+
}
678+
return set
679+
}
680+
681+
func (c *Chain) membershipToRaftPeers() []raft.Peer {
682+
var peers []raft.Peer
683+
684+
for raftID := range c.opts.RaftMetadata.Consenters {
685+
peers = append(peers, raft.Peer{ID: raftID})
686+
}
687+
return peers
688+
}
689+
690+
func replayWAL(lg *flogging.FabricLogger, applied uint64, walDir string, storage Storage) (*wal.WAL, bool, error) {
691+
hasWAL := wal.Exist(walDir)
692+
if !hasWAL && applied != 0 {
693+
return nil, hasWAL, errors.Errorf("applied index is not zero but no WAL data found")
694+
}
695+
696+
if !hasWAL {
697+
// wal.Create takes care of following cases by creating temp dir and atomically rename it:
698+
// - wal dir is a file
699+
// - wal dir is not readable/writeable
700+
//
701+
// TODO(jay_guo) store channel-related information in metadata when needed.
702+
// potential use case could be data dump and restore
703+
lg.Infof("No WAL data found, creating new WAL at path '%s'", walDir)
704+
w, err := wal.Create(walDir, nil)
705+
if err == os.ErrExist {
706+
lg.Fatalf("programming error, we've just checked that WAL does not exist")
707+
}
708+
709+
if err != nil {
710+
return nil, hasWAL, errors.Errorf("failed to initialize WAL: %s", err)
711+
}
712+
713+
if err = w.Close(); err != nil {
714+
return nil, hasWAL, errors.Errorf("failed to close the WAL just created: %s", err)
715+
}
716+
} else {
717+
lg.Infof("Found WAL data at path '%s', replaying it", walDir)
718+
}
719+
720+
w, err := wal.Open(walDir, walpb.Snapshot{})
721+
if err != nil {
722+
return nil, hasWAL, errors.Errorf("failed to open existing WAL: %s", err)
723+
}
724+
725+
_, st, ents, err := w.ReadAll()
726+
if err != nil {
727+
return nil, hasWAL, errors.Errorf("failed to read WAL: %s", err)
728+
}
729+
730+
lg.Debugf("Setting HardState to {Term: %d, Commit: %d}", st.Term, st.Commit)
731+
storage.SetHardState(st) // MemoryStorage.SetHardState always returns nil
732+
733+
lg.Debugf("Appending %d entries to memory storage", len(ents))
734+
storage.Append(ents) // MemoryStorage.Append always return nil
735+
736+
return w, hasWAL, nil
737+
}

0 commit comments

Comments
 (0)