Skip to content

Commit 2bfa4bb

Browse files
author
Pawan Rawal
committed
Return error if user tries to reuse RAFT id for a node that was removed.
1 parent 74efabe commit 2bfa4bb

File tree

7 files changed

+49
-23
lines changed

7 files changed

+49
-23
lines changed

dgraph/cmd/zero/raft.go

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ package zero
1010
import (
1111
"encoding/binary"
1212
"errors"
13+
"log"
1314
"math/rand"
1415
"sync"
1516
"time"
@@ -380,6 +381,18 @@ func (n *node) applyConfChange(e raftpb.Entry) {
380381
n.Connect(rc.Id, rc.Addr)
381382

382383
m := &intern.Member{Id: rc.Id, Addr: rc.Addr, GroupId: 0}
384+
385+
for _, member := range n.server.membershipState().Removed {
386+
// It is not recommended to reuse RAFT ids.
387+
if member.GroupId == 0 && m.Id == member.Id {
388+
n.DoneConfChange(cc.ID, x.ErrReuseRemovedId)
389+
// Cancel configuration change.
390+
cc.NodeID = raft.None
391+
n.Raft().ApplyConfChange(cc)
392+
return
393+
}
394+
}
395+
383396
n.server.storeZero(m)
384397
}
385398

@@ -429,14 +442,16 @@ func (n *node) initAndStartNode(wal *raftwal.Wal) error {
429442
time.Sleep(delay)
430443
ctx, cancel := context.WithTimeout(n.ctx, time.Second)
431444
defer cancel()
432-
// JoinCluster can block idefinitely, raft ignores conf change proposal
445+
// JoinCluster can block indefinitely, raft ignores conf change proposal
433446
// if it has pending configuration.
434447
_, err = c.JoinCluster(ctx, n.RaftContext)
435448
if err == nil {
436449
break
437450
}
438-
if grpc.ErrorDesc(err) == conn.ErrDuplicateRaftId.Error() {
439-
x.Fatalf("Error while joining cluster %v", err)
451+
errorDesc := grpc.ErrorDesc(err)
452+
if errorDesc == conn.ErrDuplicateRaftId.Error() ||
453+
errorDesc == x.ErrReuseRemovedId.Error() {
454+
log.Fatalf("Error while joining cluster: %v", errorDesc)
440455
}
441456
x.Printf("Error while joining cluster %v\n", err)
442457
delay *= 2

dgraph/cmd/zero/zero.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -340,6 +340,14 @@ func (s *Server) Connect(ctx context.Context,
340340
fmt.Println("No address provided.")
341341
return &emptyConnectionState, errInvalidAddress
342342
}
343+
344+
for _, member := range s.membershipState().Removed {
345+
// It is not recommended to reuse RAFT ids.
346+
if member.GroupId != 0 && m.Id == member.Id {
347+
return &emptyConnectionState, x.ErrReuseRemovedId
348+
}
349+
}
350+
343351
for _, group := range s.state.Groups {
344352
member, has := group.Members[m.Id]
345353
if !has {
@@ -353,6 +361,7 @@ func (s *Server) Connect(ctx context.Context,
353361
}
354362
}
355363
}
364+
356365
// Create a connection and check validity of the address by doing an Echo.
357366
conn.Get().Connect(m.Addr)
358367

wiki/content/contribute/index.md

Lines changed: 7 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -97,11 +97,11 @@ Over years of writing big scalable systems, we are convinced that striving for s
9797

9898
Every new source file must begin with a license header.
9999

100-
Badger repo and files under `client/` and `x/` in Dgraph repo are licensed under the Apache license:
100+
Badger repo and the dgraph clients(dgo, dgraph-js, pydgraph and dgraph4j) are licensed under the Apache license:
101101

102102

103103
/*
104-
* Copyright 2016-17 Dgraph Labs, Inc.
104+
* Copyright 2016-2018 Dgraph Labs, Inc. and Contributors
105105
*
106106
* Licensed under the Apache License, Version 2.0 (the "License");
107107
* you may not use this file except in compliance with the License.
@@ -116,24 +116,15 @@ Badger repo and files under `client/` and `x/` in Dgraph repo are licensed under
116116
* limitations under the License.
117117
*/
118118

119-
All other code in the Dgraph repo is licenses under the GNU AGPL v3 license:
119+
All the code in the Dgraph repo is licensed under the Apache license with the Commons Clause
120+
restriction:
120121

121122

122123
/*
123-
* Copyright (C) 2017 Dgraph Labs, Inc. and Contributors
124+
* Copyright 2017-2018 Dgraph Labs, Inc. and Contributors
124125
*
125-
* This program is free software: you can redistribute it and/or modify
126-
* it under the terms of the GNU Affero General Public License as published by
127-
* the Free Software Foundation, either version 3 of the License, or
128-
* (at your option) any later version.
129-
*
130-
* This program is distributed in the hope that it will be useful,
131-
* but WITHOUT ANY WARRANTY; without even the implied warranty of
132-
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
133-
* GNU Affero General Public License for more details.
134-
*
135-
* You should have received a copy of the GNU Affero General Public License
136-
* along with this program. If not, see <http://www.gnu.org/licenses/>.
126+
* This file is available under the Apache License, Version 2.0,
127+
* with the Commons Clause restriction.
137128
*/
138129

139130
### Signed Commits

wiki/content/deploy/index.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1069,7 +1069,7 @@ id of the Zero node.
10691069
{{% notice "note" %}}
10701070
Before using the api ensure that the node is down and ensure that it doesn't come back up ever again.
10711071

1072-
Remember to specify the `idx` flag while replacing the dead node or else Zero might assign it a different group.
1072+
You should not use the same `idx` as that of a node that was removed earlier.
10731073
{{% /notice %}}
10741074
* `/moveTablet?tablet=name&group=2` This endpoint can be used to move a tablet to a group. Zero
10751075
already does shard rebalancing every 8 mins, this endpoint can be used to force move a tablet.

worker/groups.go

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@ import (
2323
"sync/atomic"
2424
"time"
2525

26+
"google.golang.org/grpc"
27+
2628
"golang.org/x/net/context"
2729

2830
"github.com/dgraph-io/badger"
@@ -94,10 +96,10 @@ func StartRaftNodes(walStore *badger.ManagedDB, bindall bool) {
9496
m := &intern.Member{Id: Config.RaftId, Addr: Config.MyAddr}
9597
delay := 50 * time.Millisecond
9698
maxHalfDelay := 15 * time.Second
99+
var err error
97100
for { // Keep on retrying. See: https://github.com/dgraph-io/dgraph/issues/2289
98-
var err error
99101
connState, err = zc.Connect(gr.ctx, m)
100-
if err == nil {
102+
if err == nil || grpc.ErrorDesc(err) == x.ErrReuseRemovedId.Error() {
101103
break
102104
}
103105
x.Printf("Error while connecting with group zero: %v", err)
@@ -106,6 +108,7 @@ func StartRaftNodes(walStore *badger.ManagedDB, bindall bool) {
106108
delay *= 2
107109
}
108110
}
111+
x.CheckfNoTrace(err)
109112
if connState.GetMember() == nil || connState.GetState() == nil {
110113
x.Fatalf("Unable to join cluster via dgraphzero")
111114
}

x/error.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,12 @@ func Checkf(err error, format string, args ...interface{}) {
4040
}
4141
}
4242

43+
func CheckfNoTrace(err error) {
44+
if err != nil {
45+
log.Fatalf(err.Error())
46+
}
47+
}
48+
4349
// Check2 acts as convenience wrapper around Check, using the 2nd argument as error.
4450
func Check2(_ interface{}, err error) {
4551
Check(err)

x/x.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import (
1212
"bytes"
1313
"context"
1414
"encoding/json"
15+
"errors"
1516
"fmt"
1617
"net"
1718
"net/http"
@@ -61,7 +62,8 @@ const (
6162

6263
var (
6364
// Useful for running multiple servers on the same machine.
64-
regExpHostName = regexp.MustCompile(ValidHostnameRegex)
65+
regExpHostName = regexp.MustCompile(ValidHostnameRegex)
66+
ErrReuseRemovedId = errors.New("Reusing RAFT index of a removed node.")
6567
)
6668

6769
// WhiteSpace Replacer removes spaces and tabs from a string.

0 commit comments

Comments
 (0)