forked from influxdata/influxdb
-
Notifications
You must be signed in to change notification settings - Fork 0
/
store.go
359 lines (305 loc) · 7.75 KB
/
store.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
package meta
import (
"errors"
"fmt"
"io/ioutil"
"log"
"math/rand"
"net"
"os"
"sync"
"time"
"github.com/influxdb/influxdb/services/meta/internal"
"github.com/gogo/protobuf/proto"
"github.com/hashicorp/raft"
)
// Retention policy settings.
//
// None of these constants is referenced in this part of the file; the notes
// below are based on their names and values only.
const (
// autoCreateRetentionPolicyName is the string "default".
// NOTE(review): presumably the name given to auto-created retention
// policies — confirm at the use site.
autoCreateRetentionPolicyName = "default"
// autoCreateRetentionPolicyPeriod is the untyped constant 0.
// NOTE(review): presumably a duration where 0 means "keep forever" —
// confirm at the use site.
autoCreateRetentionPolicyPeriod = 0
// maxAutoCreatedRetentionPolicyReplicaN is the maximum replication factor that will
// be set for auto-created retention policies.
maxAutoCreatedRetentionPolicyReplicaN = 3
)
// Raft configuration.
const (
// raftListenerStartupTimeout is one second. It is not referenced in this
// part of the file.
// NOTE(review): presumably bounds how long to wait for the raft listener
// to come up — confirm at the use site.
raftListenerStartupTimeout = time.Second
)
// store is the metastore: it holds the metadata (data) together with the
// raft state through which commands are applied (see apply).
type store struct {
// mu is held by open, close, snapshot, afterIndex, index and the leader
// accessors while they touch data, raftState and opened. Note that apply,
// join and leave read raftState without taking it.
mu sync.RWMutex
// closing is closed by close(); waitForLeader selects on it to abort.
closing chan struct{}
// config is the configuration passed to newStore.
config *Config
// data is the current metadata; newStore initializes it with Index 1.
data *Data
// raftState is nil until openRaft succeeds.
raftState *raftState
// dataChanged is the channel afterIndex hands to callers that are already
// up to date. Per afterIndex's comment it is closed to signal that an
// updated snapshot is available; the code that closes it is not in this
// part of the file.
dataChanged chan struct{}
// path is the data directory (config.Dir); open creates it if missing.
path string
// opened is set by open to reject a second open with ErrStoreOpen.
opened bool
// logger writes to stderr when config.LoggingEnabled, otherwise discards.
logger *log.Logger
// Authentication cache.
// NOTE(review): newStore does not allocate this map, so it is nil until
// some other code assigns it — confirm writers allocate before inserting.
authCache map[string]authUser
// raftAddr and httpAddr are this node's own addresses as given to
// newStore; they are what open registers via setMetaNode/JoinMetaServer.
raftAddr string
httpAddr string
}
// authUser is the value type stored in store.authCache.
// NOTE(review): presumably salt and hash are the salt and resulting digest of
// a user's password, keyed by user name in the cache — confirm against the
// code that populates authCache.
type authUser struct {
salt []byte
hash []byte
}
// newStore builds an unopened metastore from the passed in config.
//
// The store starts with an empty Data at Index 1, fresh closing and
// dataChanged channels, and c.Dir as its data directory. httpAddr and
// raftAddr are recorded as this node's own addresses. Log output goes to
// stderr with a "[metastore] " prefix when c.LoggingEnabled is set and is
// discarded otherwise.
func newStore(c *Config, httpAddr, raftAddr string) *store {
	st := &store{
		data:        &Data{Index: 1},
		closing:     make(chan struct{}),
		dataChanged: make(chan struct{}),
		path:        c.Dir,
		config:      c,
		httpAddr:    httpAddr,
		raftAddr:    raftAddr,
	}

	// Start silent and only attach a real logger when logging is enabled.
	st.logger = log.New(ioutil.Discard, "", 0)
	if c.LoggingEnabled {
		st.logger = log.New(os.Stderr, "[metastore] ", log.LstdFlags)
	}

	return st
}
// open opens and initializes the raft store.
//
// The sequence is:
//  1. If config.JoinPeers is set, fetch a snapshot from those peers and use
//     its MetaNodes (plus this node's raftAddr) as the initial raft peers.
//  2. Under s.mu: create the data directory and open raft.
//  3. If config.JoinPeers is set, ask the cluster to add this node via
//     JoinMetaServer.
//  4. Wait (without timeout) for a raft leader.
//  5. If raft reports at most one peer, register this node with setMetaNode,
//     retrying every 100ms until it succeeds.
//
// It returns ErrStoreOpen if the store is already open, and an error with the
// message "closing" if the store is closed while it is still waiting in step 4
// or retrying in step 5. If creating the directory or opening raft fails, the
// store is left un-opened so that open may be attempted again.
func (s *store) open(raftln net.Listener) error {
	s.logger.Printf("Using data dir: %v", s.path)

	// See if this server needs to join the raft consensus group
	var initializePeers []string
	if len(s.config.JoinPeers) > 0 {
		c := NewClient(s.config.JoinPeers, s.config.HTTPSEnabled)
		data := c.retryUntilSnapshot(0)
		for _, n := range data.MetaNodes {
			initializePeers = append(initializePeers, n.TCPHost)
		}
		initializePeers = append(initializePeers, s.raftAddr)
	}

	if err := func() error {
		s.mu.Lock()
		defer s.mu.Unlock()

		// Check if store has already been opened.
		if s.opened {
			return ErrStoreOpen
		}
		s.opened = true

		// Create the root directory if it doesn't already exist.
		if err := os.MkdirAll(s.path, 0777); err != nil {
			// Nothing was opened; don't leave the store flagged as open.
			s.opened = false
			return fmt.Errorf("mkdir all: %s", err)
		}

		// Open the raft store.
		if err := s.openRaft(initializePeers, raftln); err != nil {
			// openRaft only assigns s.raftState on success, so the store
			// is still effectively closed here.
			s.opened = false
			return fmt.Errorf("raft: %s", err)
		}

		return nil
	}(); err != nil {
		return err
	}

	if len(s.config.JoinPeers) > 0 {
		c := NewClient(s.config.JoinPeers, s.config.HTTPSEnabled)
		if err := c.Open(); err != nil {
			return err
		}
		defer c.Close()

		if err := c.JoinMetaServer(s.httpAddr, s.raftAddr); err != nil {
			return err
		}
	}

	// Wait for a leader to be elected so we know the raft log is loaded
	// and up to date
	if err := s.waitForLeader(0); err != nil {
		return err
	}

	// Make sure this server is in the list of metanodes
	peers, err := s.raftState.peers()
	if err != nil {
		return err
	}
	if len(peers) <= 1 {
		// we have to loop here because if the hostname has changed
		// raft will take a little bit to normalize so that this host
		// will be marked as the leader
		for {
			if err := s.setMetaNode(s.httpAddr, s.raftAddr); err == nil {
				break
			}

			// Back off before retrying, but give up as soon as the store
			// is closed; otherwise this loop could spin forever against a
			// raft instance that has already been shut down.
			select {
			case <-s.closing:
				return errors.New("closing")
			case <-time.After(100 * time.Millisecond):
			}
		}
	}

	return nil
}
// openRaft creates a raftState for this store, points it at the store's
// logger and data path, and opens it on raftln with the given initial peers.
// s.raftState is assigned only if opening succeeds; on failure the error is
// returned and s.raftState is left untouched. open calls this with s.mu held.
func (s *store) openRaft(initializePeers []string, raftln net.Listener) error {
	state := newRaftState(s.config, s.raftAddr)
	state.logger, state.path = s.logger, s.path

	err := state.open(s, raftln, initializePeers)
	if err != nil {
		return err
	}

	s.raftState = state
	return nil
}
// close shuts the store down: it closes the closing channel (which aborts
// waitForLeader) and then closes the raft state. Calling close again is a
// no-op that returns nil. It is also safe to call on a store whose raft state
// was never opened, in which case only the closing channel is closed.
func (s *store) close() error {
	s.mu.Lock()
	defer s.mu.Unlock()

	select {
	case <-s.closing:
		// already closed
		return nil
	default:
	}

	close(s.closing)

	// raftState is nil until openRaft succeeds; guard it the same way
	// isLeader, leader and leaderHTTP do.
	if s.raftState == nil {
		return nil
	}
	return s.raftState.close()
}
// snapshot returns s.data.Clone(), taken while holding the read lock.
// The returned error is always nil.
func (s *store) snapshot() (*Data, error) {
	s.mu.RLock()
	defer s.mu.RUnlock()

	clone := s.data.Clone()
	return clone, nil
}
// afterIndex returns a channel that will be closed to signal
// the caller when an updated snapshot is available.
//
// If the store's data index is already greater than index, the returned
// channel is closed up front so the caller proceeds immediately. Otherwise
// the caller gets the store's shared dataChanged channel.
func (s *store) afterIndex(index uint64) <-chan struct{} {
	s.mu.RLock()
	defer s.mu.RUnlock()

	if s.data.Index <= index {
		// Caller is up to date; hand out the shared change channel.
		return s.dataChanged
	}

	// Caller is behind: hand back a channel that is already closed.
	ready := make(chan struct{})
	close(ready)
	return ready
}
// waitForLeader polls every 100ms until leader() reports a non-empty leader,
// the timeout elapses, or the store is closed.
//
// timeout == 0 means wait forever. It returns nil once a leader is seen, an
// error with message "timeout" when the timeout expires, and an error with
// message "closing" when s.closing is closed first.
func (s *store) waitForLeader(timeout time.Duration) error {
	// A nil channel is never ready in a select, so with timeout == 0 the
	// expiry case below can simply never fire.
	var expired <-chan time.Time
	if timeout != 0 {
		deadline := time.NewTimer(timeout)
		defer deadline.Stop()
		expired = deadline.C
	}

	// Poll for a leader on a fixed interval.
	poll := time.NewTicker(100 * time.Millisecond)
	defer poll.Stop()

	for {
		select {
		case <-s.closing:
			return errors.New("closing")
		case <-expired:
			return errors.New("timeout")
		case <-poll.C:
			if s.leader() == "" {
				continue
			}
			return nil
		}
	}
}
// isLeader reports whether this store's raft instance is currently in the
// Leader state. It returns false when raft has not been opened yet.
func (s *store) isLeader() bool {
	s.mu.RLock()
	defer s.mu.RUnlock()

	rs := s.raftState
	return rs != nil && rs.raft.State() == raft.Leader
}
// leader returns the leader as reported by this store's raft instance.
// The result is an empty string when no leader is known or when raft has
// not been opened yet.
func (s *store) leader() string {
	s.mu.RLock()
	defer s.mu.RUnlock()

	if rs := s.raftState; rs != nil {
		return rs.raft.Leader()
	}
	return ""
}
// leaderHTTP returns the Host of the metanode whose TCPHost equals the
// leader reported by raft. It returns an empty string when raft has not been
// opened or when no metanode matches the reported leader.
func (s *store) leaderHTTP() string {
	s.mu.RLock()
	defer s.mu.RUnlock()

	if s.raftState == nil {
		return ""
	}

	leaderAddr := s.raftState.raft.Leader()
	for _, node := range s.data.MetaNodes {
		if node.TCPHost != leaderAddr {
			continue
		}
		return node.Host
	}

	return ""
}
// otherMetaServersHTTP returns the Host of every known metanode whose
// TCPHost differs from this store's raftAddr, i.e. every meta server except
// this one. The result is nil when there are none.
func (s *store) otherMetaServersHTTP() []string {
	s.mu.RLock()
	defer s.mu.RUnlock()

	var hosts []string
	for _, node := range s.data.MetaNodes {
		if node.TCPHost == s.raftAddr {
			// Skip ourselves.
			continue
		}
		hosts = append(hosts, node.Host)
	}
	return hosts
}
// index returns the Index of the store's current data, read under the
// read lock.
func (s *store) index() uint64 {
	s.mu.RLock()
	defer s.mu.RUnlock()

	current := s.data.Index
	return current
}
// apply hands the marshaled command b to the raft state and returns whatever
// error it reports. It does not take s.mu and does not check that raft has
// been opened.
func (s *store) apply(b []byte) error {
	rs := s.raftState
	return rs.apply(b)
}
// join adds a new server to raft and then to the metaservice: it first adds
// n.TCPHost as a raft peer and, only if that succeeds, creates the matching
// metanode entry from n.Host and n.TCPHost.
func (s *store) join(n *NodeInfo) error {
	err := s.raftState.addPeer(n.TCPHost)
	if err != nil {
		return err
	}

	return s.createMetaNode(n.Host, n.TCPHost)
}
// leave removes the server n from raft by removing n.TCPHost as a peer.
// Unlike join, it issues no metastore command of its own here.
func (s *store) leave(n *NodeInfo) error {
	peer := n.TCPHost
	return s.raftState.removePeer(peer)
}
// createMetaNode is used by the join command to create the metanode in
// the metastore. It wraps a CreateMetaNodeCommand carrying the HTTP address
// (addr), the raft address (raftAddr) and a random value in a Command,
// marshals it and applies it through raft.
//
// It panics if the extension cannot be attached to the command, and returns
// any marshal or apply error.
func (s *store) createMetaNode(addr, raftAddr string) error {
	cmdType := internal.Command_CreateMetaNodeCommand
	cmd := &internal.Command{Type: &cmdType}

	payload := &internal.CreateMetaNodeCommand{
		HTTPAddr: proto.String(addr),
		TCPAddr:  proto.String(raftAddr),
		Rand:     proto.Uint64(uint64(rand.Int63())),
	}

	err := proto.SetExtension(cmd, internal.E_CreateMetaNodeCommand_Command, payload)
	if err != nil {
		panic(err)
	}

	encoded, err := proto.Marshal(cmd)
	if err != nil {
		return err
	}

	return s.apply(encoded)
}
// setMetaNode is used when the raft group has only a single peer. It will
// either create a metanode or update the information for the one metanode
// that is there. It's used because hostnames can change.
//
// It wraps a SetMetaNodeCommand carrying the HTTP address (addr), the raft
// address (raftAddr) and a random value in a Command, marshals it and applies
// it through raft. It panics if the extension cannot be attached to the
// command, and returns any marshal or apply error.
func (s *store) setMetaNode(addr, raftAddr string) error {
	cmdType := internal.Command_SetMetaNodeCommand
	cmd := &internal.Command{Type: &cmdType}

	payload := &internal.SetMetaNodeCommand{
		HTTPAddr: proto.String(addr),
		TCPAddr:  proto.String(raftAddr),
		Rand:     proto.Uint64(uint64(rand.Int63())),
	}

	err := proto.SetExtension(cmd, internal.E_SetMetaNodeCommand_Command, payload)
	if err != nil {
		panic(err)
	}

	encoded, err := proto.Marshal(cmd)
	if err != nil {
		return err
	}

	return s.apply(encoded)
}