/
onecache.go
339 lines (287 loc) · 7.9 KB
/
onecache.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
package onecache
import (
"errors"
"fmt"
"log"
"net"
"strconv"
"sync"
"time"
"os"
"github.com/dadgar/onecache/ttlstore"
"github.com/hashicorp/serf/serf"
"github.com/pborman/uuid"
)
const (
	// externalRpcTag is the serf tag key under which each node broadcasts the
	// port it is serving RPCs on, so peers know how to reach its RPC endpoint
	// (see initSerf and buildPeerNode).
	externalRpcTag = "externalRpcTag"
)
// peerNode holds the required information for sharding and forwarding RPCs to a
// peer node in the ring.
type peerNode struct {
	name string // unique serf node name (a UUID; see Create)
	addr string // IP address reported by serf for the member
	port int    // RPC port parsed from the member's externalRpcTag serf tag
}
// Node holds all the necessary information to store data, handle client
// connections, forward among the ring and replicate.
type Node struct {
	name string // unique node identifier (UUID), also used as the serf node name
	logger *log.Logger // writes to config.LogOutput (defaults to os.Stderr)
	serf *serf.Serf // cluster membership/discovery
	eventCh chan serf.Event // receives serf events; drained by serfEventHandler
	quitCh chan bool // closed on Exit to stop background goroutines
	ring ring // consistent-hash ring of peer names
	replicas int // replica count taken from config
	nodes map[string]*peerNode // peers keyed by serf node name; guarded by lock
	data *ttlstore.DataStore // local key/value storage
	replicator dataReplicator // replicates data to peers (no-op when replicas == 1)
	port int // bound RPC port, advertised via the externalRpcTag serf tag
	listener net.Listener // accepts RPC connections
	shutdown bool // set in Exit before closing the listener
	server *rpcServer
	lock sync.Mutex // guards nodes and member state updates
	gc *time.Timer // schedules garbageCollect; armed on membership changes
}
// Create creates a new Node instance and initializes all resources necessary to
// be a member of the hash ring. It returns an error if the configuration is
// invalid or if any subsystem (datastore, RPC server, serf) fails to start.
// The returned Node does not handle connections until Start is called.
func Create(config *NodeConfig) (*Node, error) {
	if err := configValidator(config); err != nil {
		return nil, err
	}

	node := &Node{
		name:     uuid.New(),
		eventCh:  make(chan serf.Event),
		quitCh:   make(chan bool),
		replicas: config.Replicas,
		nodes:    make(map[string]*peerNode),
		config:   config,
	}

	// RPC Endpoint
	node.server = newRpcServer(node)

	// Initialize the ring.
	node.ring = newConsistentRing(node.name, config.Replicas)

	// Set up logging.
	if config.LogOutput == nil {
		config.LogOutput = os.Stderr
	}
	node.logger = log.New(config.LogOutput, "", log.LstdFlags)

	// Initialize the data storage.
	d, err := ttlstore.New(config.MaxMemoryUsage, node.logger)
	if err != nil {
		return nil, fmt.Errorf("DataStore initialization failed: %v", err)
	}
	node.data = d

	// Initialize replication. With a single replica there is nothing to copy,
	// so install the no-op implementation.
	if config.Replicas == 1 {
		node.replicator = &noOpReplicator{}
	} else {
		node.replicator = newReplicator(node)
	}

	// Initialize garbage collection. The timer is created stopped; it is armed
	// by memberStateUpdates whenever cluster membership changes.
	node.gc = time.AfterFunc(10*time.Minute, node.garbageCollect)
	node.gc.Stop()

	// Initialize the RPC Server
	if err := node.initRpcServer(config.RpcPort); err != nil {
		return nil, fmt.Errorf("RPC server initialization failed: %v", err)
	}

	if err := node.initSerf(config.SerfConfig); err != nil {
		// Don't leak the listener that initRpcServer opened.
		node.listener.Close()
		return nil, fmt.Errorf("serf initialization failed: %v", err)
	}

	return node, nil
}
// Start launches the background goroutines that make the node active: one to
// dispatch serf membership events and one to accept RPC connections from
// clients and other ring members.
func (n *Node) Start() {
	// Process membership changes in the background.
	go n.serfEventHandler()
	// Accept client and peer connections.
	go n.listen()
}
// configValidator ensures that the configuration is valid and returns an error
// describing the first problem found, or nil if the config is usable.
func configValidator(config *NodeConfig) error {
	switch {
	case config == nil:
		return errors.New("must supply a non-nil config")
	case config.Replicas < 1:
		return errors.New("must have at least one replica")
	default:
		return nil
	}
}
// initSerf configures and starts serf for cluster discovery, advertising this
// node's RPC port to other members via a serf tag.
func (n *Node) initSerf(config *serf.Config) error {
	config.NodeName = n.name
	config.EventCh = n.eventCh

	// Advertise the bound RPC port so peers can build a peerNode for us.
	config.Tags = map[string]string{externalRpcTag: strconv.Itoa(n.port)}

	// Start serf for discovery.
	instance, err := serf.Create(config)
	if err != nil {
		return err
	}
	n.serf = instance
	return nil
}
// serfEventHandler dispatches incoming serf events until quitCh is closed.
// Member joins and leaves/failures are handled asynchronously; all other
// events are ignored (unknown event types are logged). Should be started in a
// goroutine.
func (n *Node) serfEventHandler() {
	for {
		select {
		case <-n.quitCh:
			return
		case e := <-n.eventCh:
			me, ok := e.(serf.MemberEvent)
			if !ok {
				n.logger.Printf("[ERROR] onecache: unexpected Serf event type %v\n", e)
				continue
			}
			switch me.EventType() {
			case serf.EventMemberJoin:
				go n.handleNodeJoins(me.Members)
			case serf.EventMemberLeave, serf.EventMemberFailed:
				go n.handleNodeLeaves(me.Members)
			default:
				// Other member event types (e.g. update/reap) are not handled.
			}
		}
	}
}
// Exit removes this Node from the ring, kills background tasks and frees any
// used resources. Once this is called the Node will no longer accept
// connections and can not be used. It returns the first error encountered
// while tearing down; later teardown steps are skipped in that case.
func (n *Node) Exit() error {
	// Signal background goroutines (serfEventHandler) to stop.
	close(n.quitCh)

	// Stop the GC timer so garbageCollect cannot fire against a destroyed
	// datastore.
	n.gc.Stop()

	n.replicator.Stop()

	// Clean up the datastore.
	if err := n.data.Destroy(); err != nil {
		return err
	}

	// Close the listener and RPC Server.
	n.shutdown = true
	if err := n.listener.Close(); err != nil {
		return err
	}

	if err := n.server.shutdown(); err != nil {
		return err
	}

	// Gracefully leave the serf cluster, then release serf's resources.
	if err := n.serf.Leave(); err != nil {
		return err
	}

	return n.serf.Shutdown()
}
// Join adds this Node to an existing ring. nodes is a list of ip:port pairs
// or DNS addresses of current members to contact.
func (n *Node) Join(nodes []string) error {
	if _, err := n.serf.Join(nodes, true); err != nil {
		return err
	}
	return nil
}
// handleNodeJoins records newly joined serf members in the peer state.
func (n *Node) handleNodeJoins(members []serf.Member) {
	n.memberStateUpdates(members, false)
}
// handleNodeLeaves removes departed or failed serf members from the peer state.
func (n *Node) handleNodeLeaves(members []serf.Member) {
	n.memberStateUpdates(members, true)
}
// memberStateUpdates applies a batch of serf membership changes. remove is
// true when the members left or failed, false when they joined. It updates
// the peer table and ring under the lock, then notifies the replicator and
// schedules a garbage-collection pass.
func (n *Node) memberStateUpdates(members []serf.Member, remove bool) {
	n.lock.Lock()
	var updated []string
	for _, member := range members {
		peer, err := buildPeerNode(member)
		if err != nil {
			n.logger.Printf("[ERROR] onecache: buildPeerNode(%v) failed: %v\n", member, err)
			continue
		}
		updated = append(updated, peer.name)
		if err := n.memberStateUpdate(peer, remove); err != nil {
			n.logger.Printf("[ERROR] onecache: memberStateUpdate(%v, %v) failed: %v\n", member, remove, err)
		}
	}
	n.lock.Unlock()

	// Notify the replicator only after every state update has been applied,
	// because the state of replicas can be in flux while iterating.
	if remove {
		n.replicator.NodesRemoved(updated)
	} else if err := n.replicator.MarkPeersDirty(updated); err != nil {
		n.logger.Printf("[ERROR] onecache.replicator: %v", err)
	}

	// The set of nodes has changed, so trigger garbage collection.
	n.gc.Reset(10 * time.Minute)
}
// memberStateUpdate handles a single node joining (remove == false) or
// leaving (remove == true), keeping the peer table and ring in sync. Removing
// an unknown peer is reported as an error. Caller must hold n.lock.
func (n *Node) memberStateUpdate(p *peerNode, remove bool) error {
	if !remove {
		n.logger.Printf("[INFO] onecache: added peer node %v", p)
		n.nodes[p.name] = p
		n.ring.add(p.name)
		return nil
	}

	if _, ok := n.nodes[p.name]; !ok {
		return fmt.Errorf("delete of non-existent peer %+v", p)
	}
	n.logger.Printf("[INFO] onecache: removed peer node %v", p)
	delete(n.nodes, p.name)
	n.ring.remove(p.name)
	return nil
}
// buildPeerNode takes a serf member and returns a peerNode describing how to
// reach that member's RPC endpoint. It returns an error if the member did not
// advertise an RPC port tag or if the tag value is not a valid integer.
func buildPeerNode(m serf.Member) (*peerNode, error) {
	portString, ok := m.Tags[externalRpcTag]
	if !ok {
		return nil, fmt.Errorf("member joined without the rpc port set: %+v", m)
	}

	// The tag is written with strconv.Itoa (see initSerf), so it is always
	// base-10; Atoi avoids ParseInt's base-0 auto-detection which would
	// misread values like "010" as octal, and the parse error is included.
	port, err := strconv.Atoi(portString)
	if err != nil {
		return nil, fmt.Errorf("couldn't parse rpc port string %v for member %+v: %v", portString, m, err)
	}

	return &peerNode{
		name: m.Name,
		addr: m.Addr.String(),
		port: port,
	}, nil
}
// garbageCollect scans for stored keys that this node doesn't own or should
// not be replicated to it and removes them from the local datastore.
func (n *Node) garbageCollect() {
	gcKeys, stillReplicating := n.getGCKeys()
	for _, k := range gcKeys {
		n.data.Delete(k)
	}

	// Some keys were skipped because they are mid-replication; schedule a
	// follow-up GC pass to reclaim them later.
	if stillReplicating {
		n.gc.Reset(10 * time.Minute)
	}
}
// getGCKeys returns keys that should be GC'd and whether there should be a
// follow up GC because keys are being replicated.
func (n *Node) getGCKeys() ([]string, bool) {
	keys := n.data.List()
	var gc []string
	var replicating bool
	for _, key := range keys {
		// replicaPeers yields the node names associated with key's replicas.
		replicas := n.ring.replicaPeers(key)
		for _, replica := range replicas {
			if replica == n.name {
				if n.replicator.IsReplicating(key) {
					// Key is mid-replication: keep it for now and request a
					// follow-up GC pass via the returned flag.
					replicating = true
					continue
				}
				// NOTE(review): this collects keys for which this node *is* in
				// replicaPeers(key), which appears to delete keys the node
				// holds a replica slot for — the opposite of the doc comment
				// ("keys that this node doesn't own"). Verify the semantics of
				// ring.replicaPeers before relying on this branch.
				gc = append(gc, key)
				break
			}
		}
	}
	return gc, replicating
}