-
Notifications
You must be signed in to change notification settings - Fork 647
/
ring.go
281 lines (251 loc) · 7.22 KB
/
ring.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
// Copyright (C) 2019-2022, Ava Labs, Inc. All rights reserved.
// See the file LICENSE for licensing terms.
package consistent
import (
"errors"
"sync"
"github.com/ava-labs/avalanchego/utils/hashing"
"github.com/google/btree"
)
var (
_ Ring = (*hashRing)(nil)
_ btree.LessFunc[ringItem] = ringItem.Less
errEmptyRing = errors.New("ring doesn't have any members")
)
// Ring is an interface for a consistent hashing ring
// (ref: https://en.wikipedia.org/wiki/Consistent_hashing).
//
// Consistent hashing is a method of distributing keys across an arbitrary set
// of destinations.
//
// Consider a naive approach, which uses modulo to map a key to a node to serve
// cached requests. Let N be the number of keys and M is the amount of possible
// hashing destinations. Here, a key would be routed to a node based on
// h(key) % M = node assigned.
//
// With this approach, we can route a key to a node in O(1) time, but this
// results in cache misses as nodes are introduced and removed, which results in
// the keys being reshuffled across nodes, as modulo's output changes with M.
// This approach results in O(N) keys being shuffled, which is undesirable for a
// caching use-case.
//
// Consistent hashing works by hashing all keys into a circle, which can be
// visualized as a clock. Keys are routed to a node by hashing the key, and
// searching for the first clockwise neighbor. This requires O(N) memory to
// maintain the state of the ring and log(N) time to route a key using
// binary-search, but results in O(N/M) amount of keys being shuffled when a
// node is added/removed because the addition/removal of a node results in a
// split/merge of its counter-clockwise neighbor's hash space.
//
// As an example, assume we have a ring that supports hashes from 1-12.
//
// 12
// 11 1
//
// 10 2
//
// 9 3
//
// 8 4
//
// 7 5
// 6
//
// Add node 1 (n1). Let h(n1) = 12.
// First, we compute the hash the node, and insert it into its corresponding
// location on the ring.
//
// 12 (n1)
// 11 1
//
// 10 2
//
// 9 3
//
// 8 4
//
// 7 5
// 6
//
// Now, to see which node a key (k1) should map to, we hash the key and search
// for its closest clockwise neighbor.
// Let h(k1) = 3. Here, we see that since n1 is the closest neighbor, as there
// are no other nodes in the ring.
//
// 12 (n1)
// 11 1
//
// 10 2
//
// 9 3 (k1)
//
// 8 4
//
// 7 5
// 6
//
// Now, let's insert another node (n2), such that h(n2) = 6.
// Here we observe that k1 has shuffled to n2, as n2 is the closest clockwise
// neighbor to k1.
//
// 12 (n1)
// 11 1
//
// 10 2
//
// 9 3 (k1)
//
// 8 4
//
// 7 5
// 6 (n2)
//
// Other optimizations can be made to help reduce blast radius of failures and
// the variance in keys (hot shards). One such optimization is introducing
// virtual nodes, which is to replicate a single node multiple times by salting
// it, (e.g n1-0, n1-1...).
//
// Without virtualization, failures of a node cascade as each node failing
// results in the load of the failed node being shuffled into its clockwise
// neighbor, which can result in a snowball effect across the network.
type Ring interface {
RingReader
ringMutator
}
// RingReader is an interface to read values from Ring.
type RingReader interface {
// Get gets the closest clockwise node for a key in the ring.
//
// Each ring member is responsible for the hashes which fall in the range
// between (myself, clockwise-neighbor].
// This behavior is desirable so that we can re-use the return value of Get
// to iterate around the ring (Ex. replication, retries, etc).
//
// Returns the node routed to and an error if we're unable to resolve a node
// to map to.
Get(Hashable) (Hashable, error)
}
// ringMutator defines an interface that mutates Ring.
type ringMutator interface {
// Add adds a node to the ring.
Add(Hashable)
// Remove removes the node from the ring.
//
// Returns true if the node was removed, and false if it wasn't present to
// begin with.
Remove(Hashable) bool
}
// hashRing is an implementation of Ring
type hashRing struct {
// Hashing algorithm to use when hashing keys.
hasher hashing.Hasher
// Replication factor for nodes; must be greater than zero.
virtualNodes int
lock sync.RWMutex
ring *btree.BTreeG[ringItem]
}
// RingConfig configures settings for a Ring.
type RingConfig struct {
// Replication factor for nodes in the ring.
VirtualNodes int
// Hashing implementation to use.
Hasher hashing.Hasher
// Degree represents the degree of the b-tree
Degree int
}
// NewHashRing instantiates an instance of hashRing.
func NewHashRing(config RingConfig) Ring {
return &hashRing{
hasher: config.Hasher,
virtualNodes: config.VirtualNodes,
ring: btree.NewG(config.Degree, ringItem.Less),
}
}
func (h *hashRing) Get(key Hashable) (Hashable, error) {
h.lock.RLock()
defer h.lock.RUnlock()
return h.get(key)
}
func (h *hashRing) get(key Hashable) (Hashable, error) {
// If we have no members in the ring, it's not possible to find where the
// key belongs.
if h.ring.Len() == 0 {
return nil, errEmptyRing
}
var (
// Compute this key's hash
hash = h.hasher.Hash(key.ConsistentHashKey())
result Hashable
)
h.ring.AscendGreaterOrEqual(
ringItem{
hash: hash,
value: key,
},
func(item ringItem) bool {
if hash < item.hash {
result = item.value
return false
}
return true
},
)
// If found nothing ascending the tree, we need to wrap around the ring to
// the left-most (min) node.
if result == nil {
min, _ := h.ring.Min()
result = min.value
}
return result, nil
}
func (h *hashRing) Add(key Hashable) {
h.lock.Lock()
defer h.lock.Unlock()
h.add(key)
}
func (h *hashRing) add(key Hashable) {
// Replicate the node in the ring.
hashKey := key.ConsistentHashKey()
for i := 0; i < h.virtualNodes; i++ {
virtualNode := getHashKey(hashKey, i)
virtualNodeHash := h.hasher.Hash(virtualNode)
// Insert it into the ring.
h.ring.ReplaceOrInsert(ringItem{
hash: virtualNodeHash,
value: key,
})
}
}
func (h *hashRing) Remove(key Hashable) bool {
h.lock.Lock()
defer h.lock.Unlock()
return h.remove(key)
}
func (h *hashRing) remove(key Hashable) bool {
var (
hashKey = key.ConsistentHashKey()
removed = false
)
// We need to delete all virtual nodes created for a single node.
for i := 0; i < h.virtualNodes; i++ {
virtualNode := getHashKey(hashKey, i)
virtualNodeHash := h.hasher.Hash(virtualNode)
item := ringItem{
hash: virtualNodeHash,
}
_, removed = h.ring.Delete(item)
}
return removed
}
// getHashKey builds a key given a base key and a virtual node number.
func getHashKey(key []byte, virtualNode int) []byte {
return append(key, byte(virtualNode))
}
// ringItem is a helper class to represent ring nodes in the b-tree.
type ringItem struct {
hash uint64
value Hashable
}
func (r ringItem) Less(than ringItem) bool {
return r.hash < than.hash
}