allocator.go
// SPDX-License-Identifier: Apache-2.0
// Copyright Authors of Cilium
package allocator
import (
"context"
"errors"
"fmt"
"time"
"github.com/google/uuid"
"github.com/sirupsen/logrus"
"github.com/cilium/cilium/pkg/backoff"
"github.com/cilium/cilium/pkg/idpool"
"github.com/cilium/cilium/pkg/inctimer"
"github.com/cilium/cilium/pkg/kvstore"
"github.com/cilium/cilium/pkg/lock"
"github.com/cilium/cilium/pkg/logging"
"github.com/cilium/cilium/pkg/logging/logfields"
"github.com/cilium/cilium/pkg/option"
"github.com/cilium/cilium/pkg/rate"
)
var (
log = logging.DefaultLogger.WithField(logfields.LogSubsys, "allocator")
)
const (
// maxAllocAttempts is the number of attempted allocation requests
// performed before failing.
maxAllocAttempts = 16
)
// Allocator is a distributed ID allocator backed by a KVstore. It maps
// arbitrary keys to identifiers. Multiple users on different cluster nodes can
// in parallel request the ID for keys and are guaranteed to retrieve the same
// ID for an identical key.
//
// While the details of how keys are stored are delegated to Backend
// implementations, some expectations exist. See pkg/kvstore/allocator for
// details about the kvstore implementation.
//
// A node takes a reference to an identity when it is in-use on that node, and
// the identity remains in-use if there is any node reference to it. When an
// identity no longer has any node references, it may be garbage collected. No
// guarantees are made at that point and the numeric identity may be reused.
// Note that the numeric IDs are selected locally and verified with the Backend.
//
// Lookup ID by key:
// 1. Return ID from local cache updated by watcher (no Backend interactions)
// 2. Do ListPrefix() on slave key excluding node suffix, return the first
// result that matches the exact prefix.
//
// Lookup key by ID:
// 1. Return key from local cache updated by watcher (no Backend interactions)
// 2. Do Get() on master key, return result
//
// Allocate:
// 1. Check local key cache, increment, and return if key is already in use
// locally (no Backend interactions)
// 2. Check local cache updated by watcher, if...
//
// ... match found:
//
// 2.1 Create a new slave key. This operation is potentially racy as the master
// key can be removed in the meantime.
// - etcd: Create is made conditional on existence of master key
// - consul: locking
//
// ... match not found:
//
// 2.1 Select new unused id from local cache
// 2.2 Create a new master key with the condition that it may not exist
// 2.3 Create a new slave key
//
// 1.1. If found, increment and return (no Backend interactions)
// 2. Lookup ID by key in local cache or via first slave key found in Backend
//
// Release:
// 1. Reduce local reference count until last use (no Backend interactions)
// 2. Delete slave key (basePath/value/key1/node1)
// This automatically guarantees that when the last node has released the
// key, the key is no longer found by Get()
// 3. If the node goes down, all slave keys of that node are removed after
// the TTL expires (auto release).
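//
// As an informal illustration of the reference semantics above (the slave-key
// paths follow the basePath/value/<key>/<node> pattern mentioned under
// Release; the master-key layout is backend specific and not shown):
//
//	node1 allocates key K -> ID selected, slave key basePath/value/K/node1 created
//	node2 allocates key K -> same ID returned, slave key basePath/value/K/node2 created
//	node1 releases key K  -> basePath/value/K/node1 deleted, ID still referenced by node2
//	node2 releases key K  -> last slave key deleted, the ID may now be garbage collected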
type Allocator struct {
// events is a channel which will receive AllocatorEvent as IDs are
// added, modified or removed from the allocator
events AllocatorEventSendChan
// keyType is an instance of the type to be used as allocator key.
keyType AllocatorKey
// min is the lower limit when allocating IDs. The allocator will never
// allocate an ID less than this value.
min idpool.ID
// max is the upper limit when allocating IDs. The allocator will never
// allocate an ID greater than this value.
max idpool.ID
// prefixMask, if set, will be ORed to all selected IDs prior to
// allocation
prefixMask idpool.ID
// localKeys contains all keys including their reference count for keys
// which have been allocated and are in local use
localKeys *localKeys
// suffix is the suffix attached to keys which must be node specific,
// this is typically set to the node's IP address
suffix string
// backoffTemplate is the backoff configuration while allocating
backoffTemplate backoff.Exponential
// slaveKeysMutex protects the concurrent access of the slave key by this
// agent.
slaveKeysMutex lock.Mutex
// mainCache is the main cache, representing the allocator contents of
// the primary kvstore connection
mainCache cache
// remoteCachesMutex protects access to remoteCaches
remoteCachesMutex lock.RWMutex
// remoteCaches is the list of additional remote caches being watched
// in addition to the main cache
remoteCaches map[string]*RemoteCache
// stopGC is the channel used to stop the garbage collector
stopGC chan struct{}
// initialListDone is a channel that is closed when the initial
// synchronization has completed
initialListDone waitChan
// idPool maintains a pool of available ids for allocation.
idPool idpool.IDPool
// enableMasterKeyProtection if true, causes master keys that are still in
// local use to be automatically re-created
enableMasterKeyProtection bool
// disableGC disables the garbage collector
disableGC bool
// disableAutostart prevents starting the allocator when it is initialized
disableAutostart bool
// backend is the upstream, shared backend to which we synchronize local
// information
backend Backend
}
// AllocatorOption is the base type for allocator options
type AllocatorOption func(*Allocator)
// NewAllocatorForGC returns an allocator that can be used to run RunGC()
//
// The allocator can be configured by passing in additional options:
// - WithMin(id) - minimum ID to allocate (default: 1)
// - WithMax(id) - maximum ID to allocate (default max(uint64))
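//
// As an illustrative sketch only (the backend and rateLimiter values and the
// ID range are assumptions made for the example, not defaults of this
// package), the operator-side GC loop could be driven roughly like this:
//
//	a := NewAllocatorForGC(backend, WithMin(idpool.ID(256)), WithMax(idpool.ID(65535)))
//	staleKeys, stats, err := a.RunGC(rateLimiter, nil)
//	// feed staleKeys back into the next RunGC round; stats reports Alive/Deleted counts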
func NewAllocatorForGC(backend Backend, opts ...AllocatorOption) *Allocator {
a := &Allocator{
backend: backend,
min: idpool.ID(1),
max: idpool.ID(^uint64(0)),
}
for _, fn := range opts {
fn(a)
}
return a
}
type GCStats struct {
// Alive is the number of identities alive
Alive int
// Deleted is the number of identities deleted
Deleted int
}
// Backend represents clients to remote ID allocation systems, such as KV
// Stores. These are used to coordinate key->ID allocation between cilium
// nodes.
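//
// Several methods below come in *IfLocked variants: combined with Lock they
// allow a read-validate-write sequence against the backend. A rough sketch,
// with error handling elided and backend, ctx, key and newID assumed to be
// provided by the caller:
//
//	kvLock, _ := backend.Lock(ctx, key)
//	defer kvLock.Unlock(context.Background())
//	id, _ := backend.GetIfLocked(ctx, key, kvLock) // only proceeds while the lock is still valid
//	if id == idpool.NoID {
//		_, _ = backend.AllocateIDIfLocked(ctx, newID, key, kvLock)
//	}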
type Backend interface {
// DeleteAllKeys will delete all keys. It is used in tests.
DeleteAllKeys(ctx context.Context)
// Encode encodes a key string as required to conform to the key
// restrictions of the backend
Encode(string) string
// AllocateID creates a new key->ID association. This is expected to be a
// create-only operation, and the ID may be allocated by another node. An
// error in that case is not expected to be fatal. The actual ID is obtained
// by Allocator from the local idPool, which is updated with used-IDs as the
// Backend makes calls to the handler in ListAndWatch.
// The implementation of the backend might return an AllocatorKey that is
// a copy of 'key' with an internal reference of the backend key or, if it
// doesn't use the internal reference of the backend key it simply returns
// 'key'. In case of an error the returned 'AllocatorKey' should be nil.
AllocateID(ctx context.Context, id idpool.ID, key AllocatorKey) (AllocatorKey, error)
// AllocateIDIfLocked behaves like AllocateID but when lock is non-nil the
// operation proceeds only if it is still valid.
// The implementation of the backend might return an AllocatorKey that is
// a copy of 'key' with an internal reference of the backend key or, if it
// doesn't use the internal reference of the backend key it simply returns
// 'key'. In case of an error the returned 'AllocatorKey' should be nil.
AllocateIDIfLocked(ctx context.Context, id idpool.ID, key AllocatorKey, lock kvstore.KVLocker) (AllocatorKey, error)
// AcquireReference records that this node is using this key->ID mapping.
// This is distinct from any reference counting within this agent; only one
// reference exists for this node for any number of managed endpoints using
// it.
// The semantics of cleaning up stale references is delegated to the Backend
// implementation. RunGC may need to be invoked.
// This can race, and so lock can be provided (via a Lock call, below).
AcquireReference(ctx context.Context, id idpool.ID, key AllocatorKey, lock kvstore.KVLocker) error
// Release releases the use of an ID associated with the provided key. It
// does not guard against concurrent releases.
Release(ctx context.Context, id idpool.ID, key AllocatorKey) (err error)
// UpdateKey refreshes the record that this node is using this key -> id
// mapping. When reliablyMissing is set it will also recreate missing master or
// slave keys.
UpdateKey(ctx context.Context, id idpool.ID, key AllocatorKey, reliablyMissing bool) error
// UpdateKeyIfLocked behaves like UpdateKey but when lock is non-nil the operation proceeds only if it is still valid.
UpdateKeyIfLocked(ctx context.Context, id idpool.ID, key AllocatorKey, reliablyMissing bool, lock kvstore.KVLocker) error
// Get returns the allocated ID for this key as seen by the Backend. This may
// have been created by other agents.
Get(ctx context.Context, key AllocatorKey) (idpool.ID, error)
// GetIfLocked behaves like Get, but when lock is non-nil the
// operation proceeds only if it is still valid.
GetIfLocked(ctx context.Context, key AllocatorKey, lock kvstore.KVLocker) (idpool.ID, error)
// GetByID returns the key associated with this ID, as seen by the Backend.
// This may have been created by other agents.
GetByID(ctx context.Context, id idpool.ID) (AllocatorKey, error)
// Lock provides an opaque lock object that can be used, later, to ensure
// that the key has not changed since the lock was created. This can be done
// with GetIfLocked.
Lock(ctx context.Context, key AllocatorKey) (kvstore.KVLocker, error)
// ListAndWatch begins synchronizing the local Backend instance with its
// remote.
ListAndWatch(ctx context.Context, handler CacheMutations, stopChan chan struct{})
// RunGC reaps stale or unused identities within the Backend and makes them
// available for reuse. It is used by the cilium-operator and is not invoked
// by cilium-agent.
// Note: not all Backend implementations rely on this; some, such as the
// kvstore backends, may use leases to expire keys.
RunGC(ctx context.Context, rateLimit *rate.Limiter, staleKeysPrevRound map[string]uint64, minID idpool.ID, maxID idpool.ID) (map[string]uint64, *GCStats, error)
// RunLocksGC reaps stale or unused locks within the Backend. It is used by
// the cilium-operator and is not invoked by cilium-agent. Returns
// a map of locks currently being held in the KVStore including the ones
// that failed to be GCed.
// Note: not all Backend implementations rely on this; some, such as the
// kvstore backends, may use leases to expire keys.
RunLocksGC(ctx context.Context, staleKeysPrevRound map[string]kvstore.Value) (map[string]kvstore.Value, error)
// Status returns a human-readable status of the Backend.
Status() (string, error)
}
// NewAllocator creates a new Allocator. Any type can be used as key as long as
// the type implements the AllocatorKey interface. A variable of the type has
// to be passed into NewAllocator() to make the type known. The specified base
// path is used to prefix all keys in the kvstore. The provided path must be
// unique.
//
// The allocator can be configured by passing in additional options:
// - WithEvents() - enable Events channel
// - WithMin(id) - minimum ID to allocate (default: 1)
// - WithMax(id) - maximum ID to allocate (default max(uint64))
//
// After creation, IDs can be allocated with Allocate() and released with
// Release()
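//
// A minimal construction sketch, assuming the caller provides a concrete
// AllocatorKey value named keyType and a Backend named backend; the event
// channel is drained while NewAllocator runs, per the caveat documented on
// WithEvents:
//
//	events := make(AllocatorEventChan, 1024)
//	go func() {
//		for range events {
//		}
//	}()
//	a, err := NewAllocator(keyType, backend, WithEvents(events), WithMin(idpool.ID(256)))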
func NewAllocator(typ AllocatorKey, backend Backend, opts ...AllocatorOption) (*Allocator, error) {
a := &Allocator{
keyType: typ,
backend: backend,
min: idpool.ID(1),
max: idpool.ID(^uint64(0)),
localKeys: newLocalKeys(),
stopGC: make(chan struct{}),
suffix: uuid.New().String()[:10],
remoteCaches: map[string]*RemoteCache{},
backoffTemplate: backoff.Exponential{
Min: time.Duration(20) * time.Millisecond,
Factor: 2.0,
},
}
for _, fn := range opts {
fn(a)
}
a.mainCache = newCache(a)
if a.suffix == "<nil>" {
return nil, errors.New("allocator suffix is <nil> and unlikely unique")
}
if a.min < 1 {
return nil, errors.New("minimum ID must be >= 1")
}
if a.max <= a.min {
return nil, fmt.Errorf("maximum ID must be greater than minimum ID: configured max %v, min %v", a.max, a.min)
}
a.idPool = idpool.NewIDPool(a.min, a.max)
if !a.disableAutostart {
a.start()
}
return a, nil
}
func (a *Allocator) start() {
a.initialListDone = a.mainCache.start()
if !a.disableGC {
go func() {
select {
case <-a.initialListDone:
case <-time.After(option.Config.AllocatorListTimeout):
log.Fatalf("Timeout while waiting for initial allocator state")
}
a.startLocalKeySync()
}()
}
}
// WithBackend sets this allocator to use backend. It is expected to be used at
// initialization.
func WithBackend(backend Backend) AllocatorOption {
return func(a *Allocator) {
a.backend = backend
}
}
// WithEvents enables receiving of events.
//
// CAUTION: When using this option, the provided channel must be continuously
// read while NewAllocator() is being called to ensure that the channel does
// not block indefinitely while NewAllocator() emits events on it while
// populating the initial cache.
func WithEvents(events AllocatorEventSendChan) AllocatorOption {
return func(a *Allocator) { a.events = events }
}
// WithMin sets the minimum identifier to be allocated
func WithMin(id idpool.ID) AllocatorOption {
return func(a *Allocator) { a.min = id }
}
// WithMax sets the maximum identifier to be allocated
func WithMax(id idpool.ID) AllocatorOption {
return func(a *Allocator) { a.max = id }
}
// WithPrefixMask sets the prefix used for all ID allocations. If set, the mask
// will be ORed to all selected IDs prior to allocation. It is the
// responsibility of the caller to ensure that the mask is not conflicting with
// min..max.
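//
// For example (values are illustrative only), with WithPrefixMask(idpool.ID(0x100))
// a locally selected ID of 0x2A would be exposed as 0x12A (0x100 | 0x2A).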
func WithPrefixMask(mask idpool.ID) AllocatorOption {
return func(a *Allocator) { a.prefixMask = mask }
}
// WithMasterKeyProtection will watch for delete events on master keys and
// re-create them if local usage suggests that the key is still in use
func WithMasterKeyProtection() AllocatorOption {
return func(a *Allocator) { a.enableMasterKeyProtection = true }
}
// WithoutGC disables the use of the garbage collector
func WithoutGC() AllocatorOption {
return func(a *Allocator) { a.disableGC = true }
}
// WithoutAutostart prevents starting the allocator when it is initialized
func WithoutAutostart() AllocatorOption {
return func(a *Allocator) { a.disableAutostart = true }
}
// GetEvents returns the events channel given to the allocator when
// constructed.
// Note: This channel is not owned by the allocator!
func (a *Allocator) GetEvents() AllocatorEventSendChan {
return a.events
}
// Delete deletes an allocator and stops the garbage collector
func (a *Allocator) Delete() {
close(a.stopGC)
a.mainCache.stop()
}
// WaitForInitialSync waits until the initial sync is complete
func (a *Allocator) WaitForInitialSync(ctx context.Context) error {
select {
case <-a.initialListDone:
case <-ctx.Done():
return fmt.Errorf("identity sync was cancelled: %s", ctx.Err())
}
return nil
}
// RangeFunc is the function called by RangeCache
type RangeFunc func(idpool.ID, AllocatorKey)
// ForeachCache iterates over the allocator cache and calls RangeFunc on each
// cached entry
func (a *Allocator) ForeachCache(cb RangeFunc) {
a.mainCache.foreach(cb)
a.remoteCachesMutex.RLock()
for _, rc := range a.remoteCaches {
rc.cache.foreach(cb)
}
a.remoteCachesMutex.RUnlock()
}
// selectAvailableID selects an available ID.
// Returns a triple of the selected ID ORed with prefixMask, the ID string and
// the originally selected ID.
func (a *Allocator) selectAvailableID() (idpool.ID, string, idpool.ID) {
if id := a.idPool.LeaseAvailableID(); id != idpool.NoID {
unmaskedID := id
id |= a.prefixMask
return id, id.String(), unmaskedID
}
return 0, "", 0
}
// AllocatorKey is the interface to implement in order for a type to be used as
// key for the allocator. The key's data is assumed to be a collection of
// pkg/label.Label, and the functions reflect this somewhat.
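//
// The GetKey/PutKey and GetAsMap/PutKeyFromMap pairs below are described as
// inverse operations, so for any well-behaved implementation k one would
// expect the round trips to be lossless:
//
//	k.PutKey(k.GetKey()).GetKey() == k.GetKey()
//	k.PutKeyFromMap(k.GetAsMap()).GetKey() == k.GetKey()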
type AllocatorKey interface {
fmt.Stringer
// GetKey returns the canonical string representation of the key
GetKey() string
// PutKey stores the information in v into the key. This is the inverse
// operation to GetKey
PutKey(v string) AllocatorKey
// GetAsMap returns the key as a collection of "labels" with a key and value.
// This is the inverse operation to PutKeyFromMap.
GetAsMap() map[string]string
// PutKeyFromMap stores the labels in v into the key to be used later. This
// is the inverse operation to GetAsMap.
PutKeyFromMap(v map[string]string) AllocatorKey
// PutValue puts metadata inside the global identity for the given 'key' with
// the given 'value'.
PutValue(key any, value any) AllocatorKey
// Value returns the value stored in the metadata map.
Value(key any) any
}
func (a *Allocator) encodeKey(key AllocatorKey) string {
return a.backend.Encode(key.GetKey())
}
// Return values:
// 1. allocated ID
// 2. whether the ID is newly allocated from kvstore
// 3. whether this is the first owner that holds a reference to the key in
// localkeys store
// 4. error in case of failure
func (a *Allocator) lockedAllocate(ctx context.Context, key AllocatorKey) (idpool.ID, bool, bool, error) {
var firstUse bool
kvstore.Trace("Allocating key in kvstore", nil, logrus.Fields{fieldKey: key})
k := a.encodeKey(key)
lock, err := a.backend.Lock(ctx, key)
if err != nil {
return 0, false, false, err
}
defer lock.Unlock(context.Background())
// fetch first key that matches /value/<key> while ignoring the
// node suffix
value, err := a.GetIfLocked(ctx, key, lock)
if err != nil {
return 0, false, false, err
}
kvstore.Trace("kvstore state is: ", nil, logrus.Fields{fieldID: value})
a.slaveKeysMutex.Lock()
defer a.slaveKeysMutex.Unlock()
// We shouldn't assume from the fact that the master key does not exist in the
// kvstore that localKeys does not have it. The kvstore might have lost all of
// its data but the local agent still holds a reference for the given master key.
if value == 0 {
value = a.localKeys.lookupKey(k)
if value != 0 {
// re-create master key
if err := a.backend.UpdateKeyIfLocked(ctx, value, key, true, lock); err != nil {
return 0, false, false, fmt.Errorf("unable to re-create missing master key '%s': %s while allocating ID: %s", key, value, err)
}
}
} else {
_, firstUse, err = a.localKeys.allocate(k, key, value)
if err != nil {
return 0, false, false, fmt.Errorf("unable to reserve local key '%s': %s", k, err)
}
if firstUse {
log.WithField(fieldKey, k).Debug("Reserved new local key")
} else {
log.WithField(fieldKey, k).Debug("Reusing existing local key")
}
}
if value != 0 {
log.WithField(fieldKey, k).Info("Reusing existing global key")
if err = a.backend.AcquireReference(ctx, value, key, lock); err != nil {
a.localKeys.release(k)
return 0, false, false, fmt.Errorf("unable to create secondary key '%s': %s", k, err)
}
// mark the key as verified in the local cache
if err := a.localKeys.verify(k); err != nil {
log.WithError(err).Error("BUG: Unable to verify local key")
}
return value, false, firstUse, nil
}
log.WithField(fieldKey, k).Debug("Allocating new master ID")
id, strID, unmaskedID := a.selectAvailableID()
if id == 0 {
return 0, false, false, fmt.Errorf("no more available IDs in configured space")
}
kvstore.Trace("Selected available key ID", nil, logrus.Fields{fieldID: id})
releaseKeyAndID := func() {
a.localKeys.release(k)
a.idPool.Release(unmaskedID) // This returns this ID to be re-used for other keys
}
oldID, firstUse, err := a.localKeys.allocate(k, key, id)
if err != nil {
a.idPool.Release(unmaskedID)
return 0, false, false, fmt.Errorf("unable to reserve local key '%s': %s", k, err)
}
// Another local writer beat us to allocating an ID for the same key,
// start over
if id != oldID {
releaseKeyAndID()
return 0, false, false, fmt.Errorf("another writer has allocated key %s", k)
}
// Check that this key has not been allocated in the cluster during our
// operation here
value, err = a.GetNoCache(ctx, key)
if err != nil {
releaseKeyAndID()
return 0, false, false, err
}
if value != 0 {
releaseKeyAndID()
return 0, false, false, fmt.Errorf("Found master key after proceeding with new allocation for %s", k)
}
key, err = a.backend.AllocateIDIfLocked(ctx, id, key, lock)
if err != nil {
// Creation failed. Another agent most likely beat us to allocating this
// ID, retry.
releaseKeyAndID()
return 0, false, false, fmt.Errorf("unable to allocate ID %s for key %s: %s", strID, key, err)
}
// Notify pool that leased ID is now in-use.
a.idPool.Use(unmaskedID)
if err = a.backend.AcquireReference(ctx, id, key, lock); err != nil {
// We will leak the master key here as the key has already been
// exposed and may be in use by other nodes. The garbage
// collector will release it again.
releaseKeyAndID()
return 0, false, false, fmt.Errorf("secondary key creation failed '%s': %s", k, err)
}
// mark the key as verified in the local cache
if err := a.localKeys.verify(k); err != nil {
log.WithError(err).Error("BUG: Unable to verify local key")
}
log.WithField(fieldKey, k).Info("Allocated new global key")
return id, true, firstUse, nil
}
// Allocate will retrieve the ID for the provided key. If no ID has been
// allocated for this key yet, a new ID will be allocated. If allocation fails,
// most likely due to a parallel allocation of the same ID by another user,
// allocation is re-attempted for maxAllocAttempts times.
//
// Return values:
// 1. allocated ID
// 2. whether the ID is newly allocated from kvstore
// 3. whether this is the first owner that holds a reference to the key in
// localkeys store
// 4. error in case of failure
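//
// A caller-facing sketch, assuming ctx, a (an *Allocator) and key (a concrete
// AllocatorKey) are provided by the caller:
//
//	id, isNew, firstUse, err := a.Allocate(ctx, key)
//	if err != nil {
//		return err
//	}
//	_ = isNew    // true if the ID was just created in the kvstore by this call
//	_ = firstUse // true if this call took this node's first local reference to the key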
func (a *Allocator) Allocate(ctx context.Context, key AllocatorKey) (idpool.ID, bool, bool, error) {
var (
err error
value idpool.ID
isNew bool
firstUse bool
k = a.encodeKey(key)
)
log.WithField(fieldKey, key).Debug("Allocating key")
select {
case <-a.initialListDone:
case <-ctx.Done():
return 0, false, false, fmt.Errorf("allocation was cancelled while waiting for initial key list to be received: %s", ctx.Err())
}
kvstore.Trace("Allocating from kvstore", nil, logrus.Fields{fieldKey: key})
// make a copy of the template and customize it
boff := a.backoffTemplate
boff.Name = key.String()
for attempt := 0; attempt < maxAllocAttempts; attempt++ {
// Check our list of local keys already in use and increment the
// refcnt. The returned key must be released afterwards. No kvstore
// operation was performed for this allocation.
// We also do this on every loop as a different Allocate call might have
// allocated the key while we are attempting to allocate in this
// execution thread. It does not hurt to check if localKeys contains a
// reference for the key that we are attempting to allocate.
if val := a.localKeys.use(k); val != idpool.NoID {
kvstore.Trace("Reusing local id", nil, logrus.Fields{fieldID: val, fieldKey: key})
a.mainCache.insert(key, val)
return val, false, false, nil
}
// FIXME: Add non-locking variant
value, isNew, firstUse, err = a.lockedAllocate(ctx, key)
if err == nil {
a.mainCache.insert(key, value)
log.WithField(fieldKey, key).WithField(fieldID, value).Debug("Allocated key")
return value, isNew, firstUse, nil
}
scopedLog := log.WithFields(logrus.Fields{
fieldKey: key,
logfields.Attempt: attempt,
})
select {
case <-ctx.Done():
scopedLog.WithError(ctx.Err()).Warning("Ongoing key allocation has been cancelled")
return 0, false, false, fmt.Errorf("key allocation cancelled: %s", ctx.Err())
default:
scopedLog.WithError(err).Warning("Key allocation attempt failed")
}
kvstore.Trace("Allocation attempt failed", err, logrus.Fields{fieldKey: key, logfields.Attempt: attempt})
if waitErr := boff.Wait(ctx); waitErr != nil {
return 0, false, false, waitErr
}
}
return 0, false, false, err
}
// GetIfLocked returns the ID which is allocated to a key. Returns an ID of
// NoID if no ID has been allocated to this key yet, provided the client is
// still holding the given lock.
func (a *Allocator) GetIfLocked(ctx context.Context, key AllocatorKey, lock kvstore.KVLocker) (idpool.ID, error) {
if id := a.mainCache.get(a.encodeKey(key)); id != idpool.NoID {
return id, nil
}
return a.backend.GetIfLocked(ctx, key, lock)
}
// Get returns the ID which is allocated to a key. Returns an ID of NoID if no ID
// has been allocated to this key yet.
func (a *Allocator) Get(ctx context.Context, key AllocatorKey) (idpool.ID, error) {
if id := a.mainCache.get(a.encodeKey(key)); id != idpool.NoID {
return id, nil
}
return a.GetNoCache(ctx, key)
}
// GetNoCache returns the ID which is allocated to a key in the kvstore,
// bypassing the local copy of allocated keys.
func (a *Allocator) GetNoCache(ctx context.Context, key AllocatorKey) (idpool.ID, error) {
return a.backend.Get(ctx, key)
}
// GetByID returns the key associated with an ID. Returns nil if no key is
// associated with the ID.
func (a *Allocator) GetByID(ctx context.Context, id idpool.ID) (AllocatorKey, error) {
if key := a.mainCache.getByID(id); key != nil {
return key, nil
}
return a.backend.GetByID(ctx, id)
}
// GetIncludeRemoteCaches returns the ID which is allocated to a key. Includes the
// caches of watched remote kvstores in the query. Returns an ID of NoID if no
// ID has been allocated in any remote kvstore to this key yet.
func (a *Allocator) GetIncludeRemoteCaches(ctx context.Context, key AllocatorKey) (idpool.ID, error) {
encoded := a.encodeKey(key)
// check main cache first
if id := a.mainCache.get(encoded); id != idpool.NoID {
return id, nil
}
// check remote caches
a.remoteCachesMutex.RLock()
for _, rc := range a.remoteCaches {
if id := rc.cache.get(encoded); id != idpool.NoID {
a.remoteCachesMutex.RUnlock()
return id, nil
}
}
a.remoteCachesMutex.RUnlock()
// check main backend
if id, err := a.backend.Get(ctx, key); id != idpool.NoID || err != nil {
return id, err
}
// we skip checking remote backends explicitly here, to avoid
// accidentally overloading them in case of lookups for invalid identities
return idpool.NoID, nil
}
// GetByIDIncludeRemoteCaches returns the key associated with an ID. Includes
// the caches of watched remote kvstores in the query.
// Returns nil if no key is associated with the ID.
func (a *Allocator) GetByIDIncludeRemoteCaches(ctx context.Context, id idpool.ID) (AllocatorKey, error) {
// check main cache first
if key := a.mainCache.getByID(id); key != nil {
return key, nil
}
// check remote caches
a.remoteCachesMutex.RLock()
for _, rc := range a.remoteCaches {
if key := rc.cache.getByID(id); key != nil {
a.remoteCachesMutex.RUnlock()
return key, nil
}
}
a.remoteCachesMutex.RUnlock()
// check main backend
if key, err := a.backend.GetByID(ctx, id); key != nil || err != nil {
return key, err
}
// we skip checking remote backends explicitly here, to avoid
// accidentally overloading them in case of lookups for invalid identities
return nil, nil
}
// Release releases the use of an ID associated with the provided key. After
// the last user has released the ID, the key is removed in the KVstore and
// the returned lastUse value is true.
func (a *Allocator) Release(ctx context.Context, key AllocatorKey) (lastUse bool, err error) {
log.WithField(fieldKey, key).Info("Releasing key")
select {
case <-a.initialListDone:
case <-ctx.Done():
return false, fmt.Errorf("release was cancelled while waiting for initial key list to be received: %s", ctx.Err())
}
k := a.encodeKey(key)
a.slaveKeysMutex.Lock()
defer a.slaveKeysMutex.Unlock()
// release the key locally, if it was the last use, remove the node
// specific value key to remove the global reference mark
var id idpool.ID
lastUse, id, err = a.localKeys.release(k)
if err != nil {
return lastUse, err
}
if lastUse {
// Since in CRD mode we don't have a way to map which identity is being
// used by a node, we need to also pass the ID to the release function.
// This allows the CRD store to find the right identity by its ID and
// remove the node reference on that identity.
a.backend.Release(ctx, id, key)
}
return lastUse, err
}
// RunGC scans the kvstore for unused master keys and removes them
func (a *Allocator) RunGC(rateLimit *rate.Limiter, staleKeysPrevRound map[string]uint64) (map[string]uint64, *GCStats, error) {
return a.backend.RunGC(context.TODO(), rateLimit, staleKeysPrevRound, a.min, a.max)
}
// RunLocksGC scans the kvstore for stale locks and removes them
func (a *Allocator) RunLocksGC(ctx context.Context, staleLocksPrevRound map[string]kvstore.Value) (map[string]kvstore.Value, error) {
return a.backend.RunLocksGC(ctx, staleLocksPrevRound)
}
// DeleteAllKeys will delete all keys. It is expected to be used in tests.
func (a *Allocator) DeleteAllKeys() {
a.backend.DeleteAllKeys(context.TODO())
}
// syncLocalKeys checks the kvstore and verifies that a master key exists for
// all locally used allocations. This will restore master keys if deleted for
// some reason.
func (a *Allocator) syncLocalKeys() error {
// Create a local copy of all local allocations to not require to hold
// any locks while performing kvstore operations. Local use can
// disappear while we perform the sync but that is fine as worst case,
// a master key is created for a slave key that no longer exists. The
// garbage collector will remove it again.
ids := a.localKeys.getVerifiedIDs()
for id, value := range ids {
if err := a.backend.UpdateKey(context.TODO(), id, value, false); err != nil {
log.WithError(err).WithFields(logrus.Fields{
fieldKey: value,
fieldID: id,
}).Warning("Unable to sync key")
}
}
return nil
}
func (a *Allocator) startLocalKeySync() {
go func(a *Allocator) {
kvTimer, kvTimerDone := inctimer.New()
defer kvTimerDone()
for {
if err := a.syncLocalKeys(); err != nil {
log.WithError(err).Warning("Unable to run local key sync routine")
}
select {
case <-a.stopGC:
log.Debug("Stopped master key sync routine")
return
case <-kvTimer.After(option.Config.KVstorePeriodicSync):
}
}
}(a)
}
// AllocatorEventChan is a channel to receive allocator events on
type AllocatorEventChan chan AllocatorEvent
// Send- and receive-only versions of the above.
type AllocatorEventRecvChan = <-chan AllocatorEvent
type AllocatorEventSendChan = chan<- AllocatorEvent
// AllocatorEvent is an event sent over AllocatorEventChan
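//
// A sketch of consuming events (the buffer size is an arbitrary example; keep
// the channel drained as described for WithEvents):
//
//	events := make(AllocatorEventChan, 1024)
//	go func() {
//		for ev := range events {
//			switch ev.Typ {
//			case kvstore.EventTypeCreate, kvstore.EventTypeModify:
//				// ev.ID now maps to ev.Key
//			case kvstore.EventTypeDelete:
//				// ev.ID has been removed
//			}
//		}
//	}()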
type AllocatorEvent struct {
// Typ is the type of event (create / modify / delete)
Typ kvstore.EventType
// ID is the allocated ID
ID idpool.ID
// Key is the key associated with the ID
Key AllocatorKey
}
// RemoteCache represents the cache content of an additional kvstore managing
// identities. The contents are not directly accessible but will be merged into
// the results reported by the ForeachCache() function.
type RemoteCache struct {
name string
allocator *Allocator
cache *cache
watchFunc func(ctx context.Context, remote *RemoteCache, onSync func(context.Context))
}
func (a *Allocator) NewRemoteCache(remoteName string, remoteAlloc *Allocator) *RemoteCache {
return &RemoteCache{
name: remoteName,
allocator: remoteAlloc,
cache: &remoteAlloc.mainCache,
watchFunc: a.WatchRemoteKVStore,
}
}
// WatchRemoteKVStore starts watching the allocator base prefix of the kvstore
// represented by the provided backend. A local cache of all identities of that
// kvstore will be maintained in the RemoteCache structure returned and will
// start being reported in the identities returned by the ForeachCache()
// function. RemoteName should be unique per logical "remote".
func (a *Allocator) WatchRemoteKVStore(ctx context.Context, rc *RemoteCache, onSync func(context.Context)) {
scopedLog := log.WithField(logfields.ClusterName, rc.name)
scopedLog.Info("Starting remote kvstore watcher")
rc.allocator.start()
select {
case <-ctx.Done():
scopedLog.Debug("Context canceled before remote kvstore watcher synchronization completed: stale identities will now be drained")
rc.close()
a.remoteCachesMutex.RLock()
old := a.remoteCaches[rc.name]
a.remoteCachesMutex.RUnlock()
if old != nil {
old.cache.mutex.RLock()
defer old.cache.mutex.RUnlock()
}
// Drain all entries that might have been received until now, and that
// are not present in the current cache (if any). This ensures we do not
// leak any stale identity, and at the same time we do not invalidate the
// current state.
rc.cache.drainIf(func(id idpool.ID) bool {
if old == nil {
return true
}
_, ok := old.cache.nextCache[id]
return !ok
})
return
case <-rc.cache.listDone:
scopedLog.Info("Remote kvstore watcher successfully synchronized and registered")
}
a.remoteCachesMutex.Lock()
old := a.remoteCaches[rc.name]
a.remoteCaches[rc.name] = rc
a.remoteCachesMutex.Unlock()
if old != nil {
// In case of reconnection, let's emit a deletion event for all stale identities
// that are no longer present in the kvstore. We take the lock of the new cache
// to ensure that we observe a stable state during this process (i.e., no keys
// are added/removed in the meanwhile).
scopedLog.Debug("Another kvstore watcher was already registered: deleting stale identities")
rc.cache.mutex.RLock()
old.cache.drainIf(func(id idpool.ID) bool {
_, ok := rc.cache.nextCache[id]
return !ok
})
rc.cache.mutex.RUnlock()
}
// Execute the on-sync callback handler.
onSync(ctx)