forked from hashicorp/consul
-
Notifications
You must be signed in to change notification settings - Fork 0
/
roots.go
152 lines (135 loc) · 5.17 KB
/
roots.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
package leafcert
import (
"context"
"sync"
"sync/atomic"
"github.com/hernad/consul/agent/cache"
"github.com/hernad/consul/agent/structs"
)
// rootWatcher helps let multiple requests for leaf certs to coordinate sharing
// a single long-lived watch for the root certs. This allows the leaf cert
// requests to notice when the roots rotate and trigger their reissuance.
type rootWatcher struct {
// This is the "top-level" internal context. This is used to cancel
// background operations.
ctx context.Context
// rootsReader is an interface to access connect CA roots.
rootsReader RootsReader
// lock protects access to the subscribers map and cancel
lock sync.Mutex
// subscribers is a set of chans, one for each currently in-flight
// Fetch. These chans have root updates delivered from the root watcher.
subscribers map[chan struct{}]struct{}
// cancel is a func to call to stop the background root watch if any.
// You must hold lock to read (e.g. call) or write the value.
cancel func()
// testStart/StopCount are testing helpers that allow tests to
// observe the reference counting behavior that governs the shared root watch.
// It's not exactly pretty to expose internals like this, but seems cleaner
// than constructing elaborate and brittle test cases that we can infer
// correct behavior from, and simpler than trying to probe runtime goroutine
// traces to infer correct behavior that way. They must be accessed
// atomically.
testStartCount uint32
testStopCount uint32
}
// Subscribe is called on each fetch that is about to block and wait for
// changes to the leaf. It subscribes a chan to receive updates from the shared
// root watcher and triggers root watcher if it's not already running.
func (r *rootWatcher) Subscribe(rootUpdateCh chan struct{}) {
r.lock.Lock()
defer r.lock.Unlock()
// Lazy allocation
if r.subscribers == nil {
r.subscribers = make(map[chan struct{}]struct{})
}
// Make sure a root watcher is running. We don't only do this on first request
// to be more tolerant of errors that could cause the root watcher to fail and
// exit.
if r.cancel == nil {
ctx, cancel := context.WithCancel(r.ctx)
r.cancel = cancel
go r.rootWatcher(ctx)
}
r.subscribers[rootUpdateCh] = struct{}{}
}
// Unsubscribe is called when a blocking call exits to unsubscribe from root
// updates and possibly stop the shared root watcher if it's no longer needed.
// Note that typically root CA is still being watched by clients directly and
// probably by the ProxyConfigManager so it will stay hot in cache for a while,
// we are just not monitoring it for updates any more.
func (r *rootWatcher) Unsubscribe(rootUpdateCh chan struct{}) {
r.lock.Lock()
defer r.lock.Unlock()
delete(r.subscribers, rootUpdateCh)
if len(r.subscribers) == 0 && r.cancel != nil {
// This was the last request. Stop the root watcher.
r.cancel()
r.cancel = nil
}
}
func (r *rootWatcher) notifySubscribers() {
r.lock.Lock()
defer r.lock.Unlock()
for ch := range r.subscribers {
select {
case ch <- struct{}{}:
default:
// Don't block - chans are 1-buffered so this default case
// means the subscriber already holds an update signal.
}
}
}
// rootWatcher is the shared rootWatcher that runs in a background goroutine
// while needed by one or more inflight Fetch calls.
func (r *rootWatcher) rootWatcher(ctx context.Context) {
atomic.AddUint32(&r.testStartCount, 1)
defer atomic.AddUint32(&r.testStopCount, 1)
ch := make(chan cache.UpdateEvent, 1)
if err := r.rootsReader.Notify(ctx, "roots", ch); err != nil {
// Trigger all inflight watchers. We don't pass the error, but they will
// reload from cache and observe the same error and return it to the caller,
// or if it's transient, will continue and the next Fetch will get us back
// into the right state. Seems better than busy loop-retrying here given
// that almost any error we would see here would also be returned from the
// cache get this will trigger.
r.notifySubscribers()
return
}
var oldRoots *structs.IndexedCARoots
// Wait for updates to roots or all requests to stop
for {
select {
case <-ctx.Done():
return
case e := <-ch:
// Root response changed in some way. Note this might be the initial
// fetch.
if e.Err != nil {
// See above rationale about the error propagation
r.notifySubscribers()
continue
}
roots, ok := e.Result.(*structs.IndexedCARoots)
if !ok {
// See above rationale about the error propagation
r.notifySubscribers()
continue
}
// Check that the active root is actually different from the last CA
// config there are many reasons the config might have changed without
// actually updating the CA root that is signing certs in the cluster.
// The Fetch calls will also validate this since the first call here we
// don't know if it changed or not, but there is no point waking up all
// Fetch calls to check this if we know none of them will need to act on
// this update.
if oldRoots != nil && oldRoots.ActiveRootID == roots.ActiveRootID {
continue
}
// Distribute the update to all inflight requests - they will decide
// whether or not they need to act on it.
r.notifySubscribers()
oldRoots = roots
}
}
}