-
Notifications
You must be signed in to change notification settings - Fork 4.2k
/
retry_handler.go
256 lines (223 loc) · 7.09 KB
/
retry_handler.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
package kubernetes
import (
"fmt"
"strconv"
"sync"
"time"
"github.com/hashicorp/go-hclog"
sr "github.com/hashicorp/vault/serviceregistration"
"github.com/hashicorp/vault/serviceregistration/kubernetes/client"
"github.com/oklog/run"
)
// retryFreq is how often a failed state update is retried. It is a var
// (not a const) so tests can shorten it.
var retryFreq = 5 * time.Second
// retryHandler executes retries.
// It is thread-safe.
type retryHandler struct {
	// These don't need a mutex because they're never mutated.
	logger             hclog.Logger
	namespace, podName string

	// lock synchronizes setInitialState, initialStateSet, initialState,
	// and patchesToRetry.
	lock sync.Mutex

	// initialStateSet determines whether an initial state has been set
	// successfully or whether a state already exists.
	initialStateSet bool

	// initialState stores the state to be sent once, before any patches
	// are applied.
	initialState sr.State

	// patchesToRetry maps a label path to the patch pending for it. It will
	// only either not hold a particular label, or hold _the last_ state we
	// were aware of for it. These should only be updated after initial
	// state has been set.
	patchesToRetry map[string]*client.Patch

	// client is the Client to use when making API calls against kubernetes.
	client *client.Client
}
// Run starts the retry loop and must be called for retries to happen.
// It first attempts to push the initial state (bounded by shutdownCh),
// then launches a background goroutine that keeps retrying until Vault
// shuts down. wait lets Vault block on a clean exit of that goroutine.
func (r *retryHandler) Run(shutdownCh <-chan struct{}, wait *sync.WaitGroup) {
	r.setInitialState(shutdownCh)

	// Register before launching so Vault will wait for us to finish up.
	wait.Add(1)
	go func() {
		defer wait.Done()

		var group run.Group

		// Actor 1: watches for Vault's shutdown signal (or an internal
		// stop when another actor in the group finishes first).
		stopWatching := make(chan struct{})
		group.Add(func() error {
			select {
			case <-shutdownCh:
			case <-stopWatching:
			}
			return nil
		}, func(error) {
			close(stopWatching)
		})

		// Actor 2: periodically retries pending state updates; on
		// interrupt it stops the loop and shuts down the k8s client.
		stopUpdating := make(chan struct{})
		group.Add(func() error {
			r.periodicUpdateState(stopUpdating)
			return nil
		}, func(error) {
			close(stopUpdating)
			r.client.Shutdown()
		})

		if err := group.Run(); err != nil {
			r.logger.Error("error encountered during periodic state update", "error", err)
		}
	}()
}
// setInitialState tries to push the initial state remotely, waiting until
// the attempt completes or Vault begins shutting down, whichever is first.
// Failures are only logged here; periodicUpdateState retries them later.
func (r *retryHandler) setInitialState(shutdownCh <-chan struct{}) {
	r.lock.Lock()
	defer r.lock.Unlock()

	doneCh := make(chan struct{})
	go func() {
		// setInitialStateInternal must run with r.lock held; this outer
		// function holds it while blocked in the select below.
		// NOTE(review): if shutdownCh fires first, the outer function
		// returns and releases the lock while this goroutine may still be
		// running setInitialStateInternal without it — confirm this is
		// acceptable on the shutdown path.
		if err := r.setInitialStateInternal(); err != nil {
			if r.logger.IsWarn() {
				r.logger.Warn(fmt.Sprintf("unable to set initial state due to %s, will retry", err.Error()))
			}
		}
		close(doneCh)
	}()

	// Wait until the state is set or shutdown happens.
	select {
	case <-doneCh:
	case <-shutdownCh:
	}
}
// Notify records a patch to be applied, retrying it until it either
// completes without error or is superseded by a newer patch for the
// same path.
func (r *retryHandler) Notify(patch *client.Patch) {
	r.lock.Lock()
	defer r.lock.Unlock()

	// Until the initial state has gone out, only queue the patch.
	// Applying it now could get smashed by a late-arriving initial state.
	if !r.initialStateSet {
		if r.logger.IsWarn() {
			r.logger.Warn(fmt.Sprintf("cannot notify of present state for %s because initial state is unset", patch.Path))
		}
		r.patchesToRetry[patch.Path] = patch
		return
	}

	// Initial state has been sent, so it's OK to attempt the patch right
	// away; on failure, queue it for the periodic retry loop.
	err := r.client.PatchPod(r.namespace, r.podName, patch)
	if err == nil {
		return
	}
	if r.logger.IsWarn() {
		r.logger.Warn(fmt.Sprintf("unable to update state for %s due to %s, will retry", patch.Path, err.Error()))
	}
	r.patchesToRetry[patch.Path] = patch
}
// setInitialStateInternal writes the initial state into the pod's labels.
// It must be called with r.lock held and is a no-op once the state has
// been set successfully.
func (r *retryHandler) setInitialStateInternal() error {
	if r.initialStateSet {
		return nil
	}

	// Verify that the pod exists and our configuration looks good.
	pod, err := r.client.GetPod(r.namespace, r.podName)
	if err != nil {
		return err
	}
	if pod.Metadata == nil {
		// This should never happen IRL, just being defensive.
		return fmt.Errorf("no pod metadata on %+v", pod)
	}

	// The initial labels, in a fixed order.
	type labelPair struct {
		label, value string
	}
	pairs := []labelPair{
		{labelVaultVersion, r.initialState.VaultVersion},
		{labelActive, strconv.FormatBool(r.initialState.IsActive)},
		{labelSealed, strconv.FormatBool(r.initialState.IsSealed)},
		{labelPerfStandby, strconv.FormatBool(r.initialState.IsPerformanceStandby)},
		{labelInitialized, strconv.FormatBool(r.initialState.IsInitialized)},
	}

	if pod.Metadata.Labels == nil {
		// No labels map exists yet, so add the whole map (with our labels
		// inside it) in a single call. Why this differs from the per-field
		// approach below is discussed here:
		// https://stackoverflow.com/questions/57480205/error-while-applying-json-patch-to-kubernetes-custom-resource
		all := make(map[string]string, len(pairs))
		for _, p := range pairs {
			all[p.label] = p.value
		}
		if err := r.client.PatchPod(r.namespace, r.podName, &client.Patch{
			Operation: client.Add,
			Path:      "/metadata/labels",
			Value:     all,
		}); err != nil {
			return err
		}
		r.initialStateSet = true
		return nil
	}

	// The labels map already exists, so replace each field individually.
	patches := make([]*client.Patch, len(pairs))
	for i, p := range pairs {
		patches[i] = &client.Patch{
			Operation: client.Replace,
			Path:      pathToLabels + p.label,
			Value:     p.value,
		}
	}
	if err := r.client.PatchPod(r.namespace, r.podName, patches...); err != nil {
		return err
	}
	r.initialStateSet = true
	return nil
}
// periodicUpdateState keeps retrying pending work every retryFreq until
// stopCh is closed. The first attempt runs immediately rather than after
// a full tick, so the initial state isn't needlessly delayed.
func (r *retryHandler) periodicUpdateState(stopCh chan struct{}) {
	ticker := time.NewTicker(retryFreq)
	defer ticker.Stop()

	for {
		// Attempt first, then wait; avoids idling through the first tick.
		r.updateState()

		select {
		case <-stopCh:
			return
		case <-ticker.C:
		}
	}
}
// updateState performs one retry pass: it ensures the initial state has
// been set (a no-op if it already has), then attempts to flush every
// queued patch in a single call. On success the retry queue is cleared;
// on failure everything is left in place for the next pass.
func (r *retryHandler) updateState() {
	r.lock.Lock()
	defer r.lock.Unlock()

	// Initial state must be set first, or subsequent notifications we've
	// received could get smashed by a late-arriving initial state.
	// If the state is already set, this is a no-op.
	if err := r.setInitialStateInternal(); err != nil {
		if r.logger.IsWarn() {
			r.logger.Warn(fmt.Sprintf("unable to set initial state due to %s, will retry", err.Error()))
		}
		// On failure, we leave the initial state func populated for
		// the next retry.
		return
	}

	if len(r.patchesToRetry) == 0 {
		// Nothing further to do here.
		return
	}

	// Flatten the pending patches into one batched call.
	patches := make([]*client.Patch, 0, len(r.patchesToRetry))
	for _, patch := range r.patchesToRetry {
		patches = append(patches, patch)
	}

	if err := r.client.PatchPod(r.namespace, r.podName, patches...); err != nil {
		if r.logger.IsWarn() {
			// Fixed log message: previously read "unable to update state
			// for due to ..." with a dangling "for".
			r.logger.Warn(fmt.Sprintf("unable to update state due to %s, will retry", err.Error()))
		}
		return
	}

	// All patches landed; reset the queue.
	r.patchesToRetry = make(map[string]*client.Patch)
}