// File: state.go
// Origin: fork of kubernetes-retired/bootkube.

package checkpoint
import (
"fmt"
"time"
"github.com/golang/glog"
)
// apiCondition is a snapshot of what the various api endpoints report about a single pod. It is
// the input handed to checkpointState.transition.
type apiCondition struct {
	apiAvailable bool // true if the apiserver was reachable
	apiParent    bool // true if the api parent pod exists
	localRunning bool // true if the CRI shim reports that the pod is running locally
	localParent  bool // true if the kubelet parent pod exists
}

// String implements fmt.Stringer by rendering the four flags as comma-separated key=value pairs.
func (a apiCondition) String() string {
	const layout = "apiAvailable=%t, apiParent=%t, localRunning=%t, localParent=%t"
	return fmt.Sprintf(layout, a.apiAvailable, a.apiParent, a.localRunning, a.localParent)
}
// action represents the action to be taken based on the state of a checkpoint.
type action int

// The constants below are declared with the explicit type action. Leaving the type off would make
// them untyped integer constants, which have no String method and therefore print as bare numbers
// when formatted or logged. The numeric values (0 through 3) are the same either way.
const (
	// none is the default action of "do nothing".
	none action = iota
	// start means that the checkpoint should be started.
	start
	// stop means that the checkpoint should be stopped.
	stop
	// remove means that the checkpoint should be garbage collected.
	remove
)

// String implements fmt.Stringer. It returns the lower-case name of the action, or
// "[unknown action]" for any value outside the declared constants.
func (a action) String() string {
	switch a {
	case none:
		return "none"
	case start:
		return "start"
	case stop:
		return "stop"
	case remove:
		return "remove"
	default:
		return "[unknown action]"
	}
}
// checkpointState is one node in the checkpoint state machine. Every concrete state in this file
// implements it.
type checkpointState interface {
	// transition returns the state to move to, given the current time and the latest information
	// gathered from the various apis. Returning the receiver means "stay in this state".
	transition(now time.Time, apis apiCondition) checkpointState
	// action reports what should be done with the checkpoint while it is in this state.
	action() action
}
// stateSelfCheckpointActive is the state of the checkpoint of the checkpointer itself, which has
// special behavior: its action is always start.
//
// The only way out of this state is through stateActiveGracePeriod, entered once the apiserver
// is reachable and reports that the parent pod no longer exists.
type stateSelfCheckpointActive struct{}

// transition implements checkpointState.transition.
func (s stateSelfCheckpointActive) transition(now time.Time, apis apiCondition) checkpointState {
	// Stay in the self-checkpoint state unless the apiserver is reachable AND says the parent pod
	// is gone. An unreachable apiserver or an existing parent both keep us here.
	parentDeleted := apis.apiAvailable && !apis.apiParent
	if !parentDeleted {
		return s
	}

	// The apiserver parent pod is deleted: start a fresh grace period and evaluate it immediately.
	// TODO(diegs): this is a little hacky, perhaps clean it up with a constructor.
	grace := stateActiveGracePeriod{gracePeriodEnd: now.Add(checkpointGracePeriod)}
	return grace.checkGracePeriod(now, apis)
}

// action implements checkpointState.action. The self-checkpoint should always be started.
func (s stateSelfCheckpointActive) action() action {
	return start
}

// String implements fmt.Stringer.
func (s stateSelfCheckpointActive) String() string {
	return "self-checkpoint"
}
// stateNone is the state of a newly discovered pod that has not been processed yet, so it has no
// checkpoint state.
//
// It leaves this state by delegating to stateInactive.transition, so the next state is whatever
// that method returns for the same inputs.
type stateNone struct{}

// transition implements checkpointState.transition.
func (s stateNone) transition(now time.Time, apis apiCondition) checkpointState {
	// A pod that is known only to the apiserver (neither running locally nor present as a kubelet
	// parent pod) is not checkpointed yet.
	if !apis.localRunning && !apis.localParent {
		return s
	}

	// Otherwise treat the new pod as if it were inactive and let that state decide what is next.
	return stateInactive{}.transition(now, apis)
}

// action implements checkpointState.action. Nothing is done for an unprocessed pod.
func (s stateNone) action() action {
	return none
}

// String implements fmt.Stringer.
func (s stateNone) String() string {
	return "none"
}
// stateInactive is a checkpoint that is currently sitting inactive on disk; its action is stop.
//
// stateInactive can transition to stateActive or, once the apiserver reports the parent pod as
// deleted, to whatever stateInactiveGracePeriod.checkGracePeriod returns for a new grace period.
type stateInactive struct{}

// transition implements checkpointState.transition.
func (s stateInactive) transition(now time.Time, apis apiCondition) checkpointState {
	switch {
	case apis.apiAvailable && !apis.apiParent:
		// The apiserver is reachable and the parent pod is deleted: begin the grace period and
		// evaluate it right away.
		// TODO(diegs): this is a little hacky, perhaps clean it up with a constructor.
		grace := stateInactiveGracePeriod{gracePeriodEnd: now.Add(checkpointGracePeriod)}
		return grace.checkGracePeriod(now, apis)
	case apis.localRunning:
		// Either the apiserver is unavailable or the parent pod exists, and the local copy is
		// running: remain in stateInactive.
		return s
	default:
		// Either the apiserver is unavailable or the parent pod exists, but nothing is running
		// locally: activate the checkpoint.
		return stateActive{}
	}
}

// action implements checkpointState.action.
func (s stateInactive) action() action {
	return stop
}

// String implements fmt.Stringer.
func (s stateInactive) String() string {
	return "inactive"
}
// stateInactiveGracePeriod is a checkpoint that is inactive but will be garbage collected after a
// grace period; its action is stop.
//
// stateInactiveGracePeriod can transition to stateInactive, stateActive, or stateRemove.
type stateInactiveGracePeriod struct {
	// gracePeriodEnd is the instant at which the grace period is over and the checkpoint should be
	// garbage collected.
	gracePeriodEnd time.Time
}

// transition implements checkpointState.transition.
func (s stateInactiveGracePeriod) transition(now time.Time, apis apiCondition) checkpointState {
	switch {
	case apis.apiAvailable && !apis.apiParent:
		// The apiserver still reports the parent pod as deleted: keep counting down.
		return s.checkGracePeriod(now, apis)
	case !apis.localRunning:
		// The apiserver is unavailable, or the parent pod exists, and nothing is running locally:
		// activate the checkpoint.
		return stateActive{}
	case apis.apiAvailable:
		// The parent pod exists again and the kubelet is running it: drop the grace period and go
		// back to plain stateInactive.
		return stateInactive{}
	default:
		// The apiserver is unavailable but the local copy is running: keep counting down.
		return s.checkGracePeriod(now, apis)
	}
}

// checkGracePeriod returns stateRemove once now has reached or passed gracePeriodEnd, logging the
// expiry; before that it returns s unchanged. The apis argument is not consulted.
func (s stateInactiveGracePeriod) checkGracePeriod(now time.Time, apis apiCondition) checkpointState {
	if now.Before(s.gracePeriodEnd) {
		return s
	}
	glog.Infof("Grace period exceeded for state %s", s)
	return stateRemove{}
}

// action implements checkpointState.action.
func (s stateInactiveGracePeriod) action() action {
	return stop
}

// String implements fmt.Stringer.
func (s stateInactiveGracePeriod) String() string {
	return "inactive (grace period)"
}
// stateActive is a checkpoint that is currently activated; its action is start.
//
// stateActive can transition to stateInactive or, once the apiserver reports the parent pod as
// deleted, to whatever stateActiveGracePeriod.checkGracePeriod returns for a new grace period.
type stateActive struct{}

// transition implements checkpointState.transition.
func (s stateActive) transition(now time.Time, apis apiCondition) checkpointState {
	if apis.apiAvailable && !apis.apiParent {
		// The apiserver is reachable and the parent pod is deleted: begin the grace period and
		// evaluate it right away.
		// TODO(diegs): this is a little hacky, perhaps clean it up with a constructor.
		grace := stateActiveGracePeriod{gracePeriodEnd: now.Add(checkpointGracePeriod)}
		return grace.checkGracePeriod(now, apis)
	}

	// From here on either the apiserver is unavailable or the parent pod exists. In both cases a
	// locally running copy makes the checkpoint redundant, so it goes inactive.
	if apis.localRunning {
		return stateInactive{}
	}

	// Nothing is running locally: the checkpoint stays active.
	return s
}

// action implements checkpointState.action.
func (s stateActive) action() action {
	return start
}

// String implements fmt.Stringer.
func (s stateActive) String() string {
	return "active"
}
// stateActiveGracePeriod is a checkpoint that is active but will be garbage collected after a
// grace period; its action is start.
//
// stateActiveGracePeriod can transition to stateActive or stateInactive directly. When the grace
// period expires it hands the decision to stateInactiveGracePeriod.transition (see
// checkGracePeriod).
type stateActiveGracePeriod struct {
	// gracePeriodEnd is the instant at which the grace period is over and the checkpoint should be
	// garbage collected.
	gracePeriodEnd time.Time
}

// transition implements checkpointState.transition.
func (s stateActiveGracePeriod) transition(now time.Time, apis apiCondition) checkpointState {
	switch {
	case apis.apiAvailable && !apis.apiParent:
		// The apiserver still reports the parent pod as deleted: keep counting down.
		return s.checkGracePeriod(now, apis)
	case apis.localRunning:
		// The apiserver is unavailable, or the parent pod exists, and a local copy is running:
		// the checkpoint is no longer needed, go inactive.
		return stateInactive{}
	case apis.apiAvailable:
		// The parent pod exists again but the kubelet is not running it: drop the grace period
		// and go back to plain stateActive.
		return stateActive{}
	default:
		// The apiserver is unavailable and nothing is running locally: keep counting down.
		return s.checkGracePeriod(now, apis)
	}
}

// checkGracePeriod returns s unchanged while now is before gracePeriodEnd. Once the grace period
// has been reached or passed it logs the expiry and returns the result of
// stateInactiveGracePeriod.transition for a grace period that ends at now, i.e. one that is
// itself already expired. Given the conditions under which this method is called in this file,
// that yields stateRemove when the apiserver reports the parent as deleted, and stateActive when
// the apiserver is unavailable and nothing is running locally.
func (s stateActiveGracePeriod) checkGracePeriod(now time.Time, apis apiCondition) checkpointState {
	if now.Before(s.gracePeriodEnd) {
		return s
	}
	glog.Infof("Grace period exceeded for state %s", s)
	expired := stateInactiveGracePeriod{gracePeriodEnd: now}
	return expired.transition(now, apis)
}

// action implements checkpointState.action.
func (s stateActiveGracePeriod) action() action {
	return start
}

// String implements fmt.Stringer.
func (s stateActiveGracePeriod) String() string {
	return "active (grace period)"
}
// stateRemove is a checkpoint that is being garbage collected; its action is remove.
//
// It is meant to be a terminal state: transition always returns the receiver, so no other state
// can be reached from here. Per the original author's note, checkpoints in this state are removed
// as part of the update loop.
type stateRemove struct{}

// transition implements checkpointState.transition. It is not expected to be called at all, so a
// call is logged as an error; the inputs are ignored and the state is left unchanged.
func (s stateRemove) transition(_ time.Time, _ apiCondition) checkpointState {
	glog.Errorf("Unexpected call to transition() for state %s", s)
	return s
}

// action implements checkpointState.action.
func (s stateRemove) action() action {
	return remove
}

// String implements fmt.Stringer.
func (s stateRemove) String() string {
	return "remove"
}