resource_quota_controller.go
/*
Copyright 2014 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package resourcequota
import (
"fmt"
"time"
"github.com/golang/glog"
"k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/pkg/api/resource"
"k8s.io/kubernetes/pkg/client/cache"
clientset "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset"
"k8s.io/kubernetes/pkg/controller"
"k8s.io/kubernetes/pkg/controller/framework"
"k8s.io/kubernetes/pkg/runtime"
utilruntime "k8s.io/kubernetes/pkg/util/runtime"
"k8s.io/kubernetes/pkg/util/wait"
"k8s.io/kubernetes/pkg/util/workqueue"
"k8s.io/kubernetes/pkg/watch"
)
// ResourceQuotaController is responsible for tracking quota usage status in the system
type ResourceQuotaController struct {
// Must have authority to list all resources in the system, and update quota status
kubeClient clientset.Interface
// An index of resource quota objects by namespace
rqIndexer cache.Indexer
// Watches changes to all resource quota
rqController *framework.Controller
// A store of pods, populated by the podController
podStore cache.StoreToPodLister
// Watches changes to all pods (so we can optimize release of compute resources)
podController *framework.Controller
// ResourceQuota objects that need to be synchronized
queue *workqueue.Type
// To allow injection of syncUsage for testing.
syncHandler func(key string) error
// resyncPeriod returns the interval between full recalculations of quota usage
resyncPeriod controller.ResyncPeriodFunc
}
// NewResourceQuotaController creates a new ResourceQuotaController
func NewResourceQuotaController(kubeClient clientset.Interface, resyncPeriod controller.ResyncPeriodFunc) *ResourceQuotaController {
rq := &ResourceQuotaController{
kubeClient: kubeClient,
queue: workqueue.New(),
resyncPeriod: resyncPeriod,
}
rq.rqIndexer, rq.rqController = framework.NewIndexerInformer(
&cache.ListWatch{
ListFunc: func(options api.ListOptions) (runtime.Object, error) {
return rq.kubeClient.Core().ResourceQuotas(api.NamespaceAll).List(options)
},
WatchFunc: func(options api.ListOptions) (watch.Interface, error) {
return rq.kubeClient.Core().ResourceQuotas(api.NamespaceAll).Watch(options)
},
},
&api.ResourceQuota{},
resyncPeriod(),
framework.ResourceEventHandlerFuncs{
AddFunc: rq.enqueueResourceQuota,
UpdateFunc: func(old, cur interface{}) {
// We are only interested in observing updates to quota.spec to drive updates to quota.status.
// We ignore all updates to quota.Status because they are all driven by this controller.
// IMPORTANT:
// We do not use this function to queue up a full quota recalculation. To do so would require
// us to enqueue all quota.Status updates, and since quota.Status updates involve additional queries
// that cannot be backed by a cache and result in a full query of a namespace's content, we do not
// want to pay the price on spurious status updates. As a result, a separate routine is
// responsible for enqueueing all resource quotas when doing a full resync (enqueueAll).
oldResourceQuota := old.(*api.ResourceQuota)
curResourceQuota := cur.(*api.ResourceQuota)
if api.Semantic.DeepEqual(oldResourceQuota.Spec.Hard, curResourceQuota.Spec.Hard) {
return
}
glog.V(4).Infof("Observed updated quota spec for %v/%v", curResourceQuota.Namespace, curResourceQuota.Name)
rq.enqueueResourceQuota(curResourceQuota)
},
// This will enter the sync loop and no-op, because the quota has been deleted from the store.
DeleteFunc: rq.enqueueResourceQuota,
},
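// Index quotas by namespace so deletePod can look up the quotas that govern a deleted pod's namespace.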
cache.Indexers{"namespace": cache.MetaNamespaceIndexFunc},
)
// We use this pod controller to rapidly observe when a pod deletion occurs in order to
// release compute resources from any associated quota.
rq.podStore.Store, rq.podController = framework.NewInformer(
&cache.ListWatch{
ListFunc: func(options api.ListOptions) (runtime.Object, error) {
return rq.kubeClient.Core().Pods(api.NamespaceAll).List(options)
},
WatchFunc: func(options api.ListOptions) (watch.Interface, error) {
return rq.kubeClient.Core().Pods(api.NamespaceAll).Watch(options)
},
},
&api.Pod{},
resyncPeriod(),
framework.ResourceEventHandlerFuncs{
DeleteFunc: rq.deletePod,
},
)
// set the synchronization handler
rq.syncHandler = rq.syncResourceQuotaFromKey
return rq
}
// enqueueAll is called at the fullResyncPeriod interval to force a full recalculation of quota usage statistics
func (rq *ResourceQuotaController) enqueueAll() {
defer glog.V(4).Infof("Resource quota controller queued all resource quotas for a full recalculation of usage")
for _, k := range rq.rqIndexer.ListKeys() {
rq.queue.Add(k)
}
}
// obj could be an *api.ResourceQuota, or a DeletionFinalStateUnknown marker item.
func (rq *ResourceQuotaController) enqueueResourceQuota(obj interface{}) {
key, err := controller.KeyFunc(obj)
if err != nil {
glog.Errorf("Couldn't get key for object %+v: %v", obj, err)
return
}
rq.queue.Add(key)
}
// worker runs a worker thread that just dequeues items, processes them, and marks them done.
// It enforces that the syncHandler is never invoked concurrently with the same key.
func (rq *ResourceQuotaController) worker() {
for {
func() {
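// Process each item inside a closure so the deferred Done call runs per item rather than when the worker exits.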
key, quit := rq.queue.Get()
if quit {
return
}
defer rq.queue.Done(key)
err := rq.syncHandler(key.(string))
if err != nil {
utilruntime.HandleError(err)
}
}()
}
}
// Run begins the quota controller using the specified number of workers
func (rq *ResourceQuotaController) Run(workers int, stopCh <-chan struct{}) {
defer utilruntime.HandleCrash()
go rq.rqController.Run(stopCh)
go rq.podController.Run(stopCh)
for i := 0; i < workers; i++ {
go wait.Until(rq.worker, time.Second, stopCh)
}
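// Periodically enqueue every quota for a full recalculation of usage (see enqueueAll).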
go wait.Until(func() { rq.enqueueAll() }, rq.resyncPeriod(), stopCh)
<-stopCh
glog.Infof("Shutting down ResourceQuotaController")
rq.queue.ShutDown()
}
// FilterQuotaPods eliminates pods that no longer have a cost against the quota:
// - pods with a restart policy of Always are always returned
// - pods in a failed state with a restart policy of OnFailure are always returned
// - pods that are in neither a success nor a failure state are included in quota
func FilterQuotaPods(pods []api.Pod) []*api.Pod {
var result []*api.Pod
for i := range pods {
value := &pods[i]
// a pod that has a restart policy always no matter its state counts against usage
if value.Spec.RestartPolicy == api.RestartPolicyAlways {
result = append(result, value)
continue
}
// a failed pod with a restart policy of on failure will count against usage
if api.PodFailed == value.Status.Phase &&
value.Spec.RestartPolicy == api.RestartPolicyOnFailure {
result = append(result, value)
continue
}
// if the pod is not succeeded or failed, then we count it against quota
if api.PodSucceeded != value.Status.Phase &&
api.PodFailed != value.Status.Phase {
result = append(result, value)
continue
}
}
return result
}
// syncResourceQuotaFromKey syncs a quota key
func (rq *ResourceQuotaController) syncResourceQuotaFromKey(key string) (err error) {
startTime := time.Now()
defer func() {
glog.V(4).Infof("Finished syncing resource quota %q (%v)", key, time.Now().Sub(startTime))
}()
obj, exists, err := rq.rqIndexer.GetByKey(key)
if err != nil {
glog.Infof("Unable to retrieve resource quota %v from store: %v", key, err)
rq.queue.Add(key)
return err
}
if !exists {
glog.Infof("Resource quota %v has been deleted", key)
return nil
}
quota := *obj.(*api.ResourceQuota)
return rq.syncResourceQuota(quota)
}
// syncResourceQuota runs a complete sync of current status
func (rq *ResourceQuotaController) syncResourceQuota(quota api.ResourceQuota) (err error) {
// quota is dirty if any part of spec hard limits differs from the status hard limits
dirty := !api.Semantic.DeepEqual(quota.Spec.Hard, quota.Status.Hard)
// dirty tracks whether the usage status differs from the previous sync;
// if so, we send a new usage with the latest status.
// If this is our first sync, it is dirty by default, since we need to track usage.
dirty = dirty || (quota.Status.Hard == nil || quota.Status.Used == nil)
// Create a usage object that is based on the quota resource version
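// Reusing the quota's ResourceVersion makes the status update below conditional: it fails with a conflict if the quota changed since we read it.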
usage := api.ResourceQuota{
ObjectMeta: api.ObjectMeta{
Name: quota.Name,
Namespace: quota.Namespace,
ResourceVersion: quota.ResourceVersion,
Labels: quota.Labels,
Annotations: quota.Annotations},
Status: api.ResourceQuotaStatus{
Hard: api.ResourceList{},
Used: api.ResourceList{},
},
}
// set the hard values supported on the quota
for k, v := range quota.Spec.Hard {
usage.Status.Hard[k] = *v.Copy()
}
// set any last known observed status values for usage
for k, v := range quota.Status.Used {
usage.Status.Used[k] = *v.Copy()
}
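// Compute the set of resources this quota tracks so we only query the API for what we need.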
set := map[api.ResourceName]bool{}
for k := range usage.Status.Hard {
set[k] = true
}
pods := &api.PodList{}
if set[api.ResourcePods] || set[api.ResourceMemory] || set[api.ResourceCPU] {
pods, err = rq.kubeClient.Core().Pods(usage.Namespace).List(api.ListOptions{})
if err != nil {
return err
}
}
filteredPods := FilterQuotaPods(pods.Items)
// iterate over each resource, and update observation
for k := range usage.Status.Hard {
// look for a previously observed used value; if there is none, we are definitely dirty
prevQuantity, found := usage.Status.Used[k]
if !found {
dirty = true
}
var value *resource.Quantity
switch k {
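// Object-count resources are observed by listing objects in the namespace; compute resources are summed from pod requests.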
case api.ResourcePods:
value = resource.NewQuantity(int64(len(filteredPods)), resource.DecimalSI)
case api.ResourceServices:
items, err := rq.kubeClient.Core().Services(usage.Namespace).List(api.ListOptions{})
if err != nil {
return err
}
value = resource.NewQuantity(int64(len(items.Items)), resource.DecimalSI)
case api.ResourceReplicationControllers:
items, err := rq.kubeClient.Core().ReplicationControllers(usage.Namespace).List(api.ListOptions{})
if err != nil {
return err
}
value = resource.NewQuantity(int64(len(items.Items)), resource.DecimalSI)
case api.ResourceQuotas:
items, err := rq.kubeClient.Core().ResourceQuotas(usage.Namespace).List(api.ListOptions{})
if err != nil {
return err
}
value = resource.NewQuantity(int64(len(items.Items)), resource.DecimalSI)
case api.ResourceSecrets:
items, err := rq.kubeClient.Core().Secrets(usage.Namespace).List(api.ListOptions{})
if err != nil {
return err
}
value = resource.NewQuantity(int64(len(items.Items)), resource.DecimalSI)
case api.ResourcePersistentVolumeClaims:
items, err := rq.kubeClient.Core().PersistentVolumeClaims(usage.Namespace).List(api.ListOptions{})
if err != nil {
return err
}
value = resource.NewQuantity(int64(len(items.Items)), resource.DecimalSI)
case api.ResourceMemory:
value = PodsRequests(filteredPods, api.ResourceMemory)
case api.ResourceCPU:
value = PodsRequests(filteredPods, api.ResourceCPU)
}
// ignore fields we do not understand (assume another controller is tracking them)
if value != nil {
// see if the value has changed
dirty = dirty || (value.Value() != prevQuantity.Value())
// just update the value
usage.Status.Used[k] = *value
}
}
// update the usage only if it changed
if dirty {
_, err = rq.kubeClient.Core().ResourceQuotas(usage.Namespace).UpdateStatus(&usage)
return err
}
return nil
}
// PodsRequests returns the sum of the named resource's requests across all pods in the list.
// If a given pod does not have a request for the named resource, we log it
// but still attempt to produce the most representative count.
func PodsRequests(pods []*api.Pod, resourceName api.ResourceName) *resource.Quantity {
var sum *resource.Quantity
for i := range pods {
pod := pods[i]
podQuantity, err := PodRequests(pod, resourceName)
if err != nil {
// log it, but keep the most accurate count possible; a namespace may have
// contained pods without explicit requests before the quota was added
glog.Infof("No explicit request for resource %s in pod %s/%s", resourceName, pod.Namespace, pod.Name)
} else {
if sum == nil {
sum = podQuantity
} else {
sum.Add(*podQuantity)
}
}
}
// if the list was empty or no pod had an explicit request, return zero
if sum == nil {
q := resource.MustParse("0")
sum = &q
}
return sum
}
// PodRequests returns the sum of the named resource's requests across all containers in the pod.
func PodRequests(pod *api.Pod, resourceName api.ResourceName) (*resource.Quantity, error) {
if !PodHasRequests(pod, resourceName) {
return nil, fmt.Errorf("not every container in pod %s/%s has an explicit request for resource %s", pod.Namespace, pod.Name, resourceName)
}
var sum *resource.Quantity
for j := range pod.Spec.Containers {
value := pod.Spec.Containers[j].Resources.Requests[resourceName]
if sum == nil {
sum = value.Copy()
} else {
err := sum.Add(value)
if err != nil {
return sum, err
}
}
}
// if the pod had no containers, return zero
if sum == nil {
q := resource.MustParse("0")
sum = &q
}
return sum, nil
}
// PodHasRequests verifies that each container in the pod has an explicit, non-zero request for the named resource
func PodHasRequests(pod *api.Pod, resourceName api.ResourceName) bool {
for j := range pod.Spec.Containers {
value, valueSet := pod.Spec.Containers[j].Resources.Requests[resourceName]
if !valueSet || value.Value() == int64(0) {
return false
}
}
return true
}
// When a pod is deleted, enqueue any quotas in the pod's namespace so the released compute resources are observed promptly.
// obj could be an *api.Pod, or a DeletionFinalStateUnknown marker item.
func (rq *ResourceQuotaController) deletePod(obj interface{}) {
pod, ok := obj.(*api.Pod)
// When a delete is dropped, the relist will notice a pod in the store not
// in the list, leading to the insertion of a tombstone object which contains
// the deleted key/value. Note that this value might be stale.
if !ok {
tombstone, ok := obj.(cache.DeletedFinalStateUnknown)
if !ok {
glog.Errorf("Couldn't get object from tombstone %+v, could take up to %v before a quota records the deletion", obj, rq.resyncPeriod())
return
}
pod, ok = tombstone.Obj.(*api.Pod)
if !ok {
glog.Errorf("Tombstone contained object that is not a pod %+v, could take up to %v before quota records the deletion", obj, rq.resyncPeriod())
return
}
}
quotas, err := rq.rqIndexer.Index("namespace", pod)
if err != nil {
glog.Errorf("Couldn't find resource quota associated with pod %+v, could take up to %v before a quota records the deletion: %v", obj, rq.resyncPeriod(), err)
return
}
if len(quotas) == 0 {
glog.V(4).Infof("No resource quota associated with namespace %q", pod.Namespace)
return
}
for i := range quotas {
quota := quotas[i].(*api.ResourceQuota)
rq.enqueueResourceQuota(quota)
}
}