forked from snapcore/snapd
/
quota_handlers.go
532 lines (449 loc) · 18 KB
/
quota_handlers.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
// -*- Mode: Go; indent-tabs-mode: t -*-
/*
* Copyright (C) 2021 Canonical Ltd
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 3 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
package servicestate
import (
"fmt"
"sort"
"time"
tomb "gopkg.in/tomb.v2"
"github.com/snapcore/snapd/gadget/quantity"
"github.com/snapcore/snapd/logger"
"github.com/snapcore/snapd/overlord/snapstate"
"github.com/snapcore/snapd/overlord/state"
"github.com/snapcore/snapd/progress"
"github.com/snapcore/snapd/snap"
"github.com/snapcore/snapd/snap/quota"
"github.com/snapcore/snapd/snapdenv"
"github.com/snapcore/snapd/strutil"
"github.com/snapcore/snapd/systemd"
"github.com/snapcore/snapd/timings"
"github.com/snapcore/snapd/wrappers"
)
// QuotaControlAction is the serialized representation of a quota group
// modification that lives in a task. The quota-control task handler reads a
// list of these from the task under the "quota-control-actions" key.
type QuotaControlAction struct {
	// QuotaName is the name of the quota group being controlled.
	QuotaName string `json:"quota-name"`

	// Action is the action being taken on the quota group. It must be one of
	// "create", "update", or "remove"; any other value is rejected by the
	// task handler.
	Action string `json:"action"`

	// AddSnaps is the set of snaps to add to the quota group, valid for either
	// the "update" or the "create" actions. It must be empty for the "remove"
	// action.
	AddSnaps []string `json:"snaps"`

	// MemoryLimit is the memory limit for the quota group being controlled.
	// For the "create" action it is the initial limit of the group and is
	// mandatory. For the "update" action a non-zero value is the new limit to
	// set, while zero leaves the current limit untouched. It must be zero for
	// the "remove" action.
	// NOTE(review): unlike the fields above, this field has no json tag, so
	// its serialized key is derived from the Go field name - confirm this is
	// intended before changing it, as that would alter the stored format.
	MemoryLimit quantity.Size

	// ParentName is the name of the parent for the quota group if it is being
	// created. Eventually this could be used with the "update" action to
	// support moving quota groups from one parent to another, but that is
	// currently not supported, so it must be empty for both the "update" and
	// the "remove" actions.
	// NOTE(review): this field has no json tag either, see MemoryLimit.
	ParentName string
}
// doQuotaControl is the handler for quota-control tasks. It reads the list of
// QuotaControlAction stored on the task under "quota-control-actions" and
// dispatches the (currently single) action to the matching create, remove or
// update implementation.
//
// The state lock is taken here and released when the handler returns; the
// task timings are saved to the state on return as well.
func (m *ServiceManager) doQuotaControl(t *state.Task, _ *tomb.Tomb) error {
	st := t.State()
	st.Lock()
	defer st.Unlock()

	perfTimings := state.TimingsForTask(t)
	defer perfTimings.Save(st)

	meter := snapstate.NewTaskProgressAdapterUnlocked(t)

	var actions []QuotaControlAction
	if err := t.Get("quota-control-actions", &actions); err != nil {
		return fmt.Errorf("internal error: cannot get quota-control-action: %v", err)
	}

	// TODO: support more than one action
	if len(actions) == 0 {
		return fmt.Errorf("internal error: no quota group actions for quota-control task")
	}
	if len(actions) > 1 {
		return fmt.Errorf("multiple quota group actions not supported yet")
	}
	action := actions[0]

	// the current set of groups is the starting point for every action
	allGrps, err := AllQuotas(st)
	if err != nil {
		return err
	}

	switch action.Action {
	case "create":
		return quotaCreate(st, t, action, allGrps, meter, perfTimings)
	case "remove":
		return quotaRemove(st, t, action, allGrps, meter, perfTimings)
	case "update":
		return quotaUpdate(st, t, action, allGrps, meter, perfTimings)
	}

	return fmt.Errorf("unknown action %q requested", action.Action)
}
// quotaCreate handles the "create" action: it validates the request against
// the existing groups in allGrps, creates the new group (optionally as a
// sub-group of action.ParentName), stores it in the state and then ensures
// the services of the snaps placed in the group.
//
// It fails if the group already exists, if the memory limit is missing or too
// small, or if any of the snaps cannot be added to the group.
func quotaCreate(st *state.State, t *state.Task, action QuotaControlAction, allGrps map[string]*quota.Group, meter progress.Meter, perfTimings *timings.Timings) error {
	// the group must not exist yet
	if _, exists := allGrps[action.QuotaName]; exists {
		return fmt.Errorf("group %q already exists", action.QuotaName)
	}

	switch {
	case action.MemoryLimit == 0:
		// a missing memory limit is reported separately as an internal error
		// TODO: this needs to be updated to 4K when PR snapcore/snapd#10346
		// lands and an equivalent check needs to be put back into
		// CreateQuota() before the tasks are created
		return fmt.Errorf("internal error, MemoryLimit option is mandatory for create action")
	case action.MemoryLimit <= 4*quantity.SizeKiB:
		// the limit has to be strictly larger than 4KiB, that is the minimum
		// size to allow nesting, otherwise such small groups will trigger the
		// oom killer to be invoked when a new group is added as a sub-group
		// to the larger group
		return fmt.Errorf("memory limit for group %q is too small: size must be larger than 4KB", action.QuotaName)
	}

	// the requested snaps must exist and must not be in another group already
	err := validateSnapForAddingToGroup(st, action.AddSnaps, action.QuotaName, allGrps)
	if err != nil {
		return err
	}

	grp, updatedGrps, err := quotaCreateImpl(st, action, allGrps)
	if err != nil {
		return err
	}

	// ensure the snap services against the updated set of groups
	return ensureSnapServicesForGroup(st, t, grp, &ensureSnapServicesForGroupOptions{
		allGrps: updatedGrps,
	}, meter, perfTimings)
}
// quotaCreateImpl builds the new quota group described by action and patches
// it into the quota groups stored in the state. When action.ParentName is
// set, the group is created as a sub-group of that parent, which must be
// present in allGrps, and the parent is patched together with the new group.
//
// It returns the new group and the set of all groups as returned by
// patchQuotas.
func quotaCreateImpl(st *state.State, action QuotaControlAction, allGrps map[string]*quota.Group) (*quota.Group, map[string]*quota.Group, error) {
	var (
		newGrp  *quota.Group
		touched []*quota.Group
		err     error
	)

	if action.ParentName == "" {
		// a top-level group
		newGrp, err = quota.NewGroup(action.QuotaName, action.MemoryLimit)
		if err != nil {
			return nil, nil, err
		}
	} else {
		// a sub-group, the parent has to exist already
		parent, found := allGrps[action.ParentName]
		if !found {
			return nil, nil, fmt.Errorf("cannot create group under non-existent parent group %q", action.ParentName)
		}
		newGrp, err = parent.NewSubGroup(action.QuotaName, action.MemoryLimit)
		if err != nil {
			return nil, nil, err
		}
		// the parent is patched too, ahead of the new group
		touched = append(touched, parent)
	}

	// put the snaps in the group
	newGrp.Snaps = action.AddSnaps
	touched = append(touched, newGrp)

	// update the modified groups in state
	patchedGrps, err := patchQuotas(st, touched...)
	if err != nil {
		return nil, nil, err
	}

	return newGrp, patchedGrps, nil
}
// quotaRemove handles the "remove" action: it unlinks the group named by
// action.QuotaName from its parent (if any), deletes it from allGrps, stores
// the resulting set under "quotas" in the state and finally ensures the
// services of the snaps that were in the group.
//
// Only the name and the action may be set for a removal, and groups that
// still have sub-groups cannot be removed.
func quotaRemove(st *state.State, t *state.Task, action QuotaControlAction, allGrps map[string]*quota.Group, meter progress.Meter, perfTimings *timings.Timings) error {
	// the group has to exist
	target, found := allGrps[action.QuotaName]
	if !found {
		return fmt.Errorf("cannot remove non-existent quota group %q", action.QuotaName)
	}

	// it's an internal error if anything other than the name and action are
	// set for a removal
	switch {
	case action.ParentName != "":
		return fmt.Errorf("internal error, ParentName option cannot be used with remove action")
	case len(action.AddSnaps) != 0:
		return fmt.Errorf("internal error, AddSnaps option cannot be used with remove action")
	case action.MemoryLimit != 0:
		return fmt.Errorf("internal error, MemoryLimit option cannot be used with remove action")
	}

	// XXX: remove this limitation eventually
	if len(target.SubGroups) != 0 {
		return fmt.Errorf("cannot remove quota group with sub-groups, remove the sub-groups first")
	}

	// a sub-group must be unlinked from its parent first
	if parentName := target.ParentGroup; parentName != "" {
		// the parent here must exist otherwise AllQuotas would have failed
		// because state would have been inconsistent; for the same reason
		// the parent is known to mention this group as a sub-group
		parent := allGrps[parentName]

		// an only child leaves the parent with a nil list, otherwise every
		// sibling is kept
		var remaining []string
		if len(parent.SubGroups) != 1 {
			remaining = make([]string, 0, len(parent.SubGroups)-1)
			for _, child := range parent.SubGroups {
				if child != action.QuotaName {
					remaining = append(remaining, child)
				}
			}
		}
		// parent is the pointer held by the map, so the map entry reflects
		// the change without being assigned again
		parent.SubGroups = remaining
	}

	// now delete the group from state - do this first for convenience to
	// ensure that we can just use SnapServiceOptions below and since it
	// operates via state, it will immediately reflect the deletion
	delete(allGrps, action.QuotaName)

	// make sure that the group set is consistent before saving it - we may
	// need to delete old links from this group's parent to the child
	if err := quota.ResolveCrossReferences(allGrps); err != nil {
		return fmt.Errorf("cannot remove quota %q: %v", action.QuotaName, err)
	}

	// now set it in state
	st.Set("quotas", allGrps)

	// update snap service units that may need to be re-written because they
	// are not in a slice anymore
	return ensureSnapServicesForGroup(st, t, target, &ensureSnapServicesForGroupOptions{
		allGrps: allGrps,
	}, meter, perfTimings)
}
// quotaUpdate handles the "update" action: it adds the snaps in
// action.AddSnaps to the existing group named by action.QuotaName and, when
// action.MemoryLimit is non-zero, raises the memory limit of the group to
// that value. The modified group is patched into the state and the services
// of the snaps in the group are ensured afterwards.
//
// It fails if the group does not exist, if a parent is specified (moving
// groups is not supported), if any snap cannot be added to the group, or if
// the new memory limit is lower than the current one.
//
// All validation is finished before the group is touched, so a rejected
// update never leaves the group in allGrps partially modified.
func quotaUpdate(st *state.State, t *state.Task, action QuotaControlAction, allGrps map[string]*quota.Group, meter progress.Meter, perfTimings *timings.Timings) error {
	// make sure the group exists
	grp, ok := allGrps[action.QuotaName]
	if !ok {
		return fmt.Errorf("group %q does not exist", action.QuotaName)
	}

	// check that ParentName is not set, since we don't currently support
	// re-parenting
	if action.ParentName != "" {
		return fmt.Errorf("group %q cannot be moved to a different parent (re-parenting not yet supported)", action.QuotaName)
	}

	// now ensure that all of the snaps mentioned in AddSnaps exist as snaps
	// and that they aren't already in an existing quota group
	if err := validateSnapForAddingToGroup(st, action.AddSnaps, action.QuotaName, allGrps); err != nil {
		return err
	}

	// we disallow decreasing the memory limit because it is difficult to do
	// so correctly with the current state of our code in EnsureSnapServices,
	// see comment in ensureSnapServicesForGroup for full details; a zero
	// limit means "keep the current limit" and is never a decrease
	if action.MemoryLimit != 0 && action.MemoryLimit < grp.MemoryLimit {
		return fmt.Errorf("cannot decrease memory limit of existing quota-group, remove and re-create it to decrease the limit")
	}

	// everything was validated, only now modify the group: append the snaps
	// and, if requested, change the memory limit too
	grp.Snaps = append(grp.Snaps, action.AddSnaps...)
	if action.MemoryLimit != 0 {
		grp.MemoryLimit = action.MemoryLimit
	}

	// update the quota group state
	allGrps, err := patchQuotas(st, grp)
	if err != nil {
		return err
	}

	// ensure service states are updated
	opts := &ensureSnapServicesForGroupOptions{
		allGrps: allGrps,
	}
	return ensureSnapServicesForGroup(st, t, grp, opts, meter, perfTimings)
}
// ensureSnapServicesForGroupOptions holds the inputs of
// ensureSnapServicesForGroup that go beyond the single group being processed.
// A nil options value is rejected by ensureSnapServicesForGroup.
type ensureSnapServicesForGroupOptions struct {
	// allGrps is the updated set of quota groups. It is used to compute the
	// service options of each snap, and a processed group that is missing
	// from it is treated as having been removed.
	allGrps map[string]*quota.Group

	// extraSnaps is the set of extra snaps to consider when ensuring services
	// in addition to the snaps listed in the group itself, mainly only used
	// when snaps are removed from quota groups
	extraSnaps []string
}
// ensureSnapServicesForGroup will handle updating changes to a given quota
// group on disk, including re-generating systemd slice files, restarting snap
// services that have moved into or out of quota groups, as well as starting
// newly created quota groups and stopping and removing removed quota groups.
// This function is idempotent, in that it can be called multiple times with
// the same changes to be processed and nothing will be broken. This is mainly
// a consequence of calling wrappers.EnsureSnapServices().
// Currently, it only supports handling a single group change.
//
// The services of the snaps in grp.Snaps and in opts.extraSnaps are ensured
// against the set of groups in opts.allGrps; if grp is not part of that set
// it is considered removed and its slice is stopped and deleted. opts must
// not be nil. A nil meter or nil perfTimings is replaced with a no-op
// default, and t may be nil in which case no task status is set.
//
// The state must be locked by the caller; the lock is released only while
// services are being restarted and is held again when the function returns.
// When preseeding, only the units on disk are ensured and nothing is
// started, stopped, removed or restarted.
func ensureSnapServicesForGroup(st *state.State, t *state.Task, grp *quota.Group, opts *ensureSnapServicesForGroupOptions, meter progress.Meter, perfTimings *timings.Timings) error {
	if opts == nil {
		return fmt.Errorf("internal error: unset group information for ensuring")
	}

	allGrps := opts.allGrps

	if meter == nil {
		meter = progress.Null
	}

	if perfTimings == nil {
		perfTimings = &timings.Timings{}
	}

	// gather the snaps to process in a slice of our own: appending the extra
	// snaps directly to grp.Snaps could write into spare capacity of the
	// backing array that belongs to the group
	snapNames := make([]string, 0, len(grp.Snaps)+len(opts.extraSnaps))
	snapNames = append(snapNames, grp.Snaps...)
	snapNames = append(snapNames, opts.extraSnaps...)

	// build the map of snap infos to options to provide to EnsureSnapServices
	snapSvcMap := make(map[*snap.Info]*wrappers.SnapServiceOptions, len(snapNames))
	for _, sn := range snapNames {
		info, err := snapstate.CurrentInfo(st, sn)
		if err != nil {
			return err
		}

		svcOpts, err := SnapServiceOptions(st, sn, allGrps)
		if err != nil {
			return err
		}

		snapSvcMap[info] = svcOpts
	}

	// TODO: the following lines should maybe be EnsureOptionsForDevice() or
	// something since it is duplicated a few places
	ensureOpts := &wrappers.EnsureSnapServicesOptions{
		Preseeding: snapdenv.Preseeding(),
	}

	// set RequireMountedSnapdSnap if we are on UC18+ only
	deviceCtx, err := snapstate.DeviceCtx(st, nil, nil)
	if err != nil {
		return err
	}

	if !deviceCtx.Classic() && deviceCtx.Model().Base() != "" {
		ensureOpts.RequireMountedSnapdSnap = true
	}

	grpsToStart := []*quota.Group{}
	appsToRestartBySnap := map[*snap.Info][]*snap.AppInfo{}

	// collectModifiedUnits is called back by EnsureSnapServices for the units
	// it wrote; only the unit type and the previous content are of interest
	collectModifiedUnits := func(app *snap.AppInfo, unitGrp *quota.Group, unitType string, _, oldContent, _ string) {
		switch unitType {
		case "slice":
			// this slice was either modified or written for the first time

			// There are currently 3 possible cases that have different
			// operations required, but we ignore one of them, so there really
			// are just 2 cases we care about:
			// 1. If this slice was initially written, we just need to systemctl
			//    start it
			// 2. If the slice was modified to be given more resources (i.e. a
			//    higher memory limit), then we just need to do a daemon-reload
			//    which causes systemd to modify the cgroup which will always
			//    work since a cgroup can be atomically given more resources
			//    without issue since the cgroup can't be using more than the
			//    current limit.
			// 3. If the slice was modified to be given _less_ resources (i.e. a
			//    lower memory limit), then we need to stop the services before
			//    issuing the daemon-reload to systemd, then do the
			//    daemon-reload which will succeed in modifying the cgroup, then
			//    start the services we stopped back up again. This is because
			//    otherwise if the services are currently running and using more
			//    resources than they would be allowed after the modification is
			//    applied by systemd to the cgroup, the kernel responds with
			//    EBUSY, and it isn't clear if the modification is then properly
			//    in place or not.
			//
			// We will already have called daemon-reload at the end of
			// EnsureSnapServices directly, so handling case 3 is difficult, and
			// for now we disallow making this sort of change to a quota group,
			// that logic is handled at a higher level than this function.
			// Thus the only decision we really have to make is if the slice was
			// newly written or not, and if it was save it for later
			if oldContent == "" {
				grpsToStart = append(grpsToStart, unitGrp)
			}

		case "service":
			// in this case, the only way that a service could have been changed
			// was if it was moved into or out of a slice, in both cases we need
			// to restart the service
			sn := app.Snap
			appsToRestartBySnap[sn] = append(appsToRestartBySnap[sn], app)

			// TODO: what about sockets and timers? activation units just start
			// the full unit, so as long as the full unit is restarted we should
			// be okay?
		}
	}
	if err := wrappers.EnsureSnapServices(snapSvcMap, ensureOpts, collectModifiedUnits, meter); err != nil {
		return err
	}

	// nothing below applies when preseeding, everything from here on talks
	// to systemd
	if ensureOpts.Preseeding {
		return nil
	}

	// TODO: should this logic move to wrappers in wrappers.RemoveQuotaGroup()?
	systemSysd := systemd.New(systemd.SystemMode, meter)

	// now start the slices
	for _, startGrp := range grpsToStart {
		// TODO: what should these timeouts for stopping/restart slices be?
		if err := systemSysd.Start(startGrp.SliceFileName()); err != nil {
			return err
		}
	}

	// after starting all the grps that we modified from EnsureSnapServices,
	// we need to handle the case where a quota was removed, this will only
	// happen one at a time and can be identified by the grp provided to us
	// not existing in the state
	if _, ok := allGrps[grp.Name]; !ok {
		// stop the quota group, then remove it; we are known not to be
		// preseeding at this point, and failing to stop the slice is only
		// logged so that the removal below still takes place
		if err := systemSysd.Stop(grp.SliceFileName(), 5*time.Second); err != nil {
			logger.Noticef("unable to stop systemd slice while removing group %q: %v", grp.Name, err)
		}

		// TODO: this results in a second systemctl daemon-reload which is
		// undesirable, we should figure out how to do this operation with a
		// single daemon-reload
		if err := wrappers.RemoveQuotaGroup(grp, meter); err != nil {
			return err
		}
	}

	// after we have made all the persistent modifications to disk and state,
	// set the task as done, what remains for this task handler is just to
	// restart services which will happen regardless if we get rebooted after
	// unlocking the state - if we got rebooted before unlocking the state, none
	// of the changes we made to state would be persisted and we would run
	// through everything above here again, but the second time around
	// EnsureSnapServices would end up doing nothing since it is idempotent.
	if t != nil {
		t.SetStatus(state.DoneStatus)
	}

	// now restart the services for each snap that was newly moved into a quota
	// group

	// iterate in a sorted order over the snaps to restart their apps for easy
	// tests
	snaps := make([]*snap.Info, 0, len(appsToRestartBySnap))
	for sn := range appsToRestartBySnap {
		snaps = append(snaps, sn)
	}

	sort.Slice(snaps, func(i, j int) bool {
		return snaps[i].InstanceName() < snaps[j].InstanceName()
	})

	for _, sn := range snaps {
		startupOrdered, err := snap.SortServices(appsToRestartBySnap[sn])
		if err != nil {
			return err
		}

		// the state is unlocked only for the duration of the restart itself
		st.Unlock()
		err = wrappers.RestartServices(startupOrdered, nil, nil, meter, perfTimings)
		st.Lock()

		if err != nil {
			return err
		}
	}
	return nil
}
// validateSnapForAddingToGroup checks that each of the given snaps can be
// added to the quota group named group: snapstate.CurrentInfo must succeed
// for the snap, and the snap must not be listed in any group in allGrps yet,
// the target group included. The first problem found is returned as an
// error; the group name is only used for the error messages.
func validateSnapForAddingToGroup(st *state.State, snaps []string, group string, allGrps map[string]*quota.Group) error {
	for _, snapName := range snaps {
		// the snap has to exist
		if _, err := snapstate.CurrentInfo(st, snapName); err != nil {
			return fmt.Errorf("cannot use snap %q in group %q: %v", snapName, group, err)
		}

		// the snap must not be a member of any group already
		for _, existing := range allGrps {
			if !strutil.ListContains(existing.Snaps, snapName) {
				continue
			}
			return fmt.Errorf("cannot add snap %q to group %q: snap already in quota group %q", snapName, group, existing.Name)
		}
	}

	return nil
}