-
Notifications
You must be signed in to change notification settings - Fork 242
/
btrfs.go
690 lines (592 loc) · 17.7 KB
/
btrfs.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
//go:build linux && cgo
// +build linux,cgo
package btrfs
/*
#include <stdlib.h>
#include <dirent.h>
// keep struct field name compatible with btrfs-progs < 6.1.
#define max_referenced max_rfer
#include <btrfs/ioctl.h>
#include <btrfs/ctree.h>
static void set_name_btrfs_ioctl_vol_args_v2(struct btrfs_ioctl_vol_args_v2* btrfs_struct, const char* value) {
snprintf(btrfs_struct->name, BTRFS_SUBVOL_NAME_MAX, "%s", value);
}
*/
import "C"
import (
"fmt"
"io/fs"
"math"
"os"
"path"
"path/filepath"
"strconv"
"strings"
"sync"
"unsafe"
graphdriver "github.com/containers/storage/drivers"
"github.com/containers/storage/pkg/directory"
"github.com/containers/storage/pkg/idtools"
"github.com/containers/storage/pkg/mount"
"github.com/containers/storage/pkg/parsers"
"github.com/containers/storage/pkg/system"
"github.com/docker/go-units"
"github.com/opencontainers/selinux/go-selinux/label"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
const defaultPerms = os.FileMode(0555)
func init() {
graphdriver.MustRegister("btrfs", Init)
}
type btrfsOptions struct {
minSpace uint64
size uint64
}
// Init returns a new BTRFS driver.
// An error is returned if BTRFS is not supported.
func Init(home string, options graphdriver.Options) (graphdriver.Driver, error) {
fsMagic, err := graphdriver.GetFSMagic(home)
if err != nil {
return nil, err
}
if fsMagic != graphdriver.FsMagicBtrfs {
return nil, fmt.Errorf("%q is not on a btrfs filesystem: %w", home, graphdriver.ErrPrerequisites)
}
rootUID, rootGID, err := idtools.GetRootUIDGID(options.UIDMaps, options.GIDMaps)
if err != nil {
return nil, err
}
if err := idtools.MkdirAllAs(home, 0700, rootUID, rootGID); err != nil {
return nil, err
}
if err := mount.MakePrivate(home); err != nil {
return nil, err
}
opt, userDiskQuota, err := parseOptions(options.DriverOptions)
if err != nil {
return nil, err
}
driver := &Driver{
home: home,
uidMaps: options.UIDMaps,
gidMaps: options.GIDMaps,
options: opt,
}
if userDiskQuota {
if err := driver.enableQuota(); err != nil {
return nil, err
}
}
return graphdriver.NewNaiveDiffDriver(driver, graphdriver.NewNaiveLayerIDMapUpdater(driver)), nil
}
func parseOptions(opt []string) (btrfsOptions, bool, error) {
var options btrfsOptions
userDiskQuota := false
for _, option := range opt {
key, val, err := parsers.ParseKeyValueOpt(option)
if err != nil {
return options, userDiskQuota, err
}
key = strings.ToLower(key)
switch key {
case "btrfs.min_space":
minSpace, err := units.RAMInBytes(val)
if err != nil {
return options, userDiskQuota, err
}
userDiskQuota = true
options.minSpace = uint64(minSpace)
case "btrfs.mountopt":
return options, userDiskQuota, fmt.Errorf("btrfs driver does not support mount options")
default:
return options, userDiskQuota, fmt.Errorf("unknown option %s", key)
}
}
return options, userDiskQuota, nil
}
// Driver contains information about the filesystem mounted.
type Driver struct {
//root of the file system
home string
uidMaps []idtools.IDMap
gidMaps []idtools.IDMap
options btrfsOptions
quotaEnabled bool
once sync.Once
}
// String prints the name of the driver (btrfs).
func (d *Driver) String() string {
return "btrfs"
}
// Status returns current driver information in a two dimensional string array.
// Output contains "Build Version" and "Library Version" of the btrfs libraries used.
// Version information can be used to check compatibility with your kernel.
func (d *Driver) Status() [][2]string {
status := [][2]string{}
if bv := btrfsBuildVersion(); bv != "-" {
status = append(status, [2]string{"Build Version", bv})
}
if lv := btrfsLibVersion(); lv != -1 {
status = append(status, [2]string{"Library Version", fmt.Sprintf("%d", lv)})
}
return status
}
// Metadata returns empty metadata for this driver.
func (d *Driver) Metadata(id string) (map[string]string, error) {
return nil, nil
}
// Cleanup unmounts the home directory.
func (d *Driver) Cleanup() error {
return mount.Unmount(d.home)
}
func free(p *C.char) {
C.free(unsafe.Pointer(p))
}
func openDir(path string) (*C.DIR, error) {
Cpath := C.CString(path)
defer free(Cpath)
dir := C.opendir(Cpath)
if dir == nil {
return nil, fmt.Errorf("can't open dir %s", path)
}
return dir, nil
}
func closeDir(dir *C.DIR) {
if dir != nil {
C.closedir(dir)
}
}
func getDirFd(dir *C.DIR) uintptr {
return uintptr(C.dirfd(dir))
}
func subvolCreate(path, name string) error {
dir, err := openDir(path)
if err != nil {
return err
}
defer closeDir(dir)
var args C.struct_btrfs_ioctl_vol_args
for i, c := range []byte(name) {
args.name[i] = C.char(c)
}
_, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_SUBVOL_CREATE,
uintptr(unsafe.Pointer(&args)))
if errno != 0 {
return fmt.Errorf("failed to create btrfs subvolume: %w", errno)
}
return nil
}
func subvolSnapshot(src, dest, name string) error {
srcDir, err := openDir(src)
if err != nil {
return err
}
defer closeDir(srcDir)
destDir, err := openDir(dest)
if err != nil {
return err
}
defer closeDir(destDir)
var args C.struct_btrfs_ioctl_vol_args_v2
args.fd = C.__s64(getDirFd(srcDir))
var cs = C.CString(name)
C.set_name_btrfs_ioctl_vol_args_v2(&args, cs)
C.free(unsafe.Pointer(cs))
_, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(destDir), C.BTRFS_IOC_SNAP_CREATE_V2,
uintptr(unsafe.Pointer(&args)))
if errno != 0 {
return fmt.Errorf("failed to create btrfs snapshot: %w", errno)
}
return nil
}
func isSubvolume(p string) (bool, error) {
var bufStat unix.Stat_t
if err := unix.Lstat(p, &bufStat); err != nil {
return false, err
}
// return true if it is a btrfs subvolume
return bufStat.Ino == C.BTRFS_FIRST_FREE_OBJECTID, nil
}
func subvolDelete(dirpath, name string, quotaEnabled bool) error {
dir, err := openDir(dirpath)
if err != nil {
return err
}
defer closeDir(dir)
fullPath := path.Join(dirpath, name)
var args C.struct_btrfs_ioctl_vol_args
// walk the btrfs subvolumes
walkSubvolumes := func(p string, d fs.DirEntry, err error) error {
if err != nil {
if os.IsNotExist(err) && p != fullPath {
// missing most likely because the path was a subvolume that got removed in the previous iteration
// since it's gone anyway, we don't care
return nil
}
return fmt.Errorf("walking subvolumes: %w", err)
}
// we want to check children only so skip itself
// it will be removed after the filepath walk anyways
if d.IsDir() && p != fullPath {
sv, err := isSubvolume(p)
if err != nil {
return fmt.Errorf("failed to test if %s is a btrfs subvolume: %w", p, err)
}
if sv {
if err := subvolDelete(path.Dir(p), d.Name(), quotaEnabled); err != nil {
return fmt.Errorf("failed to destroy btrfs child subvolume (%s) of parent (%s): %w", p, dirpath, err)
}
}
}
return nil
}
if err := filepath.WalkDir(path.Join(dirpath, name), walkSubvolumes); err != nil {
return fmt.Errorf("recursively walking subvolumes for %s failed: %w", dirpath, err)
}
if quotaEnabled {
if qgroupid, err := subvolLookupQgroup(fullPath); err == nil {
var args C.struct_btrfs_ioctl_qgroup_create_args
args.qgroupid = C.__u64(qgroupid)
_, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_QGROUP_CREATE,
uintptr(unsafe.Pointer(&args)))
if errno != 0 {
logrus.Errorf("Failed to delete btrfs qgroup %v for %s: %v", qgroupid, fullPath, errno.Error())
}
} else {
logrus.Errorf("Failed to lookup btrfs qgroup for %s: %v", fullPath, err.Error())
}
}
// all subvolumes have been removed
// now remove the one originally passed in
for i, c := range []byte(name) {
args.name[i] = C.char(c)
}
_, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_SNAP_DESTROY,
uintptr(unsafe.Pointer(&args)))
if errno != 0 {
return fmt.Errorf("failed to destroy btrfs snapshot %s for %s: %w", dirpath, name, errno)
}
return nil
}
func (d *Driver) updateQuotaStatus() {
d.once.Do(func() {
if !d.quotaEnabled {
// In case quotaEnabled is not set, check qgroup and update quotaEnabled as needed
if err := qgroupStatus(d.home); err != nil {
// quota is still not enabled
return
}
d.quotaEnabled = true
}
})
}
func (d *Driver) enableQuota() error {
d.updateQuotaStatus()
if d.quotaEnabled {
return nil
}
dir, err := openDir(d.home)
if err != nil {
return err
}
defer closeDir(dir)
var args C.struct_btrfs_ioctl_quota_ctl_args
args.cmd = C.BTRFS_QUOTA_CTL_ENABLE
_, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_QUOTA_CTL,
uintptr(unsafe.Pointer(&args)))
if errno != 0 {
return fmt.Errorf("failed to enable btrfs quota for %s: %w", dir, errno)
}
d.quotaEnabled = true
return nil
}
func (d *Driver) subvolRescanQuota() error {
d.updateQuotaStatus()
if !d.quotaEnabled {
return nil
}
dir, err := openDir(d.home)
if err != nil {
return err
}
defer closeDir(dir)
var args C.struct_btrfs_ioctl_quota_rescan_args
_, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_QUOTA_RESCAN_WAIT,
uintptr(unsafe.Pointer(&args)))
if errno != 0 {
return fmt.Errorf("failed to rescan btrfs quota for %s: %w", dir, errno)
}
return nil
}
func subvolLimitQgroup(path string, size uint64) error {
dir, err := openDir(path)
if err != nil {
return err
}
defer closeDir(dir)
var args C.struct_btrfs_ioctl_qgroup_limit_args
args.lim.max_rfer = C.__u64(size)
args.lim.flags = C.BTRFS_QGROUP_LIMIT_MAX_RFER
_, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_QGROUP_LIMIT,
uintptr(unsafe.Pointer(&args)))
if errno != 0 {
return fmt.Errorf("failed to limit qgroup for %s: %w", dir, errno)
}
return nil
}
// qgroupStatus performs a BTRFS_IOC_TREE_SEARCH on the root path
// with search key of BTRFS_QGROUP_STATUS_KEY.
// In case qgroup is enabled, the returned key type will match BTRFS_QGROUP_STATUS_KEY.
// For more details please see https://github.com/kdave/btrfs-progs/blob/v4.9/qgroup.c#L1035
func qgroupStatus(path string) error {
dir, err := openDir(path)
if err != nil {
return err
}
defer closeDir(dir)
var args C.struct_btrfs_ioctl_search_args
args.key.tree_id = C.BTRFS_QUOTA_TREE_OBJECTID
args.key.min_type = C.BTRFS_QGROUP_STATUS_KEY
args.key.max_type = C.BTRFS_QGROUP_STATUS_KEY
args.key.max_objectid = C.__u64(math.MaxUint64)
args.key.max_offset = C.__u64(math.MaxUint64)
args.key.max_transid = C.__u64(math.MaxUint64)
args.key.nr_items = 4096
_, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_TREE_SEARCH,
uintptr(unsafe.Pointer(&args)))
if errno != 0 {
return fmt.Errorf("failed to search qgroup for %s: %w", path, errno)
}
sh := (*C.struct_btrfs_ioctl_search_header)(unsafe.Pointer(&args.buf))
if sh._type != C.BTRFS_QGROUP_STATUS_KEY {
return fmt.Errorf("invalid qgroup search header type for %s: %v", path, sh._type)
}
return nil
}
func subvolLookupQgroup(path string) (uint64, error) {
dir, err := openDir(path)
if err != nil {
return 0, err
}
defer closeDir(dir)
var args C.struct_btrfs_ioctl_ino_lookup_args
args.objectid = C.BTRFS_FIRST_FREE_OBJECTID
_, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_INO_LOOKUP,
uintptr(unsafe.Pointer(&args)))
if errno != 0 {
return 0, fmt.Errorf("failed to lookup qgroup for %s: %w", dir, errno)
}
if args.treeid == 0 {
return 0, fmt.Errorf("invalid qgroup id for %s: 0", dir)
}
return uint64(args.treeid), nil
}
func (d *Driver) subvolumesDir() string {
return path.Join(d.home, "subvolumes")
}
func (d *Driver) subvolumesDirID(id string) string {
return path.Join(d.subvolumesDir(), id)
}
func (d *Driver) quotasDir() string {
return path.Join(d.home, "quotas")
}
func (d *Driver) quotasDirID(id string) string {
return path.Join(d.quotasDir(), id)
}
// CreateFromTemplate creates a layer with the same contents and parent as another layer.
func (d *Driver) CreateFromTemplate(id, template string, templateIDMappings *idtools.IDMappings, parent string, parentIDMappings *idtools.IDMappings, opts *graphdriver.CreateOpts, readWrite bool) error {
return d.Create(id, template, opts)
}
// CreateReadWrite creates a layer that is writable for use as a container
// file system.
func (d *Driver) CreateReadWrite(id, parent string, opts *graphdriver.CreateOpts) error {
return d.Create(id, parent, opts)
}
// Create the filesystem with given id.
func (d *Driver) Create(id, parent string, opts *graphdriver.CreateOpts) error {
quotas := path.Join(d.home, "quotas")
subvolumes := path.Join(d.home, "subvolumes")
rootUID, rootGID, err := idtools.GetRootUIDGID(d.uidMaps, d.gidMaps)
if err != nil {
return err
}
if err := idtools.MkdirAllAs(subvolumes, 0700, rootUID, rootGID); err != nil {
return err
}
if parent == "" {
if err := subvolCreate(subvolumes, id); err != nil {
return err
}
if err := os.Chmod(path.Join(subvolumes, id), defaultPerms); err != nil {
return err
}
} else {
parentDir := d.subvolumesDirID(parent)
st, err := os.Stat(parentDir)
if err != nil {
return err
}
if !st.IsDir() {
return fmt.Errorf("%s: not a directory", parentDir)
}
if err := subvolSnapshot(parentDir, subvolumes, id); err != nil {
return err
}
}
var storageOpt map[string]string
if opts != nil {
storageOpt = opts.StorageOpt
}
if _, ok := storageOpt["size"]; ok {
driver := &Driver{}
if err := d.parseStorageOpt(storageOpt, driver); err != nil {
return err
}
if err := d.setStorageSize(path.Join(subvolumes, id), driver); err != nil {
return err
}
if err := idtools.MkdirAllAs(quotas, 0700, rootUID, rootGID); err != nil {
return err
}
if err := os.WriteFile(path.Join(quotas, id), []byte(fmt.Sprint(driver.options.size)), 0644); err != nil {
return err
}
}
// if we have a remapped root (user namespaces enabled), change the created snapshot
// dir ownership to match
if rootUID != 0 || rootGID != 0 {
if err := os.Chown(path.Join(subvolumes, id), rootUID, rootGID); err != nil {
return err
}
}
mountLabel := ""
if opts != nil {
mountLabel = opts.MountLabel
}
return label.Relabel(path.Join(subvolumes, id), mountLabel, false)
}
// Parse btrfs storage options
func (d *Driver) parseStorageOpt(storageOpt map[string]string, driver *Driver) error {
// Read size to change the subvolume disk quota per container
for key, val := range storageOpt {
key := strings.ToLower(key)
switch key {
case "size":
size, err := units.RAMInBytes(val)
if err != nil {
return err
}
driver.options.size = uint64(size)
default:
return fmt.Errorf("unknown option %s", key)
}
}
return nil
}
// Set btrfs storage size
func (d *Driver) setStorageSize(dir string, driver *Driver) error {
if driver.options.size <= 0 {
return fmt.Errorf("btrfs: invalid storage size: %s", units.HumanSize(float64(driver.options.size)))
}
if d.options.minSpace > 0 && driver.options.size < d.options.minSpace {
return fmt.Errorf("btrfs: storage size cannot be less than %s", units.HumanSize(float64(d.options.minSpace)))
}
if err := d.enableQuota(); err != nil {
return err
}
if err := subvolLimitQgroup(dir, driver.options.size); err != nil {
return err
}
return nil
}
// Remove the filesystem with given id.
func (d *Driver) Remove(id string) error {
dir := d.subvolumesDirID(id)
if _, err := os.Stat(dir); err != nil {
return err
}
quotasDir := d.quotasDirID(id)
if _, err := os.Stat(quotasDir); err == nil {
if err := os.Remove(quotasDir); err != nil {
return err
}
} else if !os.IsNotExist(err) {
return err
}
// Call updateQuotaStatus() to invoke status update
d.updateQuotaStatus()
if err := subvolDelete(d.subvolumesDir(), id, d.quotaEnabled); err != nil {
if d.quotaEnabled {
return err
}
// If quota is not enabled, fallback to rmdir syscall to delete subvolumes.
// This would allow unprivileged user to delete their owned subvolumes
// in kernel >= 4.18 without user_subvol_rm_alowed mount option.
}
if err := system.EnsureRemoveAll(dir); err != nil {
return err
}
if err := d.subvolRescanQuota(); err != nil {
return err
}
return nil
}
// Get the requested filesystem id.
func (d *Driver) Get(id string, options graphdriver.MountOpts) (string, error) {
dir := d.subvolumesDirID(id)
st, err := os.Stat(dir)
if err != nil {
return "", err
}
switch len(options.Options) {
case 0:
case 1:
if options.Options[0] == "ro" {
// ignore "ro" option
break
}
fallthrough
default:
return "", fmt.Errorf("btrfs driver does not support mount options")
}
if !st.IsDir() {
return "", fmt.Errorf("%s: not a directory", dir)
}
if quota, err := os.ReadFile(d.quotasDirID(id)); err == nil {
if size, err := strconv.ParseUint(string(quota), 10, 64); err == nil && size >= d.options.minSpace {
if err := d.enableQuota(); err != nil {
return "", err
}
if err := subvolLimitQgroup(dir, size); err != nil {
return "", err
}
}
}
return dir, nil
}
// Put is not implemented for BTRFS as there is no cleanup required for the id.
func (d *Driver) Put(id string) error {
// Get() creates no runtime resources (like e.g. mounts)
// so this doesn't need to do anything.
return nil
}
// ReadWriteDiskUsage returns the disk usage of the writable directory for the ID.
// For BTRFS, it queries the subvolumes path for this ID.
func (d *Driver) ReadWriteDiskUsage(id string) (*directory.DiskUsage, error) {
return directory.Usage(d.subvolumesDirID(id))
}
// Exists checks if the id exists in the filesystem.
func (d *Driver) Exists(id string) bool {
dir := d.subvolumesDirID(id)
_, err := os.Stat(dir)
return err == nil
}
// List layers (not including additional image stores)
func (d *Driver) ListLayers() ([]string, error) {
return nil, graphdriver.ErrNotSupported
}
// AdditionalImageStores returns additional image stores supported by the driver
func (d *Driver) AdditionalImageStores() []string {
return nil
}