diff --git a/filesystem/filesystem.go b/filesystem/filesystem.go index e0ef1108..ecdeae1f 100644 --- a/filesystem/filesystem.go +++ b/filesystem/filesystem.go @@ -70,8 +70,9 @@ var ( // DeviceNumber - Device number of the filesystem. This is set even if // Device isn't, since all filesystems have a device // number assigned by the kernel, even pseudo-filesystems. -// BindMnt - True if this mount is not for the full filesystem but -// rather is only for a subtree. +// Subtree - The mounted subtree of the filesystem. This is usually +// "/", meaning that the entire filesystem is mounted, but +// it can differ for bind mounts. // ReadOnly - True if this is a read-only mount // // In order to use a Mount to store fscrypt metadata, some directories must be @@ -99,7 +100,7 @@ type Mount struct { FilesystemType string Device string DeviceNumber DeviceNumber - BindMnt bool + Subtree string ReadOnly bool } diff --git a/filesystem/mountpoint.go b/filesystem/mountpoint.go index edbdc851..acddbae7 100644 --- a/filesystem/mountpoint.go +++ b/filesystem/mountpoint.go @@ -39,6 +39,11 @@ import ( var ( // This map holds data about the state of the system's filesystems. + // + // It only contains one Mount per filesystem, even if there are + // additional bind mounts, since we want to store fscrypt metadata in + // only one place per filesystem. If it is ambiguous which Mount should + // be used, an explicit nil entry is stored. mountsByDevice map[DeviceNumber]*Mount // Used to make the mount functions thread safe mountMutex sync.Mutex @@ -122,7 +127,7 @@ func parseMountInfoLine(line string) *Mount { if err != nil { return nil } - mnt.BindMnt = unescapeString(fields[3]) != "/" + mnt.Subtree = unescapeString(fields[3]) mnt.Path = unescapeString(fields[4]) for _, opt := range strings.Split(fields[5], ",") { if opt == "ro" { @@ -134,6 +139,124 @@ func parseMountInfoLine(line string) *Mount { return mnt } +type mountpointTreeNode struct { + mount *Mount + parent *mountpointTreeNode + children []*mountpointTreeNode +} + +func addUncontainedSubtreesRecursive(dst map[string]bool, + node *mountpointTreeNode, allUncontainedSubtrees map[string]bool) { + if allUncontainedSubtrees[node.mount.Subtree] { + dst[node.mount.Subtree] = true + } + for _, child := range node.children { + addUncontainedSubtreesRecursive(dst, child, allUncontainedSubtrees) + } +} + +// findMainMount finds the "main" Mount of a filesystem. The "main" Mount is +// where the filesystem's fscrypt metadata is stored. +// +// Normally, there is just one Mount and it's of the entire filesystem +// (mnt.Subtree == "/"). But in general, the filesystem might be mounted in +// multiple places, including "bind mounts" where mnt.Subtree != "/". Also, the +// filesystem might have a combination of read-write and read-only mounts. +// +// To handle most cases, we could just choose a mount with mnt.Subtree == "/", +// preferably a read-write mount. However, that doesn't work in containers +// where the "/" subtree might not be mounted. Here's a real-world example: +// +// mnt.Subtree mnt.Path +// ----------- -------- +// /var/lib/lxc/base/rootfs / +// /var/cache/pacman/pkg /var/cache/pacman/pkg +// /srv/repo/x86_64 /srv/http/x86_64 +// +// In this case, all mnt.Subtree are independent. To handle this case, we must +// choose the Mount whose mnt.Path contains the others, i.e. the first one. +// Note: the fscrypt metadata won't be usable from outside the container since +// it won't be at the real root of the filesystem, but that may be acceptable. +// +// However, we can't look *only* at mnt.Path, since in some cases mnt.Subtree is +// needed to correctly handle bind mounts. For example, in the following case, +// the first Mount should be chosen: +// +// mnt.Subtree mnt.Path +// ----------- -------- +// /foo /foo +// /foo/dir /dir +// +// To solve this, we divide the mounts into non-overlapping trees of mnt.Path. +// Then, we choose one of these trees which contains (exactly or via path +// prefix) *all* mnt.Subtree. We then return the root of this tree. In both +// the above examples, this algorithm returns the first Mount. +func findMainMount(filesystemMounts []*Mount) *Mount { + // Index this filesystem's mounts by path. Note: paths are unique here, + // since non-last mounts were already excluded earlier. + // + // Also build the set of all mounted subtrees. + mountsByPath := make(map[string]*mountpointTreeNode) + allSubtrees := make(map[string]bool) + for _, mnt := range filesystemMounts { + mountsByPath[mnt.Path] = &mountpointTreeNode{mount: mnt} + allSubtrees[mnt.Subtree] = true + } + + // Divide the mounts into non-overlapping trees of mountpoints. + for path, mntNode := range mountsByPath { + for path != "/" && mntNode.parent == nil { + path = filepath.Dir(path) + if parent := mountsByPath[path]; parent != nil { + mntNode.parent = parent + parent.children = append(parent.children, mntNode) + } + } + } + + // Build the set of mounted subtrees that aren't contained in any other + // mounted subtree. + allUncontainedSubtrees := make(map[string]bool) + for subtree := range allSubtrees { + contained := false + for t := subtree; t != "/" && !contained; { + t = filepath.Dir(t) + contained = allSubtrees[t] + } + if !contained { + allUncontainedSubtrees[subtree] = true + } + } + + // Select the root of a mountpoint tree whose mounted subtrees contain + // *all* mounted subtrees. Equivalently, select a mountpoint tree in + // which every uncontained subtree is mounted. + var mainMount *Mount + for _, mntNode := range mountsByPath { + mnt := mntNode.mount + if mntNode.parent != nil { + continue + } + uncontainedSubtrees := make(map[string]bool) + addUncontainedSubtreesRecursive(uncontainedSubtrees, mntNode, allUncontainedSubtrees) + if len(uncontainedSubtrees) != len(allUncontainedSubtrees) { + continue + } + // If there's more than one eligible mount, they should have the + // same Subtree. Otherwise it's ambiguous which one to use. + if mainMount != nil && mainMount.Subtree != mnt.Subtree { + log.Printf("Unsupported case: %q (%v) has multiple non-overlapping mounts. This filesystem will be ignored!", + mnt.Device, mnt.DeviceNumber) + return nil + } + // Prefer a read-write mount to a read-only one. + if mainMount == nil || mainMount.ReadOnly { + mainMount = mnt + } + } + return mainMount +} + // This is separate from loadMountInfo() only for unit testing. func readMountInfo(r io.Reader) error { mountsByPath := make(map[string]*Mount) @@ -159,23 +282,17 @@ func readMountInfo(r io.Reader) error { // mountpoints are listed in mount order. mountsByPath[mnt.Path] = mnt } - // fscrypt only really cares about the root directory of each - // filesystem, because that's where the fscrypt metadata is stored. So - // keep just one Mount per filesystem, ignoring bind mounts. Store that - // Mount in mountsByDevice so that it can be found later from the device - // number. Also, prefer a read-write mount to a read-only one. - // - // If the filesystem has *only* bind mounts, store an explicit nil entry - // so that we can show a useful error message later. + // For each filesystem, choose a "main" Mount and discard any additional + // bind mounts. fscrypt only cares about the main Mount, since it's + // where the fscrypt metadata is stored. Store all main Mounts in + // mountsByDevice so that they can be found by device number later. + allMountsByDevice := make(map[DeviceNumber][]*Mount) for _, mnt := range mountsByPath { - existingMnt, ok := mountsByDevice[mnt.DeviceNumber] - if mnt.BindMnt { - if !ok { - mountsByDevice[mnt.DeviceNumber] = nil - } - } else if existingMnt == nil || (existingMnt.ReadOnly && !mnt.ReadOnly) { - mountsByDevice[mnt.DeviceNumber] = mnt - } + allMountsByDevice[mnt.DeviceNumber] = + append(allMountsByDevice[mnt.DeviceNumber], mnt) + } + for deviceNumber, filesystemMounts := range allMountsByDevice { + mountsByDevice[deviceNumber] = findMainMount(filesystemMounts) } return nil } @@ -197,13 +314,13 @@ func loadMountInfo() error { return nil } -func filesystemRootDirNotVisibleError(deviceNumber DeviceNumber) error { - return errors.Errorf("root of filesystem on device %q (%v) is not visible in the current mount namespace", +func filesystemLacksMainMountError(deviceNumber DeviceNumber) error { + return errors.Errorf("Device %q (%v) lacks a \"main\" mountpoint in the current mount namespace, so it's ambiguous where to store the fscrypt metadata.", getDeviceName(deviceNumber), deviceNumber) } -// AllFilesystems lists all non-bind Mounts on the current system ordered by -// path. Use CheckSetup() to see if they are used with fscrypt. +// AllFilesystems lists all mounted filesystems ordered by path to their "main" +// Mount. Use CheckSetup() to see if they are set up for use with fscrypt. func AllFilesystems() ([]*Mount, error) { mountMutex.Lock() defer mountMutex.Unlock() @@ -250,24 +367,24 @@ func FindMount(path string) (*Mount, error) { return nil, errors.Errorf("couldn't find mountpoint containing %q", path) } if mnt == nil { - return nil, filesystemRootDirNotVisibleError(deviceNumber) + return nil, filesystemLacksMainMountError(deviceNumber) } return mnt, nil } // GetMount is like FindMount, except GetMount also returns an error if the path -// isn't the root directory of a filesystem. For example, if a filesystem is -// mounted at "/mnt" and the file "/mnt/a" exists, FindMount("/mnt/a") will -// succeed whereas GetMount("/mnt/a") will fail. +// doesn't name the same file as the filesystem's "main" Mount. For example, if +// a filesystem is fully mounted at "/mnt" and if "/mnt/a" exists, then +// FindMount("/mnt/a") will succeed whereas GetMount("/mnt/a") will fail. This +// is true even if "/mnt/a" is a bind mount of part of the same filesystem. func GetMount(mountpoint string) (*Mount, error) { mnt, err := FindMount(mountpoint) if err != nil { return nil, errors.Wrap(ErrNotAMountpoint, mountpoint) } - // Check whether 'mountpoint' is the root directory of the filesystem, - // i.e. is the same directory as 'mnt.Path'. Use os.SameFile() (i.e., - // compare inode numbers) rather than compare canonical paths, since the - // filesystem might be fully mounted in multiple places. + // Check whether 'mountpoint' names the same directory as 'mnt.Path'. + // Use os.SameFile() (i.e., compare inode numbers) rather than compare + // canonical paths, since filesystems may be mounted in multiple places. fi1, err := os.Stat(mountpoint) if err != nil { return nil, err @@ -323,7 +440,7 @@ func getMountFromLink(link string) (*Mount, error) { getDeviceName(deviceNumber), deviceNumber) } if mnt == nil { - return nil, filesystemRootDirNotVisibleError(deviceNumber) + return nil, filesystemLacksMainMountError(deviceNumber) } return mnt, nil } diff --git a/filesystem/mountpoint_test.go b/filesystem/mountpoint_test.go index ad0dab77..633ff947 100644 --- a/filesystem/mountpoint_test.go +++ b/filesystem/mountpoint_test.go @@ -84,8 +84,8 @@ func TestLoadMountInfoBasic(t *testing.T) { if mnt.DeviceNumber.String() != "259:3" { t.Error("Wrong device number") } - if mnt.BindMnt { - t.Error("Wrong bind mount flag") + if mnt.Subtree != "/" { + t.Error("Wrong subtree") } if mnt.ReadOnly { t.Error("Wrong readonly flag") @@ -220,8 +220,9 @@ func TestReadWriteMountIsPreferredOverReadOnlyMount(t *testing.T) { } } -// Test that a mount of the full filesystem is preferred over a bind mount. -func TestFullMountIsPreferredOverBindMount(t *testing.T) { +// Test that a mount of the full filesystem is preferred over mounts of non-root +// subtrees, given independent mountpoints. +func TestRootSubtreeIsPreferred(t *testing.T) { mountinfo := ` 222 15 259:3 /subtree1 /home rw shared:1 - ext4 /dev/root rw 222 15 259:3 / /mnt rw shared:1 - ext4 /dev/root rw @@ -231,16 +232,133 @@ func TestFullMountIsPreferredOverBindMount(t *testing.T) { defer endLoadMountInfoTest() loadMountInfoFromString(mountinfo) mnt := mountForDevice("259:3") - if mnt.Path != "/mnt" { + if mnt.Subtree != "/" { + t.Error("Wrong mount was chosen") + } +} + +// Test that a mount that is not of the full filesystem but still contains all +// other mounted subtrees is preferred, given independent mountpoints. +func TestHighestSubtreeIsPreferred(t *testing.T) { + mountinfo := ` +222 15 259:3 /foo/bar /mnt rw shared:1 - ext4 /dev/root rw +222 15 259:3 /foo /tmp rw shared:1 - ext4 /dev/root rw +222 15 259:3 /foo/baz /home rw shared:1 - ext4 /dev/root rw +` + beginLoadMountInfoTest() + defer endLoadMountInfoTest() + loadMountInfoFromString(mountinfo) + deviceNumber, _ := newDeviceNumberFromString("259:3") + mnt := mountsByDevice[deviceNumber] + if mnt.Subtree != "/foo" { + t.Error("Wrong mount was chosen") + } +} + +// Test that mountpoint "/" is preferred, given independent subtrees. +func TestRootMountpointIsPreferred(t *testing.T) { + mountinfo := ` +222 15 259:3 /var/cache/pacman/pkg /mnt rw shared:1 - ext4 /dev/root rw +222 15 259:3 /var/lib/lxc/base/rootfs / rw shared:1 - ext4 /dev/root rw +222 15 259:3 /srv/repo/x86_64 /home rw shared:1 - ext4 /dev/root rw +` + beginLoadMountInfoTest() + defer endLoadMountInfoTest() + loadMountInfoFromString(mountinfo) + deviceNumber, _ := newDeviceNumberFromString("259:3") + mnt := mountsByDevice[deviceNumber] + if mnt.Subtree != "/var/lib/lxc/base/rootfs" { + t.Error("Wrong mount was chosen") + } +} + +// Test that a mountpoint that is not "/" but still contains all other +// mountpoints is preferred, given independent subtrees. +func TestHighestMountpointIsPreferred(t *testing.T) { + tempDir, err := ioutil.TempDir("", "fscrypt") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(tempDir) + tempDir, err = filepath.Abs(tempDir) + if err != nil { + t.Fatal(err) + } + if err := os.MkdirAll(tempDir+"/a/b", 0700); err != nil { + t.Fatal(err) + } + if err := os.Mkdir(tempDir+"/a/c", 0700); err != nil { + t.Fatal(err) + } + mountinfo := fmt.Sprintf(` +222 15 259:3 /0 %s rw shared:1 - ext4 /dev/root rw +222 15 259:3 /1 %s rw shared:1 - ext4 /dev/root rw +222 15 259:3 /2 %s rw shared:1 - ext4 /dev/root rw +`, tempDir+"/a/b", tempDir+"/a", tempDir+"/a/c") + + beginLoadMountInfoTest() + defer endLoadMountInfoTest() + loadMountInfoFromString(mountinfo) + deviceNumber, _ := newDeviceNumberFromString("259:3") + mnt := mountsByDevice[deviceNumber] + if mnt.Subtree != "/1" { + t.Error("Wrong mount was chosen") + } +} + +// Test that if some subtrees are contained in other subtrees, *and* some +// mountpoints are contained in other mountpoints, the chosen Mount is the root +// of a tree of mountpoints whose mounted subtrees contain all mounted subtrees. +func TestLoadContainedSubtreesAndMountpoints(t *testing.T) { + tempDir, err := ioutil.TempDir("", "fscrypt") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(tempDir) + tempDir, err = filepath.Abs(tempDir) + if err != nil { + t.Fatal(err) + } + if err := os.MkdirAll(tempDir+"/a/b", 0700); err != nil { + t.Fatal(err) + } + if err := os.Mkdir(tempDir+"/a/c", 0700); err != nil { + t.Fatal(err) + } + if err := os.Mkdir(tempDir+"/d", 0700); err != nil { + t.Fatal(err) + } + if err := os.Mkdir(tempDir+"/e", 0700); err != nil { + t.Fatal(err) + } + // The first three mounts form a tree of mountpoints. The rest have + // independent mountpoints but have mounted subtrees contained in the + // mounted subtrees of the first mountpoint tree. + mountinfo := fmt.Sprintf(` +222 15 259:3 /0 %s rw shared:1 - ext4 /dev/root rw +222 15 259:3 /1 %s rw shared:1 - ext4 /dev/root rw +222 15 259:3 /2 %s rw shared:1 - ext4 /dev/root rw +222 15 259:3 /1/3 %s rw shared:1 - ext4 /dev/root rw +222 15 259:3 /2/4 %s rw shared:1 - ext4 /dev/root rw +`, tempDir+"/a/b", tempDir+"/a", tempDir+"/a/c", + tempDir+"/d", tempDir+"/e") + + beginLoadMountInfoTest() + defer endLoadMountInfoTest() + loadMountInfoFromString(mountinfo) + deviceNumber, _ := newDeviceNumberFromString("259:3") + mnt := mountsByDevice[deviceNumber] + if mnt.Subtree != "/1" { t.Error("Wrong mount was chosen") } } -// Test that if a filesystem only has bind mounts, a nil mountsByDevice entry is -// created. -func TestLoadOnlyBindMounts(t *testing.T) { +// Test loading mounts with independent subtrees *and* independent mountpoints. +// This case is ambiguous, so an explicit nil entry should be stored. +func TestLoadAmbiguousMounts(t *testing.T) { mountinfo := ` -222 15 259:3 /foo /mnt ro,relatime shared:1 - ext4 /dev/root rw,data=ordered +222 15 259:3 /foo /mnt rw shared:1 - ext4 /dev/root rw +222 15 259:3 /bar /tmp rw shared:1 - ext4 /dev/root rw ` beginLoadMountInfoTest() defer endLoadMountInfoTest()