Skip to content

Commit

Permalink
switch-root: always use MS_BIND to move api vfs over
Browse files Browse the repository at this point in the history
We previously would use MS_MOVE to move the old procfs, sysfs, /dev/ and
/run to the new place in some cases, and MS_BIND in others.

The logic when to use MS_MOVE and when to use MS_BIND was pretty
arbitrary so far: we'd use MS_MOVE during the initrd → host transition
and MS_BIND when transitioning from host into the exitrd during
shutdown.

Traditionally, using MS_MOVE was preferable, because we didn't bother
with unmounting the old mount hierarchy before the switch root, and thus
using MS_MOVE did some clean-up as side-effect (because the old mounts
went away this way). But since we nowadays properly umount all remaining
mount points (since 268d124) when
transitioning, that clean-up side-effect is pointless.

Let's just use MS_BIND always. Let's tweak it though: let's use
MS_BIND|MS_REC for the kernel API VFS, and MS_BIND without MS_REC for
/run/. The latter reflects the fact that the submounts of /run/ are usually
not so much about accessing kernel APIs as about auxiliary
user resources. Hence let's only move the main mount over for that.

While we are at it, also set up the base filesystem *before* we move the
mounts from the old to the new root, since the base filesystem setup
logic creates various needed inodes for us, which we really should make
use of instead of creating them on our own.
  • Loading branch information
poettering committed Jun 2, 2023
1 parent 34f21ff commit 7c764d4
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 31 deletions.
2 changes: 0 additions & 2 deletions src/core/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -1838,10 +1838,8 @@ static int do_reexecute(
}

if (switch_root_dir) {
/* And switch root with MS_MOVE, because we remove the old directory afterwards and detach it. */
r = switch_root(/* new_root= */ switch_root_dir,
/* old_root_after= */ NULL,
MS_MOVE,
/* destroy_old_root= */ objective == MANAGER_SWITCH_ROOT);
if (r < 0)
log_error_errno(r, "Failed to switch root, trying to continue: %m");
Expand Down
61 changes: 38 additions & 23 deletions src/shared/switch-root.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

#include "base-filesystem.h"
#include "chase.h"
#include "creds-util.h"
#include "fd-util.h"
#include "initrd-util.h"
#include "log.h"
Expand All @@ -27,15 +28,26 @@

int switch_root(const char *new_root,
const char *old_root_after, /* path below the new root, where to place the old root after the transition; may be NULL to unmount it */
unsigned long mount_flags, /* MS_MOVE or MS_BIND used for /proc/, /dev/, /run/, /sys/ */
bool destroy_old_root) {

struct {
const char *path;
unsigned long mount_flags;
} transfer_table[] = {
{ "/dev", MS_BIND|MS_REC }, /* Recursive, because we want to save the original /dev/shm + /dev/pts and similar */
{ "/sys", MS_BIND|MS_REC }, /* Similar, we want to retain various API VFS, or the cgroupv1 /sys/fs/cgroup/ tree */
{ "/proc", MS_BIND|MS_REC }, /* Similar */
{ "/run", MS_BIND }, /* Stuff mounted below this we don't save, as it might have lost its relevance, i.e. credentials, removable media and such, we rather want that the new boot mounts this fresh */
{ SYSTEM_CREDENTIALS_DIRECTORY, MS_BIND }, /* Credentials passed into the system should survive */
{ ENCRYPTED_SYSTEM_CREDENTIALS_DIRECTORY, MS_BIND }, /* Similar */
{ "/run/host", MS_BIND|MS_REC }, /* Host supplied hierarchy should also survive */
};

_cleanup_close_ int old_root_fd = -EBADF, new_root_fd = -EBADF;
_cleanup_free_ char *resolved_old_root_after = NULL;
int r, istmp;

assert(new_root);
assert(IN_SET(mount_flags, MS_MOVE, MS_BIND));

/* Check if we shall remove the contents of the old root */
old_root_fd = open("/", O_DIRECTORY|O_CLOEXEC);
Expand Down Expand Up @@ -83,32 +95,35 @@ int switch_root(const char *new_root,
if (mount(NULL, "/", NULL, MS_REC|MS_PRIVATE, NULL) < 0)
return log_error_errno(errno, "Failed to set \"/\" mount propagation to private: %m");

FOREACH_STRING(path, "/sys", "/dev", "/run", "/proc") {
_cleanup_free_ char *chased = NULL;

r = chase(path, new_root, CHASE_PREFIX_ROOT|CHASE_NONEXISTENT, &chased, NULL);
if (r < 0)
return log_error_errno(r, "Failed to resolve %s/%s: %m", new_root, path);
if (r > 0) {
/* Already exists. Let's see if it is a mount point already. */
r = path_is_mount_point(chased, NULL, 0);
if (r < 0)
return log_error_errno(r, "Failed to determine whether %s is a mount point: %m", chased);
if (r > 0) /* If it is already mounted, then do nothing */
continue;
} else
/* Doesn't exist yet? */
(void) mkdir_p_label(chased, 0755);

if (mount(path, chased, NULL, mount_flags, NULL) < 0)
return log_error_errno(errno, "Failed to mount %s to %s: %m", path, chased);
}

/* Do not fail if base_filesystem_create() fails. Not all switch roots are like base_filesystem_create() wants
* them to look like. They might even boot, if they are RO and don't have the FS layout. Just ignore the error
* and switch_root() nevertheless. */
(void) base_filesystem_create_fd(new_root_fd, new_root, UID_INVALID, GID_INVALID);

FOREACH_ARRAY(transfer, transfer_table, ELEMENTSOF(transfer_table)) {
_cleanup_free_ char *chased = NULL;

if (access(transfer->path, F_OK) < 0) {
log_debug_errno(errno, "Path '%s' to move to target root directory, not found, ignoring: %m", transfer->path);
continue;
}

r = chase(transfer->path, new_root, CHASE_PREFIX_ROOT, &chased, NULL);
if (r < 0)
return log_error_errno(r, "Failed to resolve %s/%s: %m", new_root, transfer->path);

/* Let's see if it is a mount point already. */
r = path_is_mount_point(chased, NULL, 0);
if (r < 0)
return log_error_errno(r, "Failed to determine whether %s is a mount point: %m", chased);
if (r > 0) /* If it is already mounted, then do nothing */
continue;

r = mount_nofollow_verbose(LOG_ERR, transfer->path, chased, NULL, transfer->mount_flags, NULL);
if (r < 0)
return r;
}

if (fchdir(new_root_fd) < 0)
return log_error_errno(errno, "Failed to change directory to %s: %m", new_root);

Expand Down
2 changes: 1 addition & 1 deletion src/shared/switch-root.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

#include <stdbool.h>

int switch_root(const char *new_root, const char *old_root_after, unsigned long mount_flags, bool destroy_old_root);
int switch_root(const char *new_root, const char *old_root_after, bool destroy_old_root);
6 changes: 1 addition & 5 deletions src/shutdown/shutdown.c
Original file line number Diff line number Diff line change
Expand Up @@ -165,14 +165,10 @@ static int switch_root_initramfs(void) {
if (mount(NULL, "/run/initramfs", NULL, MS_PRIVATE, NULL) < 0)
return log_error_errno(errno, "Failed to make /run/initramfs private mount: %m");

/* switch_root with MS_BIND, because there might still be processes lurking around, which have open file descriptors.
* /run/initramfs/shutdown will take care of these.
* Also do not detach the old root, because /run/initramfs/shutdown needs to access it.
*/
/* Do not detach the old root, because /run/initramfs/shutdown needs to access it. */
return switch_root(
/* new_root= */ "/run/initramfs",
/* old_root_after= */ "/oldroot",
MS_BIND,
/* destroy_old_root= */ false);
}

Expand Down

0 comments on commit 7c764d4

Please sign in to comment.