Skip to content

Commit db04662

Browse files
committed
fs: allow detached mounts in clone_private_mount()
In container workloads idmapped mounts are often used as layers for overlayfs. Recently I added the ability to specify layers in overlayfs as file descriptors instead of path names. It should be possible to simply use the detached mounts directly when specifying layers instead of having to attach them beforehand. They are discarded after overlayfs is mounted anyway so it's pointless system calls for userspace and pointless locking for the kernel. This just recently came up again in [1]. So enable clone_private_mount() to use detached mounts directly. The following conditions must be met: - Provided path must be the root of a detached mount tree. - Provided path may not create mount namespace loops. - Provided path must be mounted. It would be possible to be stricter and require that the caller must have CAP_SYS_ADMIN in the owning user namespace of the anonymous mount namespace but since this restriction isn't enforced for move_mount() there's no point in enforcing it for clone_private_mount(). This contains a folded fix for: Reported-by: syzbot+62dfea789a2cedac1298@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=62dfea789a2cedac1298 provided by Lizhi Xu <lizhi.xu@windriver.com> in [2]. Link: https://lore.kernel.org/r/20250207071331.550952-1-lizhi.xu@windriver.com [2] Link: https://lore.kernel.org/r/fd8f6574-f737-4743-b220-79c815ee1554@mbaynton.com [1] Link: https://lore.kernel.org/r/20250123-avancieren-erfreuen-3d61f6588fdd@brauner Tested-by: Mike Baynton <mike@mbaynton.com> Signed-off-by: Christian Brauner <brauner@kernel.org>
1 parent 29349a3 commit db04662

File tree

1 file changed

+43
-35
lines changed

1 file changed

+43
-35
lines changed

fs/namespace.c

Lines changed: 43 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -2369,6 +2369,28 @@ bool has_locked_children(struct mount *mnt, struct dentry *dentry)
23692369
return false;
23702370
}
23712371

2372+
/*
2373+
* Check that there aren't references to earlier/same mount namespaces in the
2374+
* specified subtree. Such references can act as pins for mount namespaces
2375+
* that aren't checked by the mount-cycle checking code, thereby allowing
2376+
* cycles to be made.
2377+
*/
2378+
static bool check_for_nsfs_mounts(struct mount *subtree)
2379+
{
2380+
struct mount *p;
2381+
bool ret = false;
2382+
2383+
lock_mount_hash();
2384+
for (p = subtree; p; p = next_mnt(p, subtree))
2385+
if (mnt_ns_loop(p->mnt.mnt_root))
2386+
goto out;
2387+
2388+
ret = true;
2389+
out:
2390+
unlock_mount_hash();
2391+
return ret;
2392+
}
2393+
23722394
/**
23732395
* clone_private_mount - create a private clone of a path
23742396
* @path: path to clone
@@ -2377,37 +2399,45 @@ bool has_locked_children(struct mount *mnt, struct dentry *dentry)
23772399
* will not be attached anywhere in the namespace and will be private (i.e.
23782400
* changes to the originating mount won't be propagated into this).
23792401
*
2402+
* This assumes caller has called or done the equivalent of may_mount().
2403+
*
23802404
* Release with mntput().
23812405
*/
23822406
struct vfsmount *clone_private_mount(const struct path *path)
23832407
{
23842408
struct mount *old_mnt = real_mount(path->mnt);
23852409
struct mount *new_mnt;
23862410

2387-
down_read(&namespace_sem);
2411+
scoped_guard(rwsem_read, &namespace_sem)
23882412
if (IS_MNT_UNBINDABLE(old_mnt))
2389-
goto invalid;
2413+
return ERR_PTR(-EINVAL);
2414+
2415+
if (mnt_has_parent(old_mnt)) {
2416+
if (!check_mnt(old_mnt))
2417+
return ERR_PTR(-EINVAL);
2418+
} else {
2419+
if (!is_mounted(&old_mnt->mnt))
2420+
return ERR_PTR(-EINVAL);
23902421

2391-
if (!check_mnt(old_mnt))
2392-
goto invalid;
2422+
/* Make sure this isn't something purely kernel internal. */
2423+
if (!is_anon_ns(old_mnt->mnt_ns))
2424+
return ERR_PTR(-EINVAL);
2425+
2426+
/* Make sure we don't create mount namespace loops. */
2427+
if (!check_for_nsfs_mounts(old_mnt))
2428+
return ERR_PTR(-EINVAL);
2429+
}
23932430

23942431
if (has_locked_children(old_mnt, path->dentry))
2395-
goto invalid;
2432+
return ERR_PTR(-EINVAL);
23962433

23972434
new_mnt = clone_mnt(old_mnt, path->dentry, CL_PRIVATE);
2398-
up_read(&namespace_sem);
2399-
24002435
if (IS_ERR(new_mnt))
2401-
return ERR_CAST(new_mnt);
2436+
return ERR_PTR(-EINVAL);
24022437

24032438
/* Longterm mount to be removed by kern_unmount*() */
24042439
new_mnt->mnt_ns = MNT_NS_INTERNAL;
2405-
24062440
return &new_mnt->mnt;
2407-
2408-
invalid:
2409-
up_read(&namespace_sem);
2410-
return ERR_PTR(-EINVAL);
24112441
}
24122442
EXPORT_SYMBOL_GPL(clone_private_mount);
24132443

@@ -3206,28 +3236,6 @@ static inline int tree_contains_unbindable(struct mount *mnt)
32063236
return 0;
32073237
}
32083238

3209-
/*
3210-
* Check that there aren't references to earlier/same mount namespaces in the
3211-
* specified subtree. Such references can act as pins for mount namespaces
3212-
* that aren't checked by the mount-cycle checking code, thereby allowing
3213-
* cycles to be made.
3214-
*/
3215-
static bool check_for_nsfs_mounts(struct mount *subtree)
3216-
{
3217-
struct mount *p;
3218-
bool ret = false;
3219-
3220-
lock_mount_hash();
3221-
for (p = subtree; p; p = next_mnt(p, subtree))
3222-
if (mnt_ns_loop(p->mnt.mnt_root))
3223-
goto out;
3224-
3225-
ret = true;
3226-
out:
3227-
unlock_mount_hash();
3228-
return ret;
3229-
}
3230-
32313239
static int do_set_group(struct path *from_path, struct path *to_path)
32323240
{
32333241
struct mount *from, *to;

0 commit comments

Comments
 (0)