Skip to content

Commit d80b065

Browse files
committed
Merge patch series "proc: restrict overmounting of ephemeral entities"
Christian Brauner <brauner@kernel.org> says: It is currently possible to mount on top of various ephemeral entities in procfs. This specifically includes magic links. To recap, magic links are links of the form /proc/<pid>/fd/<nr>. They serve as references to a target file and during path lookup they cause a jump to the target path. Such magic links disappear if the corresponding file descriptor is closed. Currently it is possible to overmount such magic links: int fd = open("/mnt/foo", O_RDONLY); sprintf(path, "/proc/%d/fd/%d", getpid(), fd); int fd2 = openat(AT_FDCWD, path, O_PATH | O_NOFOLLOW); mount("/mnt/bar", path, "", MS_BIND, 0); Arguably, this is nonsensical and is mostly interesting for an attacker that wants to somehow trick a process into e.g., reopening something that they didn't intend to reopen or to hide a malicious file descriptor. But also it risks leaking mounts for long-running processes. When overmounting a magic link like above, the mount will not be detached when the file descriptor is closed. Only the target mountpoint will disappear. Which has the consequence of making it impossible to unmount that mount afterwards. So the mount will stick around until the process exits and the /proc/<pid>/ directory is cleaned up during proc_flush_pid() when the dentries are pruned and invalidated. That in turn means it's possible for a program to accidentally leak mounts and it's also possible to make a task leak mounts without it's knowledge if the attacker just keeps overmounting things under /proc/<pid>/fd/<nr>. I think it's wrong to try and fix this by us starting to play games with close() or somewhere else to undo these mounts when the file descriptor is closed. The fact that we allow overmounting of such magic links is simply a bug and one that we need to fix. Similar things can be said about entries under fdinfo/ and map_files/ so those are restricted as well. I have a further more aggressive patch that gets out the big hammer and makes everything under /proc/<pid>/*, as well as immediate symlinks such as /proc/self, /proc/thread-self, /proc/mounts, /proc/net that point into /proc/<pid>/ not overmountable. Imho, all of this should be blocked if we can get away with it. It's only useful to hide exploits such as in [1]. And again, overmounting of any global procfs files remains unaffected and is an existing and supported use-case. Link: https://righteousit.com/2024/07/24/hiding-linux-processes-with-bind-mounts [1] // Note that repro uses the traditional way of just mounting over // /proc/<pid>/fd/<nr>. This could also all be achieved just based on // file descriptors using move_mount(). So /proc/<pid>/fd/<nr> isn't the // only entry vector here. It's also possible to e.g., mount directly // onto /proc/<pid>/map_files/* without going over /proc/<pid>/fd/<nr>. int main(int argc, char *argv[]) { char path[PATH_MAX]; creat("/mnt/foo", 0777); creat("/mnt/bar", 0777); /* * For illustration use a bunch of file descriptors in the upper * range that are unused. */ for (int i = 10000; i >= 256; i--) { printf("I'm: /proc/%d/\n", getpid()); int fd2 = open("/mnt/foo", O_RDONLY); if (fd2 < 0) { printf("%m - Failed to open\n"); _exit(1); } int newfd = dup2(fd2, i); if (newfd < 0) { printf("%m - Failed to dup\n"); _exit(1); } close(fd2); sprintf(path, "/proc/%d/fd/%d", getpid(), newfd); int fd = openat(AT_FDCWD, path, O_PATH | O_NOFOLLOW); if (fd < 0) { printf("%m - Failed to open\n"); _exit(3); } sprintf(path, "/proc/%d/fd/%d", getpid(), fd); printf("Mounting on top of %s\n", path); if (mount("/mnt/bar", path, "", MS_BIND, 0)) { printf("%m - Failed to mount\n"); _exit(4); } close(newfd); close(fd2); } /* * Give some time to look at things. The mounts now linger until * the process exits. */ sleep(10000); _exit(0); } * patches from https://lore.kernel.org/r/20240806-work-procfs-v1-0-fb04e1d09f0c@kernel.org: proc: block mounting on top of /proc/<pid>/fdinfo/* proc: block mounting on top of /proc/<pid>/fd/* proc: block mounting on top of /proc/<pid>/map_files/* proc: add proc_splice_unmountable() proc: proc_readfdinfo() -> proc_fdinfo_iterate() proc: proc_readfd() -> proc_fd_iterate() Link: https://lore.kernel.org/r/20240806-work-procfs-v1-0-fb04e1d09f0c@kernel.org Signed-off-by: Christian Brauner <brauner@kernel.org>
2 parents 41e8149 + cf71eaa commit d80b065

File tree

3 files changed

+23
-10
lines changed

3 files changed

+23
-10
lines changed

fs/proc/base.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2335,8 +2335,8 @@ proc_map_files_instantiate(struct dentry *dentry,
23352335
inode->i_op = &proc_map_files_link_inode_operations;
23362336
inode->i_size = 64;
23372337

2338-
d_set_d_op(dentry, &tid_map_files_dentry_operations);
2339-
return d_splice_alias(inode, dentry);
2338+
return proc_splice_unmountable(inode, dentry,
2339+
&tid_map_files_dentry_operations);
23402340
}
23412341

23422342
static struct dentry *proc_map_files_lookup(struct inode *dir,

fs/proc/fd.c

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -220,8 +220,8 @@ static struct dentry *proc_fd_instantiate(struct dentry *dentry,
220220
ei->op.proc_get_link = proc_fd_link;
221221
tid_fd_update_inode(task, inode, data->mode);
222222

223-
d_set_d_op(dentry, &tid_fd_dentry_operations);
224-
return d_splice_alias(inode, dentry);
223+
return proc_splice_unmountable(inode, dentry,
224+
&tid_fd_dentry_operations);
225225
}
226226

227227
static struct dentry *proc_lookupfd_common(struct inode *dir,
@@ -312,14 +312,14 @@ static int proc_readfd_count(struct inode *inode, loff_t *count)
312312
return 0;
313313
}
314314

315-
static int proc_readfd(struct file *file, struct dir_context *ctx)
315+
static int proc_fd_iterate(struct file *file, struct dir_context *ctx)
316316
{
317317
return proc_readfd_common(file, ctx, proc_fd_instantiate);
318318
}
319319

320320
const struct file_operations proc_fd_operations = {
321321
.read = generic_read_dir,
322-
.iterate_shared = proc_readfd,
322+
.iterate_shared = proc_fd_iterate,
323323
.llseek = generic_file_llseek,
324324
};
325325

@@ -397,8 +397,8 @@ static struct dentry *proc_fdinfo_instantiate(struct dentry *dentry,
397397
inode->i_fop = &proc_fdinfo_file_operations;
398398
tid_fd_update_inode(task, inode, 0);
399399

400-
d_set_d_op(dentry, &tid_fd_dentry_operations);
401-
return d_splice_alias(inode, dentry);
400+
return proc_splice_unmountable(inode, dentry,
401+
&tid_fd_dentry_operations);
402402
}
403403

404404
static struct dentry *
@@ -407,7 +407,7 @@ proc_lookupfdinfo(struct inode *dir, struct dentry *dentry, unsigned int flags)
407407
return proc_lookupfd_common(dir, dentry, proc_fdinfo_instantiate);
408408
}
409409

410-
static int proc_readfdinfo(struct file *file, struct dir_context *ctx)
410+
static int proc_fdinfo_iterate(struct file *file, struct dir_context *ctx)
411411
{
412412
return proc_readfd_common(file, ctx,
413413
proc_fdinfo_instantiate);
@@ -421,6 +421,6 @@ const struct inode_operations proc_fdinfo_inode_operations = {
421421

422422
const struct file_operations proc_fdinfo_operations = {
423423
.read = generic_read_dir,
424-
.iterate_shared = proc_readfdinfo,
424+
.iterate_shared = proc_fdinfo_iterate,
425425
.llseek = generic_file_llseek,
426426
};

fs/proc/internal.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -349,3 +349,16 @@ static inline void pde_force_lookup(struct proc_dir_entry *pde)
349349
/* /proc/net/ entries can be changed under us by setns(CLONE_NEWNET) */
350350
pde->proc_dops = &proc_net_dentry_ops;
351351
}
352+
353+
/*
354+
* Add a new procfs dentry that can't serve as a mountpoint. That should
355+
* encompass anything that is ephemeral and can just disappear while the
356+
* process is still around.
357+
*/
358+
static inline struct dentry *proc_splice_unmountable(struct inode *inode,
359+
struct dentry *dentry, const struct dentry_operations *d_ops)
360+
{
361+
d_set_d_op(dentry, d_ops);
362+
dont_mount(dentry);
363+
return d_splice_alias(inode, dentry);
364+
}

0 commit comments

Comments
 (0)