Skip to content

Commit

Permalink
fanotify: introduce directory entry pre-modify permission events
Browse files Browse the repository at this point in the history
Add fsnotify_{name,rename}_perm() hooks to generate
FAN_PRE_{CREATE,DELETE,MOVE_*} permission events when filesystem objects
are about to be linked/unlinked to/from the directory specified by path.

The directory entry pre-modify permission events will not be generated
when the operation is performed relative to an O_PATH dfd that was received
inside a lookup permission event.

Pre-modify permission events are called without filesystem locks held,
so backends will be able to perform filesystem operations before the
modification.

The intended consumer is a persistent change tracking service.

We add the wrappers mnt_want_write_parent{,s}(), which pass the paths and
directory entry names of the filesystem objects about to be modified
and call the fsnotify pre-modify permission hooks.

The new wrappers also acquire/release s_write_srcu to opt-in to vfs write
barriers.  The srcu read section covers both the pre-modify event and
the directory entry modification that follows.

These hooks do not cover all the possible ways that users can make
directory entry modifications, but they cover the relevant syscalls.

Modifications made via overlayfs to the upper layer do not generate
pre-modify events, which is consistent with the fact that overlayfs sets
the FMODE_NONOTIFY flag on underlying open files.

Modifications via nfsd will be covered by a follow-up patch.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
  • Loading branch information
amir73il committed Jun 27, 2023
1 parent 59de97e commit 29c60e4
Show file tree
Hide file tree
Showing 8 changed files with 200 additions and 24 deletions.
78 changes: 57 additions & 21 deletions fs/namei.c
Expand Up @@ -3565,6 +3565,7 @@ static const char *open_last_lookups(struct nameidata *nd,
bool got_write = false;
struct dentry *dentry;
const char *res;
int idx;

nd->flags |= op->intent;

Expand Down Expand Up @@ -3598,7 +3599,13 @@ static const char *open_last_lookups(struct nameidata *nd,
}

if (open_flag & (O_CREAT | O_TRUNC | O_WRONLY | O_RDWR)) {
got_write = !mnt_want_write(nd->path.mnt);
struct lookup_result res = {
.last = nd->last,
.flags = nd->flags & LOOKUP_RES_FLAGS_MASK,
};

got_write = !mnt_want_write_parent(&nd->path, MAY_CREATE,
&res, &idx);
/*
* do _not_ fail yet - we might not need that or fail with
* a different error; let lookup_open() decide; we'll be
Expand All @@ -3618,7 +3625,7 @@ static const char *open_last_lookups(struct nameidata *nd,
inode_unlock_shared(dir->d_inode);

if (got_write)
mnt_drop_write(nd->path.mnt);
mnt_drop_write_srcu(nd->path.mnt, idx);

if (IS_ERR(dentry))
return ERR_CAST(dentry);
Expand Down Expand Up @@ -3912,8 +3919,9 @@ struct file *do_file_open_root(const struct path *root,
return file;
}

static struct dentry *filename_create(int dfd, struct filename *name,
struct path *path, unsigned int lookup_flags)
static struct dentry *filename_create_srcu(int dfd, struct filename *name,
struct path *path,
unsigned int lookup_flags, int *pidx)
{
struct dentry *dentry = ERR_PTR(-EEXIST);
struct lookup_result res;
Expand All @@ -3935,7 +3943,7 @@ static struct dentry *filename_create(int dfd, struct filename *name,
goto out;

/* don't fail immediately if it's r/o, at least try to report other errors */
err2 = mnt_want_write(path->mnt);
err2 = mnt_want_write_parent(path, MAY_CREATE, &res, pidx);
/*
* Do the final lookup. Suppress 'create' if there is a trailing
* '/', and a directory wasn't requested.
Expand Down Expand Up @@ -3973,12 +3981,32 @@ static struct dentry *filename_create(int dfd, struct filename *name,
unlock:
inode_unlock(path->dentry->d_inode);
if (!err2)
mnt_drop_write(path->mnt);
mnt_drop_write_srcu(path->mnt, *pidx);
out:
path_put(path);
return dentry;
}

/*
 * Finish a create that was started with filename_create_srcu(): end the
 * superblock write srcu read section identified by @idx, then hand @path
 * and @dentry to done_path_create() for the regular teardown.
 */
static void done_path_create_srcu(struct path *path, struct dentry *dentry,
				  int idx)
{
	struct super_block *sb = path->mnt->mnt_sb;

	__sb_end_write_srcu(sb, idx);
	done_path_create(path, dentry);
}

/*
 * Variant of filename_create_srcu() for callers that do not track an srcu
 * index.  On success the srcu read section opened during the lookup is
 * ended immediately, so the returned dentry is finished with the plain
 * done_path_create().  An ERR_PTR() from filename_create_srcu() is passed
 * through untouched.
 */
static struct dentry *filename_create(int dfd, struct filename *name,
				      struct path *path,
				      unsigned int lookup_flags)
{
	int srcu_idx;
	struct dentry *dentry;

	dentry = filename_create_srcu(dfd, name, path, lookup_flags, &srcu_idx);
	/* srcu_idx is only consumed on success; nothing to end on error */
	if (IS_ERR(dentry))
		return dentry;

	__sb_end_write_srcu(path->mnt->mnt_sb, srcu_idx);
	return dentry;
}

struct dentry *kern_path_create(int dfd, const char *pathname,
struct path *path, unsigned int lookup_flags)
{
Expand Down Expand Up @@ -4083,12 +4111,13 @@ static int do_mknodat(int dfd, struct filename *name, umode_t mode,
struct path path;
int error;
unsigned int lookup_flags = 0;
int idx;

error = may_mknod(mode);
if (error)
goto out1;
retry:
dentry = filename_create(dfd, name, &path, lookup_flags);
dentry = filename_create_srcu(dfd, name, &path, lookup_flags, &idx);
error = PTR_ERR(dentry);
if (IS_ERR(dentry))
goto out1;
Expand Down Expand Up @@ -4116,7 +4145,7 @@ static int do_mknodat(int dfd, struct filename *name, umode_t mode,
break;
}
out2:
done_path_create(&path, dentry);
done_path_create_srcu(&path, dentry, idx);
if (retry_estale(error, lookup_flags)) {
lookup_flags |= LOOKUP_REVAL;
goto retry;
Expand Down Expand Up @@ -4186,9 +4215,10 @@ int do_mkdirat(int dfd, struct filename *name, umode_t mode)
struct path path;
int error;
unsigned int lookup_flags = LOOKUP_DIRECTORY;
int idx;

retry:
dentry = filename_create(dfd, name, &path, lookup_flags);
dentry = filename_create_srcu(dfd, name, &path, lookup_flags, &idx);
error = PTR_ERR(dentry);
if (IS_ERR(dentry))
goto out_putname;
Expand All @@ -4199,7 +4229,7 @@ int do_mkdirat(int dfd, struct filename *name, umode_t mode)
error = vfs_mkdir(mnt_idmap(path.mnt), path.dentry->d_inode,
dentry, mode);
}
done_path_create(&path, dentry);
done_path_create_srcu(&path, dentry, idx);
if (retry_estale(error, lookup_flags)) {
lookup_flags |= LOOKUP_REVAL;
goto retry;
Expand Down Expand Up @@ -4281,6 +4311,7 @@ int do_rmdir(int dfd, struct filename *name)
struct path path;
struct lookup_result res;
unsigned int lookup_flags = 0;
int idx;
retry:
error = filename_parentat(dfd, name, lookup_flags, &path, &res);
if (error)
Expand All @@ -4298,7 +4329,7 @@ int do_rmdir(int dfd, struct filename *name)
goto exit2;
}

error = mnt_want_write(path.mnt);
error = mnt_want_write_parent(&path, MAY_DELETE, &res, &idx);
if (error)
goto exit2;

Expand All @@ -4319,7 +4350,7 @@ int do_rmdir(int dfd, struct filename *name)
dput(dentry);
exit3:
inode_unlock(path.dentry->d_inode);
mnt_drop_write(path.mnt);
mnt_drop_write_srcu(path.mnt, idx);
exit2:
path_put(&path);
if (retry_estale(error, lookup_flags)) {
Expand Down Expand Up @@ -4421,6 +4452,7 @@ int do_unlinkat(int dfd, struct filename *name)
struct inode *inode = NULL;
struct inode *delegated_inode = NULL;
unsigned int lookup_flags = 0;
int idx;
retry:
error = filename_parentat(dfd, name, lookup_flags, &path, &res);
if (error)
Expand All @@ -4430,7 +4462,7 @@ int do_unlinkat(int dfd, struct filename *name)
if (res.type != LAST_NORM)
goto exit2;

error = mnt_want_write(path.mnt);
error = mnt_want_write_parent(&path, MAY_DELETE, &res, &idx);
if (error)
goto exit2;
retry_deleg:
Expand Down Expand Up @@ -4463,7 +4495,7 @@ int do_unlinkat(int dfd, struct filename *name)
if (!error)
goto retry_deleg;
}
mnt_drop_write(path.mnt);
mnt_drop_write_srcu(path.mnt, idx);
exit2:
path_put(&path);
if (retry_estale(error, lookup_flags)) {
Expand Down Expand Up @@ -4544,13 +4576,14 @@ int do_symlinkat(struct filename *from, int newdfd, struct filename *to)
struct dentry *dentry;
struct path path;
unsigned int lookup_flags = 0;
int idx;

if (IS_ERR(from)) {
error = PTR_ERR(from);
goto out_putnames;
}
retry:
dentry = filename_create(newdfd, to, &path, lookup_flags);
dentry = filename_create_srcu(newdfd, to, &path, lookup_flags, &idx);
error = PTR_ERR(dentry);
if (IS_ERR(dentry))
goto out_putnames;
Expand All @@ -4559,7 +4592,7 @@ int do_symlinkat(struct filename *from, int newdfd, struct filename *to)
if (!error)
error = vfs_symlink(mnt_idmap(path.mnt), path.dentry->d_inode,
dentry, from->name);
done_path_create(&path, dentry);
done_path_create_srcu(&path, dentry, idx);
if (retry_estale(error, lookup_flags)) {
lookup_flags |= LOOKUP_REVAL;
goto retry;
Expand Down Expand Up @@ -4688,6 +4721,7 @@ int do_linkat(int olddfd, struct filename *old, int newdfd,
struct inode *delegated_inode = NULL;
int how = 0;
int error;
int idx;

if ((flags & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH)) != 0) {
error = -EINVAL;
Expand All @@ -4710,8 +4744,8 @@ int do_linkat(int olddfd, struct filename *old, int newdfd,
if (error)
goto out_putnames;

new_dentry = filename_create(newdfd, new, &new_path,
(how & LOOKUP_REVAL));
new_dentry = filename_create_srcu(newdfd, new, &new_path,
(how & LOOKUP_REVAL), &idx);
error = PTR_ERR(new_dentry);
if (IS_ERR(new_dentry))
goto out_putpath;
Expand All @@ -4729,7 +4763,7 @@ int do_linkat(int olddfd, struct filename *old, int newdfd,
error = vfs_link(old_path.dentry, idmap, new_path.dentry->d_inode,
new_dentry, &delegated_inode);
out_dput:
done_path_create(&new_path, new_dentry);
done_path_create_srcu(&new_path, new_dentry, idx);
if (delegated_inode) {
error = break_deleg_wait(&delegated_inode);
if (!error) {
Expand Down Expand Up @@ -4962,6 +4996,7 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd,
unsigned int lookup_flags = 0, target_flags = LOOKUP_RENAME_TARGET;
bool should_retry = false;
int error = -EINVAL;
int idx;

if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
goto put_names;
Expand Down Expand Up @@ -4997,7 +5032,8 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd,
if (new_res.type != LAST_NORM)
goto exit2;

error = mnt_want_write(old_path.mnt);
error = mnt_want_write_parents(&old_path, &old_res,
&new_path, &new_res, &idx);
if (error)
goto exit2;

Expand Down Expand Up @@ -5076,7 +5112,7 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd,
if (!error)
goto retry_deleg;
}
mnt_drop_write(old_path.mnt);
mnt_drop_write_srcu(old_path.mnt, idx);
exit2:
if (retry_estale(error, lookup_flags))
should_retry = true;
Expand Down
81 changes: 81 additions & 0 deletions fs/namespace.c
Expand Up @@ -466,6 +466,87 @@ int mnt_want_write_path_attr(const struct path *path, unsigned int attr,
}
EXPORT_SYMBOL_GPL(mnt_want_write_path_attr);

/**
 * mnt_want_write_parent - get write access to parent's mount before link/unlink
 * @path: the path whose mount to take write access on
 * @mask: either MAY_CREATE or MAY_DELETE
 * @res: additional path lookup results
 * @pidx: output value to pass to mnt_drop_write_srcu(), may be NULL
 *
 * In addition to taking write access, it is also used to notify
 * listeners on an intent to make modifications in the filesystem.
 * The notification is skipped when @res->flags has LOOKUP_NONOTIFY set.
 *
 * When @pidx is non-NULL, a successful call leaves the srcu read section
 * open, stores its index in *@pidx and must be paired with
 * mnt_drop_write_srcu().  When @pidx is NULL, the srcu read section is
 * ended before returning and only the mount write access remains held.
 *
 * Return: 0 on success, otherwise the error from fsnotify_name_perm() or
 * mnt_want_write(); on error the srcu read section has been ended and
 * *@pidx is left untouched.
 */
int mnt_want_write_parent(const struct path *path, int mask,
			  const struct lookup_result *res, int *pidx)
{
	struct super_block *sb = path->mnt->mnt_sb;
	int srcu_idx;
	int err = 0;

	/* vfs write barrier covers also the pre-modify event */
	srcu_idx = __sb_start_write_srcu(sb);

	if (!(res->flags & LOOKUP_NONOTIFY))
		err = fsnotify_name_perm(path, &res->last, mask);
	if (!err)
		err = mnt_want_write(path->mnt);

	if (!err && pidx) {
		/* caller keeps the srcu read section open */
		*pidx = srcu_idx;
		return 0;
	}

	/* failure, or a caller that did not ask for the srcu index */
	__sb_end_write_srcu(sb, srcu_idx);
	return err;
}
EXPORT_SYMBOL_GPL(mnt_want_write_parent);

/**
 * mnt_want_write_parents - get write access to parents' mount before rename
 * @oldpath: the old path whose mount to take write access on
 * @oldres: additional old path lookup results
 * @newpath: the new path, passed on to the rename permission hook
 * @newres: additional new path lookup results
 * @pidx: output value to pass to mnt_drop_write_srcu(), may be NULL
 *
 * In addition to taking write access, it is also used to notify
 * listeners on an intent to make modifications in the filesystem.
 * The notification is skipped only when both @oldres->flags and
 * @newres->flags have LOOKUP_NONOTIFY set.
 *
 * Write access and the srcu read section are taken on @oldpath's mount
 * and superblock only.
 * NOTE(review): this presumably relies on the caller having verified that
 * both paths are on the same mount - confirm against do_renameat2().
 *
 * When @pidx is non-NULL, a successful call leaves the srcu read section
 * open, stores its index in *@pidx and must be paired with
 * mnt_drop_write_srcu().  When @pidx is NULL, the srcu read section is
 * ended before returning and only the mount write access remains held,
 * matching mnt_want_write_parent().
 *
 * Return: 0 on success, otherwise the error from fsnotify_rename_perm() or
 * mnt_want_write(); on error the srcu read section has been ended and
 * *@pidx is left untouched.
 */
int mnt_want_write_parents(const struct path *oldpath,
			   const struct lookup_result *oldres,
			   const struct path *newpath,
			   const struct lookup_result *newres, int *pidx)
{
	struct super_block *sb = oldpath->mnt->mnt_sb;
	int idx, ret;

	/* vfs write barrier covers also the pre-modify event */
	idx = __sb_start_write_srcu(sb);

	if (!(oldres->flags & LOOKUP_NONOTIFY) ||
	    !(newres->flags & LOOKUP_NONOTIFY)) {
		ret = fsnotify_rename_perm(oldpath, &oldres->last,
					   newpath, &newres->last);
		if (ret)
			goto out_write_srcu;
	}
	ret = mnt_want_write(oldpath->mnt);
	/*
	 * Same contract as mnt_want_write_parent(): without an output slot
	 * for the index, the srcu read section cannot be handed to the
	 * caller, so end it here instead of dereferencing a NULL @pidx.
	 */
	if (ret || !pidx)
		goto out_write_srcu;

	*pidx = idx;
	return 0;

out_write_srcu:
	__sb_end_write_srcu(sb, idx);
	return ret;
}
EXPORT_SYMBOL_GPL(mnt_want_write_parents);

/**
* __mnt_want_write_file - get write access to a file's mount
* @file: the file who's mount on which to take a write
Expand Down
2 changes: 1 addition & 1 deletion fs/notify/fanotify/fanotify.c
Expand Up @@ -941,7 +941,7 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask,
BUILD_BUG_ON(FAN_FS_ERROR != FS_ERROR);
BUILD_BUG_ON(FAN_RENAME != FS_RENAME);

BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 25);
BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 29);

mask = fanotify_group_event_mask(group, iter_info, &match_mask,
mask, data, data_type, dir);
Expand Down
4 changes: 3 additions & 1 deletion include/linux/fanotify.h
Expand Up @@ -94,7 +94,9 @@
#define FANOTIFY_DIRENT_EVENTS (FAN_MOVE | FAN_CREATE | FAN_DELETE | \
FAN_RENAME)

#define FANOTIFY_PRE_MODIFY_EVENTS (FAN_PRE_MODIFY | FAN_PRE_ATTRIB)
#define FANOTIFY_PRE_MODIFY_EVENTS (FAN_PRE_MODIFY | FAN_PRE_ATTRIB | \
FAN_PRE_CREATE | FAN_PRE_DELETE | \
FAN_PRE_MOVE_FROM | FAN_PRE_MOVE_TO)

/* Events that require a permission response from user */
#define FANOTIFY_PERM_EVENTS (FANOTIFY_PRE_MODIFY_EVENTS | \
Expand Down
3 changes: 3 additions & 0 deletions include/linux/fs.h
Expand Up @@ -104,6 +104,9 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
#define MAY_NOT_BLOCK 0x00000080
/* called with vfs locks held, do not call sb_start_write() */
#define MAY_NOT_START_WRITE 0x00000100
/* for fsnotify_name_perm() */
#define MAY_CREATE 0x00000200
#define MAY_DELETE 0x00000400

/*
* flags in file.f_mode. Note that FMODE_READ and FMODE_WRITE must correspond
Expand Down

0 comments on commit 29c60e4

Please sign in to comment.