Skip to content

Commit 3352633

Browse files
committed
Merge tag 'vfs-6.12.file' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull vfs file updates from Christian Brauner: "This is the work to cleanup and shrink struct file significantly. Right now, (focusing on x86) struct file is 232 bytes. After this series struct file will be 184 bytes aka 3 cachelines and a spare 8 bytes for future extensions at the end of the struct. With struct file being as ubiquitous as it is this should make a difference for file heavy workloads and allow further optimizations in the future. - struct fown_struct was embedded into struct file letting it take up 32 bytes in total when really it shouldn't even be embedded in struct file in the first place. Instead, actual users of struct fown_struct now allocate the struct on demand. This frees up 24 bytes. - Move struct file_ra_state into the union containing the cleanup hooks and move f_iocb_flags out of the union. This closes a 4 byte hole we created earlier and brings struct file to 192 bytes. Which means struct file is 3 cachelines and we managed to shrink it by 40 bytes. - Reorder struct file so that nothing crosses a cacheline. I suspect that in the future we will end up reordering some members to mitigate false sharing issues or just because someone does actually provide really good perf data. - Shrinking struct file to 192 bytes is only part of the work. Files use a slab that is SLAB_TYPESAFE_BY_RCU and when a kmem cache is created with SLAB_TYPESAFE_BY_RCU the free pointer must be located outside of the object because the cache doesn't know what part of the memory can safely be overwritten as it may be needed to prevent object recycling. That has the consequence that SLAB_TYPESAFE_BY_RCU may end up adding a new cacheline. So this also contains work to add a new kmem_cache_create_rcu() function that allows the caller to specify an offset where the freelist pointer is supposed to be placed. Thus avoiding the implicit addition of a fourth cacheline. - And finally this removes the f_version member in struct file. 
The f_version member isn't particularly well-defined. It is mainly used as a cookie to detect concurrent seeks when iterating directories. But it is also abused by some subsystems for completely unrelated things. It is mostly a directory and filesystem specific thing that doesn't really need to live in struct file and with its wonky semantics it really lacks a specific function. For pipes, f_version is (ab)used to defer poll notifications until a write has happened. And struct pipe_inode_info is used by multiple struct files in their ->private_data so there's no chance of pushing that down into file->private_data without introducing another pointer indirection. But pipes don't rely on f_pos_lock so this adds a union into struct file encompassing f_pos_lock and a pipe specific f_pipe member that pipes can use. This union of course can be extended to other file types and is similar to what we do in struct inode already" * tag 'vfs-6.12.file' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs: (26 commits) fs: remove f_version pipe: use f_pipe fs: add f_pipe ubifs: store cookie in private data ufs: store cookie in private data udf: store cookie in private data proc: store cookie in private data ocfs2: store cookie in private data input: remove f_version abuse ext4: store cookie in private data ext2: store cookie in private data affs: store cookie in private data fs: add generic_llseek_cookie() fs: use must_set_pos() fs: add must_set_pos() fs: add vfs_setpos_cookie() s390: remove unused f_version ceph: remove unused f_version adi: remove unused f_version mm: Removed @freeptr_offset to prevent doc warning ...
2 parents 2775df6 + 24a988f commit 3352633

File tree

33 files changed

+744
-271
lines changed

33 files changed

+744
-271
lines changed

drivers/char/adi.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,6 @@ static loff_t adi_llseek(struct file *file, loff_t offset, int whence)
190190

191191
if (offset != file->f_pos) {
192192
file->f_pos = offset;
193-
file->f_version = 0;
194193
ret = offset;
195194
}
196195

drivers/input/input.c

Lines changed: 22 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1079,33 +1079,31 @@ static inline void input_wakeup_procfs_readers(void)
10791079
wake_up(&input_devices_poll_wait);
10801080
}
10811081

1082+
struct input_seq_state {
1083+
unsigned short pos;
1084+
bool mutex_acquired;
1085+
int input_devices_state;
1086+
};
1087+
10821088
static __poll_t input_proc_devices_poll(struct file *file, poll_table *wait)
10831089
{
1090+
struct seq_file *seq = file->private_data;
1091+
struct input_seq_state *state = seq->private;
1092+
10841093
poll_wait(file, &input_devices_poll_wait, wait);
1085-
if (file->f_version != input_devices_state) {
1086-
file->f_version = input_devices_state;
1094+
if (state->input_devices_state != input_devices_state) {
1095+
state->input_devices_state = input_devices_state;
10871096
return EPOLLIN | EPOLLRDNORM;
10881097
}
10891098

10901099
return 0;
10911100
}
10921101

1093-
union input_seq_state {
1094-
struct {
1095-
unsigned short pos;
1096-
bool mutex_acquired;
1097-
};
1098-
void *p;
1099-
};
1100-
11011102
static void *input_devices_seq_start(struct seq_file *seq, loff_t *pos)
11021103
{
1103-
union input_seq_state *state = (union input_seq_state *)&seq->private;
1104+
struct input_seq_state *state = seq->private;
11041105
int error;
11051106

1106-
/* We need to fit into seq->private pointer */
1107-
BUILD_BUG_ON(sizeof(union input_seq_state) != sizeof(seq->private));
1108-
11091107
error = mutex_lock_interruptible(&input_mutex);
11101108
if (error) {
11111109
state->mutex_acquired = false;
@@ -1124,7 +1122,7 @@ static void *input_devices_seq_next(struct seq_file *seq, void *v, loff_t *pos)
11241122

11251123
static void input_seq_stop(struct seq_file *seq, void *v)
11261124
{
1127-
union input_seq_state *state = (union input_seq_state *)&seq->private;
1125+
struct input_seq_state *state = seq->private;
11281126

11291127
if (state->mutex_acquired)
11301128
mutex_unlock(&input_mutex);
@@ -1210,25 +1208,23 @@ static const struct seq_operations input_devices_seq_ops = {
12101208

12111209
static int input_proc_devices_open(struct inode *inode, struct file *file)
12121210
{
1213-
return seq_open(file, &input_devices_seq_ops);
1211+
return seq_open_private(file, &input_devices_seq_ops,
1212+
sizeof(struct input_seq_state));
12141213
}
12151214

12161215
static const struct proc_ops input_devices_proc_ops = {
12171216
.proc_open = input_proc_devices_open,
12181217
.proc_poll = input_proc_devices_poll,
12191218
.proc_read = seq_read,
12201219
.proc_lseek = seq_lseek,
1221-
.proc_release = seq_release,
1220+
.proc_release = seq_release_private,
12221221
};
12231222

12241223
static void *input_handlers_seq_start(struct seq_file *seq, loff_t *pos)
12251224
{
1226-
union input_seq_state *state = (union input_seq_state *)&seq->private;
1225+
struct input_seq_state *state = seq->private;
12271226
int error;
12281227

1229-
/* We need to fit into seq->private pointer */
1230-
BUILD_BUG_ON(sizeof(union input_seq_state) != sizeof(seq->private));
1231-
12321228
error = mutex_lock_interruptible(&input_mutex);
12331229
if (error) {
12341230
state->mutex_acquired = false;
@@ -1243,7 +1239,7 @@ static void *input_handlers_seq_start(struct seq_file *seq, loff_t *pos)
12431239

12441240
static void *input_handlers_seq_next(struct seq_file *seq, void *v, loff_t *pos)
12451241
{
1246-
union input_seq_state *state = (union input_seq_state *)&seq->private;
1242+
struct input_seq_state *state = seq->private;
12471243

12481244
state->pos = *pos + 1;
12491245
return seq_list_next(v, &input_handler_list, pos);
@@ -1252,7 +1248,7 @@ static void *input_handlers_seq_next(struct seq_file *seq, void *v, loff_t *pos)
12521248
static int input_handlers_seq_show(struct seq_file *seq, void *v)
12531249
{
12541250
struct input_handler *handler = container_of(v, struct input_handler, node);
1255-
union input_seq_state *state = (union input_seq_state *)&seq->private;
1251+
struct input_seq_state *state = seq->private;
12561252

12571253
seq_printf(seq, "N: Number=%u Name=%s", state->pos, handler->name);
12581254
if (handler->filter)
@@ -1273,14 +1269,15 @@ static const struct seq_operations input_handlers_seq_ops = {
12731269

12741270
static int input_proc_handlers_open(struct inode *inode, struct file *file)
12751271
{
1276-
return seq_open(file, &input_handlers_seq_ops);
1272+
return seq_open_private(file, &input_handlers_seq_ops,
1273+
sizeof(struct input_seq_state));
12771274
}
12781275

12791276
static const struct proc_ops input_handlers_proc_ops = {
12801277
.proc_open = input_proc_handlers_open,
12811278
.proc_read = seq_read,
12821279
.proc_lseek = seq_lseek,
1283-
.proc_release = seq_release,
1280+
.proc_release = seq_release_private,
12841281
};
12851282

12861283
static int __init input_proc_init(void)

drivers/net/tun.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3452,6 +3452,12 @@ static int tun_chr_fasync(int fd, struct file *file, int on)
34523452
struct tun_file *tfile = file->private_data;
34533453
int ret;
34543454

3455+
if (on) {
3456+
ret = file_f_owner_allocate(file);
3457+
if (ret)
3458+
goto out;
3459+
}
3460+
34553461
if ((ret = fasync_helper(fd, file, on, &tfile->fasync)) < 0)
34563462
goto out;
34573463

drivers/s390/char/hmcdrv_dev.c

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -186,9 +186,6 @@ static loff_t hmcdrv_dev_seek(struct file *fp, loff_t pos, int whence)
186186
if (pos < 0)
187187
return -EINVAL;
188188

189-
if (fp->f_pos != pos)
190-
++fp->f_version;
191-
192189
fp->f_pos = pos;
193190
return pos;
194191
}

drivers/tty/tty_io.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2225,6 +2225,12 @@ static int __tty_fasync(int fd, struct file *filp, int on)
22252225
if (tty_paranoia_check(tty, file_inode(filp), "tty_fasync"))
22262226
goto out;
22272227

2228+
if (on) {
2229+
retval = file_f_owner_allocate(filp);
2230+
if (retval)
2231+
goto out;
2232+
}
2233+
22282234
retval = fasync_helper(fd, filp, on, &tty->fasync);
22292235
if (retval <= 0)
22302236
goto out;

fs/affs/dir.c

Lines changed: 38 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,44 @@
1717
#include <linux/iversion.h>
1818
#include "affs.h"
1919

20+
struct affs_dir_data {
21+
unsigned long ino;
22+
u64 cookie;
23+
};
24+
2025
static int affs_readdir(struct file *, struct dir_context *);
2126

27+
static loff_t affs_dir_llseek(struct file *file, loff_t offset, int whence)
28+
{
29+
struct affs_dir_data *data = file->private_data;
30+
31+
return generic_llseek_cookie(file, offset, whence, &data->cookie);
32+
}
33+
34+
static int affs_dir_open(struct inode *inode, struct file *file)
35+
{
36+
struct affs_dir_data *data;
37+
38+
data = kzalloc(sizeof(struct affs_dir_data), GFP_KERNEL);
39+
if (!data)
40+
return -ENOMEM;
41+
file->private_data = data;
42+
return 0;
43+
}
44+
45+
static int affs_dir_release(struct inode *inode, struct file *file)
46+
{
47+
kfree(file->private_data);
48+
return 0;
49+
}
50+
2251
const struct file_operations affs_dir_operations = {
52+
.open = affs_dir_open,
2353
.read = generic_read_dir,
24-
.llseek = generic_file_llseek,
54+
.llseek = affs_dir_llseek,
2555
.iterate_shared = affs_readdir,
2656
.fsync = affs_file_fsync,
57+
.release = affs_dir_release,
2758
};
2859

2960
/*
@@ -45,6 +76,7 @@ static int
4576
affs_readdir(struct file *file, struct dir_context *ctx)
4677
{
4778
struct inode *inode = file_inode(file);
79+
struct affs_dir_data *data = file->private_data;
4880
struct super_block *sb = inode->i_sb;
4981
struct buffer_head *dir_bh = NULL;
5082
struct buffer_head *fh_bh = NULL;
@@ -59,7 +91,7 @@ affs_readdir(struct file *file, struct dir_context *ctx)
5991
pr_debug("%s(ino=%lu,f_pos=%llx)\n", __func__, inode->i_ino, ctx->pos);
6092

6193
if (ctx->pos < 2) {
62-
file->private_data = (void *)0;
94+
data->ino = 0;
6395
if (!dir_emit_dots(file, ctx))
6496
return 0;
6597
}
@@ -80,8 +112,8 @@ affs_readdir(struct file *file, struct dir_context *ctx)
80112
/* If the directory hasn't changed since the last call to readdir(),
81113
* we can jump directly to where we left off.
82114
*/
83-
ino = (u32)(long)file->private_data;
84-
if (ino && inode_eq_iversion(inode, file->f_version)) {
115+
ino = data->ino;
116+
if (ino && inode_eq_iversion(inode, data->cookie)) {
85117
pr_debug("readdir() left off=%d\n", ino);
86118
goto inside;
87119
}
@@ -131,8 +163,8 @@ affs_readdir(struct file *file, struct dir_context *ctx)
131163
} while (ino);
132164
}
133165
done:
134-
file->f_version = inode_query_iversion(inode);
135-
file->private_data = (void *)(long)ino;
166+
data->cookie = inode_query_iversion(inode);
167+
data->ino = ino;
136168
affs_brelse(fh_bh);
137169

138170
out_brelse_dir:

fs/ceph/dir.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -707,7 +707,6 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence)
707707

708708
if (offset != file->f_pos) {
709709
file->f_pos = offset;
710-
file->f_version = 0;
711710
dfi->file_info.flags &= ~CEPH_F_ATEND;
712711
}
713712
retval = offset;

fs/ext2/dir.c

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -263,7 +263,7 @@ ext2_readdir(struct file *file, struct dir_context *ctx)
263263
unsigned long n = pos >> PAGE_SHIFT;
264264
unsigned long npages = dir_pages(inode);
265265
unsigned chunk_mask = ~(ext2_chunk_size(inode)-1);
266-
bool need_revalidate = !inode_eq_iversion(inode, file->f_version);
266+
bool need_revalidate = !inode_eq_iversion(inode, *(u64 *)file->private_data);
267267
bool has_filetype;
268268

269269
if (pos > inode->i_size - EXT2_DIR_REC_LEN(1))
@@ -290,7 +290,7 @@ ext2_readdir(struct file *file, struct dir_context *ctx)
290290
offset = ext2_validate_entry(kaddr, offset, chunk_mask);
291291
ctx->pos = (n<<PAGE_SHIFT) + offset;
292292
}
293-
file->f_version = inode_query_iversion(inode);
293+
*(u64 *)file->private_data = inode_query_iversion(inode);
294294
need_revalidate = false;
295295
}
296296
de = (ext2_dirent *)(kaddr+offset);
@@ -703,8 +703,30 @@ int ext2_empty_dir(struct inode *inode)
703703
return 0;
704704
}
705705

706+
static int ext2_dir_open(struct inode *inode, struct file *file)
707+
{
708+
file->private_data = kzalloc(sizeof(u64), GFP_KERNEL);
709+
if (!file->private_data)
710+
return -ENOMEM;
711+
return 0;
712+
}
713+
714+
static int ext2_dir_release(struct inode *inode, struct file *file)
715+
{
716+
kfree(file->private_data);
717+
return 0;
718+
}
719+
720+
static loff_t ext2_dir_llseek(struct file *file, loff_t offset, int whence)
721+
{
722+
return generic_llseek_cookie(file, offset, whence,
723+
(u64 *)file->private_data);
724+
}
725+
706726
const struct file_operations ext2_dir_operations = {
707-
.llseek = generic_file_llseek,
727+
.open = ext2_dir_open,
728+
.release = ext2_dir_release,
729+
.llseek = ext2_dir_llseek,
708730
.read = generic_read_dir,
709731
.iterate_shared = ext2_readdir,
710732
.unlocked_ioctl = ext2_ioctl,

0 commit comments

Comments
 (0)