Skip to content

Commit

Permalink
fs: implement 'vfs write barriers'
Browse files Browse the repository at this point in the history
Add per-sb s_write_srcu to implement 'vfs write barriers'.

Writers need to opt-in to participate in 'vfs write barriers'
by using the new {sb,file}_{start,end}_write_srcu() helpers.

sb_write_barrier() calls synchronize_srcu() to wait for all the writers
that opted-in to participate in 'vfs write barriers'.

sb_write_barrier() may block, waiting for old writes to complete, but it
never blocks new writers from starting a write.

To minimize performance overhead when write barriers are not needed,
consumers of sb_write_barrier() need to activate the write barrier on the
sb by calling activate_sb_write_barrier().  There is no deactivate, so the
performance overhead sticks with the sb until it is killed.

Tested-by: kernel test robot <oliver.sang@intel.com>
Link: https://lore.kernel.org/oe-lkp/202306192245.d77d7216-oliver.sang@intel.com
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
  • Loading branch information
amir73il committed Aug 17, 2023
1 parent 43460be commit 655606c
Show file tree
Hide file tree
Showing 2 changed files with 155 additions and 0 deletions.
44 changes: 44 additions & 0 deletions fs/super.c
Original file line number Diff line number Diff line change
Expand Up @@ -157,8 +157,13 @@ static void destroy_super_work(struct work_struct *work)
{
struct super_block *s = container_of(work, struct super_block,
destroy_work);
struct srcu_struct *write_srcu = sb_write_srcu(s);
int i;

if (write_srcu) {
cleanup_srcu_struct(write_srcu);
kfree(write_srcu);
}
for (i = 0; i < SB_FREEZE_LEVELS; i++)
percpu_free_rwsem(&s->s_writers.rw_sem[i]);
kfree(s);
Expand Down Expand Up @@ -277,6 +282,45 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags,
return NULL;
}

/**
 * activate_sb_write_barrier - activate write barriers on superblock
 * @s: superblock to activate write barriers on
 *
 * sb_write_barrier() calls will have no effect before this is called.
 * After this is called, sb_write_barrier() will wait for all the tasks
 * that called __sb_start_write_srcu() after write barrier was activated.
 * For now, there is no deactivate_sb_write_barrier().
 *
 * The srcu_struct is allocated and initialized without holding any
 * spinlock, because both the allocation and init_srcu_struct() may sleep.
 * Concurrent activations are resolved by cmpxchg(): exactly one caller
 * publishes its structure and the others discard their private copy.
 *
 * Return: 0 if write barriers are active on @s when this returns,
 * a negative errno (-ENOMEM) if the srcu_struct could not be set up.
 */
int activate_sb_write_barrier(struct super_block *s)
{
	struct srcu_struct *write_srcu;
	int err;

	/* Already activated? */
	if (sb_write_srcu(s))
		return 0;

	write_srcu = kzalloc(sizeof(*write_srcu), GFP_USER);
	if (!write_srcu)
		return -ENOMEM;

	err = init_srcu_struct(write_srcu);
	if (err) {
		kfree(write_srcu);
		return err;
	}

	/*
	 * cmpxchg() provides the barrier so that callers of
	 * sb_write_srcu() can see only initialized structure.
	 * A non-NULL old value means another task activated the write
	 * barrier first; our copy was never visible to anyone, so it can
	 * be torn down immediately.
	 */
	if (cmpxchg(&s->s_write_srcu, NULL, write_srcu)) {
		cleanup_srcu_struct(write_srcu);
		kfree(write_srcu);
	}

	return 0;
}

/* Superblock refcounting */

/*
Expand Down
111 changes: 111 additions & 0 deletions include/linux/fs.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
#include <linux/cred.h>
#include <linux/mnt_idmapping.h>
#include <linux/slab.h>
#include <linux/srcu.h>

#include <asm/byteorder.h>
#include <uapi/linux/fs.h>
Expand Down Expand Up @@ -1282,6 +1283,9 @@ struct super_block {

spinlock_t s_inode_wblist_lock;
struct list_head s_inodes_wb; /* writeback inodes */

/* synchronize_srcu() can be used as a 'vfs write barrier' */
struct srcu_struct *s_write_srcu;
} __randomize_layout;

static inline struct user_namespace *i_user_ns(const struct inode *inode)
Expand Down Expand Up @@ -1567,6 +1571,70 @@ static inline bool file_write_not_started(const struct file *file)
return sb_write_not_started(file_inode(file)->i_sb);
}

/*
 * Fetch the superblock's write-barrier srcu_struct pointer.
 * The pointer is read once with READ_ONCE(); callers in this file treat
 * a NULL result as "write barriers not activated".
 */
static inline struct srcu_struct *sb_write_srcu(const struct super_block *sb)
{
	struct srcu_struct *srcu = READ_ONCE(sb->s_write_srcu);

	return srcu;
}

/*
 * Enter the SRCU read-side section that marks an in-progress write.
 * Returns the index from srcu_read_lock(), to be handed back to
 * __sb_end_write_srcu(), or -1 when the superblock has no srcu_struct.
 */
static inline int __sb_start_write_srcu(struct super_block *sb)
{
	struct srcu_struct *srcu = sb_write_srcu(sb);

	return srcu ? srcu_read_lock(srcu) : -1;
}

/*
 * Leave the SRCU read-side section entered by __sb_start_write_srcu().
 * @idx is the value that call returned; a negative @idx means no section
 * was entered and there is nothing to undo.
 */
static inline void __sb_end_write_srcu(struct super_block *sb, int idx)
{
	struct srcu_struct *srcu = sb_write_srcu(sb);

	if (idx < 0)
		return;

	/*
	 * We do not support deactivating write barrier, so a valid index
	 * with no srcu_struct is unexpected: warn and skip the unlock.
	 */
	if (WARN_ON_ONCE(!srcu))
		return;

	srcu_read_unlock(srcu, idx);
}

/*
 * Report whether srcu_read_lock_held() says the superblock's write
 * srcu_struct is held.  Returns 0 when there is no srcu_struct,
 * otherwise 1 or 0 depending on srcu_read_lock_held().
 */
static inline int sb_write_srcu_started(const struct super_block *sb)
{
	struct srcu_struct *srcu = sb_write_srcu(sb);

	if (!srcu)
		return 0;

	/* Normalize to 0/1, as a logical AND would. */
	return srcu_read_lock_held(srcu) != 0;
}

static inline int __file_start_write_srcu(struct file *file)
{
if (!S_ISREG(file_inode(file)->i_mode))
return -1;
return __sb_start_write_srcu(file_inode(file)->i_sb);
}

/*
 * File flavour of __sb_end_write_srcu(): @idx is the value returned by
 * the matching __file_start_write_srcu() on the same file.
 */
static inline void __file_end_write_srcu(struct file *file, int idx)
{
	struct super_block *sb = file_inode(file)->i_sb;

	__sb_end_write_srcu(sb, idx);
}

/*
 * File flavour of sb_write_srcu_started(), evaluated on the superblock
 * of the file's inode.
 */
static inline int file_write_srcu_started(const struct file *file)
{
	const struct super_block *sb = file_inode(file)->i_sb;

	return sb_write_srcu_started(sb);
}

/*
 * 'vfs write barrier': wait for writers that are already in progress,
 * without blocking new writers from starting.
 * Only writers that opted-in with the *_start_write_srcu() helpers are
 * waited for.  Does nothing when the superblock has no srcu_struct.
 */
static inline void sb_write_barrier(struct super_block *sb)
{
	struct srcu_struct *srcu = sb_write_srcu(sb);

	if (!srcu)
		return;

	synchronize_srcu(srcu);
}

int activate_sb_write_barrier(struct super_block *sb);

/**
* sb_end_write - drop write access to a superblock
* @sb: the super we wrote to
Expand All @@ -1579,6 +1647,20 @@ static inline void sb_end_write(struct super_block *sb)
__sb_end_write(sb, SB_FREEZE_WRITE);
}

/**
 * sb_end_write_srcu - drop write access to a superblock
 * @sb: the super we wrote to
 * @idx: return value from corresponding sb_start_write_srcu()
 *
 * Counterpart of sb_start_write_srcu(): first leave the SRCU read-side
 * section identified by @idx (a negative @idx means none was entered),
 * then drop write access exactly as sb_end_write() does.
 */
static inline void sb_end_write_srcu(struct super_block *sb, int idx)
{
	__sb_end_write_srcu(sb, idx);
	sb_end_write(sb);
}

/**
* sb_end_pagefault - drop write access to a superblock from a page fault
* @sb: the super we wrote to
Expand Down Expand Up @@ -1632,6 +1714,21 @@ static inline bool sb_start_write_trylock(struct super_block *sb)
return __sb_start_write_trylock(sb, SB_FREEZE_WRITE);
}

/**
 * sb_start_write_srcu - get write access to a superblock
 * @sb: the super we write to
 *
 * Use in place of sb_start_write() to opt-in to write barriers: the
 * SRCU read-side section is entered first, then SB_FREEZE_WRITE level
 * write access is taken.
 *
 * NOTE(review): the SRCU section is entered before __sb_start_write();
 * if that call can block, sb_write_barrier() would wait for it as well.
 * Confirm this ordering is intended.
 *
 * Return: the value that must be passed as @idx to sb_end_write_srcu().
 */
static inline int sb_start_write_srcu(struct super_block *sb)
{
	int srcu_idx;

	srcu_idx = __sb_start_write_srcu(sb);
	__sb_start_write(sb, SB_FREEZE_WRITE);

	return srcu_idx;
}

/**
* sb_start_pagefault - get write access to a superblock from a page fault
* @sb: the super we write to
Expand Down Expand Up @@ -2637,6 +2734,20 @@ static inline void file_end_write(struct file *file)
sb_end_write(file_inode(file)->i_sb);
}

/*
 * File flavour of sb_start_write_srcu(): enter the SRCU read-side
 * section via __file_start_write_srcu(), then call file_start_write().
 * The return value must be passed as @idx to file_end_write_srcu().
 */
static inline int file_start_write_srcu(struct file *file)
{
	int srcu_idx;

	srcu_idx = __file_start_write_srcu(file);
	file_start_write(file);

	return srcu_idx;
}

/*
 * Counterpart of file_start_write_srcu(): leave the SRCU read-side
 * section identified by @idx, then call file_end_write().
 */
static inline void file_end_write_srcu(struct file *file, int idx)
{
	/* Leave the SRCU section first, then drop write access. */
	__file_end_write_srcu(file, idx);
	file_end_write(file);
}

/**
* kiocb_start_write - get write access to a superblock for async file io
* @iocb: the io context we want to submit the write with
Expand Down

0 comments on commit 655606c

Please sign in to comment.