btrfs: locking extents for async writeback
For async writeback, lock the extents and then perform cow_file_range()
asynchronously. Unlock when the async_chunk is freed.

Since writeback is performed on a range, locked_page can be removed
from the structures and function parameters. The same applies to
page_started and nr_written.

A writeback could involve a hole, so check whether the locked range covers
the entire extent returned by find_lock_delalloc_range(). If it does not,
try to lock the entire range; failing that, unlock the pages that were
locked beyond the requested range.

Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
goldwynr committed Mar 10, 2023
1 parent 4fcf59a commit 970a0d3
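For orientation, the flow described above, condensed from the new btrfs_writepages_async() added to fs/btrfs/inode.c in this commit (a sketch only: error handling, wbc accounting and the range_cyclic setup done in btrfs_writepages() are left out):

    lock_extent(&inode->io_tree, start, end, NULL);   /* lock the whole writeback range up front */
    while (cur_start < end) {
            last_start = cur_start;
            cur_end = end;
            /* Find and page-lock the next delalloc range inside [cur_start, end] */
            if (!find_lock_delalloc_range(&inode->vfs_inode, NULL, &cur_start, &cur_end)) {
                    /* Nothing to write back here, drop the extent lock for this part */
                    unlock_extent(&inode->io_tree, cur_start, cur_end, NULL);
                    cur_start = cur_end + 1;
                    continue;
            }
            /* A hole precedes the delalloc range, unlock just that part */
            if (cur_start > last_start)
                    unlock_extent(&inode->io_tree, last_start, cur_start - 1, NULL);
            /* The delalloc extent runs past the requested range */
            if (cur_end > end) {
                    if (try_lock_extent(&inode->io_tree, end + 1, cur_end, NULL)) {
                            end = cur_end;          /* write out the whole extent */
                    } else {
                            /* Someone else holds [end + 1, cur_end], give those pages back */
                            __unlock_for_delalloc(&inode->vfs_inode, NULL, end + 1, cur_end);
                            cur_end = end;
                    }
            }
            /* Queue async COW; the workers unlock [cur_start, cur_end] when they finish */
            cow_file_range_async(inode, wbc, cur_start, cur_end);
            cur_start = cur_end + 1;
    }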
Showing 4 changed files with 92 additions and 108 deletions.
4 changes: 4 additions & 0 deletions fs/btrfs/compression.c
@@ -1359,6 +1359,10 @@ static void heuristic_collect_sample(struct inode *inode, u64 start, u64 end,
curr_sample_pos = 0;
while (index < index_end) {
page = find_get_page(inode->i_mapping, index);
if (!page) {
index++;
continue;
}
in_data = kmap_local_page(page);
/* Handle case where the start is not aligned to PAGE_SIZE */
i = start % PAGE_SIZE;
10 changes: 5 additions & 5 deletions fs/btrfs/extent_io.c
@@ -350,7 +350,7 @@ static int __process_pages_contig(struct address_space *mapping,
return err;
}

static noinline void __unlock_for_delalloc(struct inode *inode,
noinline void __unlock_for_delalloc(struct inode *inode,
struct page *locked_page,
u64 start, u64 end)
{
@@ -375,8 +375,7 @@ static noinline int lock_delalloc_pages(struct inode *inode,
u64 processed_end = delalloc_start;
int ret;

ASSERT(locked_page);
if (index == locked_page->index && index == end_index)
if (locked_page && index == locked_page->index && index == end_index)
return 0;

ret = __process_pages_contig(inode->i_mapping, locked_page, delalloc_start,
@@ -424,8 +423,9 @@ noinline_for_stack bool find_lock_delalloc_range(struct inode *inode,
ASSERT(orig_end > orig_start);

/* The range should at least cover part of the page */
ASSERT(!(orig_start >= page_offset(locked_page) + PAGE_SIZE ||
orig_end <= page_offset(locked_page)));
if (locked_page)
ASSERT(!(orig_start >= page_offset(locked_page) + PAGE_SIZE ||
orig_end <= page_offset(locked_page)));
again:
/* step one, find a bunch of delalloc bytes starting at start */
delalloc_start = *start;
2 changes: 2 additions & 0 deletions fs/btrfs/extent_io.h
@@ -279,6 +279,8 @@ void btrfs_clear_buffer_dirty(struct btrfs_trans_handle *trans,
int btrfs_alloc_page_array(unsigned int nr_pages, struct page **page_array);

void end_extent_writepage(struct page *page, int err, u64 start, u64 end);
void __unlock_for_delalloc(struct inode *inode, struct page *locked_page,
u64 start, u64 end);

#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
bool find_lock_delalloc_range(struct inode *inode,
184 changes: 81 additions & 103 deletions fs/btrfs/inode.c
@@ -507,7 +507,6 @@ struct async_extent {

struct async_chunk {
struct btrfs_inode *inode;
struct page *locked_page;
u64 start;
u64 end;
blk_opf_t write_flags;
@@ -887,18 +886,6 @@ static noinline int compress_file_range(struct async_chunk *async_chunk)
}
}
cleanup_and_bail_uncompressed:
/*
* No compression, but we still need to write the pages in the file
* we've been given so far. redirty the locked page if it corresponds
* to our extent and set things up for the async work queue to run
* cow_file_range to do the normal delalloc dance.
*/
if (async_chunk->locked_page &&
(page_offset(async_chunk->locked_page) >= start &&
page_offset(async_chunk->locked_page)) <= end) {
__set_page_dirty_nobuffers(async_chunk->locked_page);
/* unlocked later on in the async handlers */
}

if (redirty)
extent_range_redirty_for_io(&inode->vfs_inode, start, end);
@@ -926,8 +913,7 @@ static void free_async_extent_pages(struct async_extent *async_extent)
}

static int submit_uncompressed_range(struct btrfs_inode *inode,
struct async_extent *async_extent,
struct page *locked_page)
struct async_extent *async_extent)
{
u64 start = async_extent->start;
u64 end = async_extent->start + async_extent->ram_size - 1;
@@ -942,31 +928,20 @@ static int submit_uncompressed_range(struct btrfs_inode *inode,
* Also we call cow_file_range() with @unlock_page == 0, so that we
* can directly submit them without interruption.
*/
ret = cow_file_range(inode, locked_page, start, end, &page_started,
ret = cow_file_range(inode, NULL, start, end, &page_started,
&nr_written, 0, NULL);
/* Inline extent inserted, page gets unlocked and everything is done */
if (page_started) {
ret = 0;
goto out;
}
if (ret < 0) {
btrfs_cleanup_ordered_extents(inode, locked_page, start, end - start + 1);
if (locked_page) {
const u64 page_start = page_offset(locked_page);
const u64 page_end = page_start + PAGE_SIZE - 1;

btrfs_page_set_error(inode->root->fs_info, locked_page,
page_start, PAGE_SIZE);
set_page_writeback(locked_page);
end_page_writeback(locked_page);
end_extent_writepage(locked_page, ret, page_start, page_end);
unlock_page(locked_page);
}
btrfs_cleanup_ordered_extents(inode, NULL, start, end - start + 1);
goto out;
}

ret = extent_write_locked_range(&inode->vfs_inode, start, end);
/* All pages will be unlocked, including @locked_page */
/* All pages will be unlocked */
out:
kfree(async_extent);
return ret;
@@ -980,27 +955,14 @@ static int submit_one_async_extent(struct btrfs_inode *inode,
struct btrfs_root *root = inode->root;
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_key ins;
struct page *locked_page = NULL;
struct extent_map *em;
int ret = 0;
u64 start = async_extent->start;
u64 end = async_extent->start + async_extent->ram_size - 1;

/*
* If async_chunk->locked_page is in the async_extent range, we need to
* handle it.
*/
if (async_chunk->locked_page) {
u64 locked_page_start = page_offset(async_chunk->locked_page);
u64 locked_page_end = locked_page_start + PAGE_SIZE - 1;

if (!(start >= locked_page_end || end <= locked_page_start))
locked_page = async_chunk->locked_page;
}

/* We have fall back to uncompressed write */
if (!async_extent->pages)
return submit_uncompressed_range(inode, async_extent, locked_page);
return submit_uncompressed_range(inode, async_extent);

ret = btrfs_reserve_extent(root, async_extent->ram_size,
async_extent->compressed_size,
@@ -1467,6 +1429,8 @@ static noinline void async_cow_start(struct btrfs_work *work)

compressed_extents = compress_file_range(async_chunk);
if (compressed_extents == 0) {
unlock_extent(&async_chunk->inode->io_tree,
async_chunk->start, async_chunk->end, NULL);
btrfs_add_delayed_iput(async_chunk->inode);
async_chunk->inode = NULL;
}
@@ -1491,8 +1455,11 @@ static noinline void async_cow_submit(struct btrfs_work *work)
* always adjust ->async_delalloc_pages as its paired with the init
* happening in cow_file_range_async
*/
if (async_chunk->inode)
if (async_chunk->inode) {
submit_compressed_extents(async_chunk);
unlock_extent(&async_chunk->inode->io_tree,
async_chunk->start, async_chunk->end, NULL);
}

/* atomic_sub_return implies a barrier */
if (atomic_sub_return(nr_pages, &fs_info->async_delalloc_pages) <
@@ -1511,16 +1478,15 @@ static noinline void async_cow_free(struct btrfs_work *work)
if (async_chunk->blkcg_css)
css_put(async_chunk->blkcg_css);


async_cow = async_chunk->async_cow;
if (atomic_dec_and_test(&async_cow->num_chunks))
kvfree(async_cow);
}

static int cow_file_range_async(struct btrfs_inode *inode,
struct writeback_control *wbc,
struct page *locked_page,
u64 start, u64 end, int *page_started,
unsigned long *nr_written)
u64 start, u64 end)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct cgroup_subsys_state *blkcg_css = wbc_blkcg_css(wbc);
@@ -1530,20 +1496,9 @@ static int cow_file_range_async(struct btrfs_inode *inode,
u64 cur_end;
u64 num_chunks = DIV_ROUND_UP(end - start, SZ_512K);
int i;
bool should_compress;
unsigned nofs_flag;
const blk_opf_t write_flags = wbc_to_write_flags(wbc);

unlock_extent(&inode->io_tree, start, end, NULL);

if (inode->flags & BTRFS_INODE_NOCOMPRESS &&
!btrfs_test_opt(fs_info, FORCE_COMPRESS)) {
num_chunks = 1;
should_compress = false;
} else {
should_compress = true;
}

nofs_flag = memalloc_nofs_save();
ctx = kvmalloc(struct_size(ctx, chunks, num_chunks), GFP_KERNEL);
memalloc_nofs_restore(nofs_flag);
@@ -1555,19 +1510,17 @@ static int cow_file_range_async(struct btrfs_inode *inode,
unsigned long page_ops = PAGE_UNLOCK | PAGE_START_WRITEBACK |
PAGE_END_WRITEBACK | PAGE_SET_ERROR;

extent_clear_unlock_delalloc(inode, start, end, locked_page,
extent_clear_unlock_delalloc(inode, start, end, NULL,
clear_bits, page_ops);
return -ENOMEM;
}

set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, &inode->runtime_flags);
async_chunk = ctx->chunks;
atomic_set(&ctx->num_chunks, num_chunks);

for (i = 0; i < num_chunks; i++) {
if (should_compress)
cur_end = min(end, start + SZ_512K - 1);
else
cur_end = end;
cur_end = min(end, start + SZ_512K - 1);

/*
* igrab is called higher up in the call chain, take only the
@@ -1581,33 +1534,6 @@ static int cow_file_range_async(struct btrfs_inode *inode,
async_chunk[i].write_flags = write_flags;
INIT_LIST_HEAD(&async_chunk[i].extents);

/*
* The locked_page comes all the way from writepage and its
* the original page we were actually given. As we spread
* this large delalloc region across multiple async_chunk
* structs, only the first struct needs a pointer to locked_page
*
* This way we don't need racey decisions about who is supposed
* to unlock it.
*/
if (locked_page) {
/*
* Depending on the compressibility, the pages might or
* might not go through async. We want all of them to
* be accounted against wbc once. Let's do it here
* before the paths diverge. wbc accounting is used
* only for foreign writeback detection and doesn't
* need full accuracy. Just account the whole thing
* against the first page.
*/
wbc_account_cgroup_owner(wbc, locked_page,
cur_end - start);
async_chunk[i].locked_page = locked_page;
locked_page = NULL;
} else {
async_chunk[i].locked_page = NULL;
}

if (blkcg_css != blkcg_root_css) {
css_get(blkcg_css);
async_chunk[i].blkcg_css = blkcg_css;
@@ -1623,10 +1549,8 @@ static int cow_file_range_async(struct btrfs_inode *inode,

btrfs_queue_work(fs_info->delalloc_workers, &async_chunk[i].work);

*nr_written += nr_pages;
start = cur_end + 1;
}
*page_started = 1;
return 0;
}

@@ -2229,18 +2153,13 @@ int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page
ASSERT(!zoned || btrfs_is_data_reloc_root(inode->root));
ret = run_delalloc_nocow(inode, locked_page, start, end,
page_started, nr_written);
} else if (!btrfs_inode_can_compress(inode) ||
!inode_need_compress(inode, start, end)) {
} else {
if (zoned)
ret = run_delalloc_zoned(inode, locked_page, start, end,
page_started, nr_written);
else
ret = cow_file_range(inode, locked_page, start, end,
page_started, nr_written, 1, NULL);
} else {
set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, &inode->runtime_flags);
ret = cow_file_range_async(inode, wbc, locked_page, start, end,
page_started, nr_written);
}
ASSERT(ret <= 0);
if (ret)
@@ -7873,14 +7792,68 @@ static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
return extent_fiemap(BTRFS_I(inode), fieinfo, start, len);
}

static int btrfs_writepages_async(struct btrfs_inode *inode, struct writeback_control *wbc, u64 start, u64 end)
{
u64 last_start, cur_start = start;
u64 cur_end;
int ret = 0;

lock_extent(&inode->io_tree, start, end, NULL);

while (cur_start < end) {
bool found;
last_start = cur_start;
cur_end = end;

found = find_lock_delalloc_range(&inode->vfs_inode, NULL, &cur_start, &cur_end);
/* Nothing to writeback */
if (!found) {
unlock_extent(&inode->io_tree, cur_start, cur_end, NULL);
cur_start = cur_end + 1;
continue;
}

/* A hole with no pages, unlock part thereof */
if (cur_start > last_start)
unlock_extent(&inode->io_tree, last_start, cur_start - 1, NULL);

/* Got more than we requested */
if (cur_end > end) {
if (try_lock_extent(&inode->io_tree, end + 1, cur_end, NULL)) {
/* Try writing the whole extent */
end = cur_end;
} else {
/*
* Someone is holding the extent lock.
* Unlock the pages in the trailing part of the
* extent and write only as much as writepage
* asked for.
*/
__unlock_for_delalloc(&inode->vfs_inode, NULL, end + 1, cur_end);
cur_end = end;
}
}

ret = cow_file_range_async(inode, wbc, cur_start, cur_end);
if (ret < 0) {
unlock_extent(&inode->io_tree, cur_start, end, NULL);
break;
}

cur_start = cur_end + 1;
}

return ret;
}

static int btrfs_writepages(struct address_space *mapping,
struct writeback_control *wbc)
{
u64 start = 0, end = LLONG_MAX;
struct inode *inode = mapping->host;
struct btrfs_inode *inode = BTRFS_I(mapping->host);
struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
struct extent_state *cached = NULL;
int ret;
loff_t isize = i_size_read(inode);
loff_t isize = i_size_read(&inode->vfs_inode);
struct writeback_control new_wbc = *wbc;

if (new_wbc.range_cyclic) {
@@ -7897,9 +7870,14 @@ static int btrfs_writepages(struct address_space *mapping,
if (start >= end)
return 0;

lock_extent(&BTRFS_I(inode)->io_tree, start, end, &cached);
ret = extent_writepages(mapping, wbc);
unlock_extent(&BTRFS_I(inode)->io_tree, start, end, &cached);
if (btrfs_test_opt(fs_info, COMPRESS) &&
btrfs_inode_can_compress(inode)) {
ret = btrfs_writepages_async(inode, wbc, start, end);
} else {
lock_extent(&inode->io_tree, start, end, &cached);
ret = extent_writepages(mapping, wbc);
unlock_extent(&inode->io_tree, start, end, &cached);
}

if (new_wbc.range_cyclic) {
wbc->range_start = new_wbc.range_start;
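For reference, the sites where the extent range locked in btrfs_writepages_async() is unlocked again, condensed from the fs/btrfs/inode.c hunks above (a summary of this patch's hunks, not additional code):

    /* compress_file_range() produced no async extents: async_cow_start() */
    unlock_extent(&async_chunk->inode->io_tree,
                  async_chunk->start, async_chunk->end, NULL);

    /* Compressed extents submitted: async_cow_submit(), after submit_compressed_extents() */
    unlock_extent(&async_chunk->inode->io_tree,
                  async_chunk->start, async_chunk->end, NULL);

    /* Allocation of the async_cow context failed: cow_file_range_async() clears and unlocks */
    extent_clear_unlock_delalloc(inode, start, end, NULL, clear_bits, page_ops);

    /* cow_file_range_async() returned an error: btrfs_writepages_async() */
    unlock_extent(&inode->io_tree, cur_start, end, NULL);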
