Skip to content

Commit 13bb483

Browse files
naota authored and kdave committed
btrfs: zoned: activate metadata block group on write time
In the current implementation, block groups are activated at reservation time to ensure that all reserved bytes can be written to an active metadata block group. However, this approach has proven to be less efficient, as it activates block groups more frequently than necessary, putting pressure on the active zone resource and leading to potential issues such as early ENOSPC or hung_task.

Another drawback of the current method is that it hampers metadata over-commit, and necessitates additional flush operations and block group allocations, resulting in decreased overall performance.

To address these issues, this commit introduces write-time activation of metadata and system block groups. This involves reserving at least one active block group specifically for metadata and one for system block groups.

Since metadata write-out is always allocated sequentially, when we need to write to a non-active block group, we can wait for the ongoing IOs to complete, activate a new block group, and then proceed with writing to the new block group.

Fixes: b093151 ("btrfs: zoned: activate metadata block group on flush_space")
CC: stable@vger.kernel.org # 6.1+
Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
Signed-off-by: David Sterba <dsterba@suse.com>
1 parent a7e1ac7 commit 13bb483

File tree

3 files changed

+93
-2
lines changed

3 files changed

+93
-2
lines changed

fs/btrfs/block-group.c

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4287,6 +4287,17 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
42874287
struct btrfs_caching_control *caching_ctl;
42884288
struct rb_node *n;
42894289

4290+
if (btrfs_is_zoned(info)) {
4291+
if (info->active_meta_bg) {
4292+
btrfs_put_block_group(info->active_meta_bg);
4293+
info->active_meta_bg = NULL;
4294+
}
4295+
if (info->active_system_bg) {
4296+
btrfs_put_block_group(info->active_system_bg);
4297+
info->active_system_bg = NULL;
4298+
}
4299+
}
4300+
42904301
write_lock(&info->block_group_cache_lock);
42914302
while (!list_empty(&info->caching_block_groups)) {
42924303
caching_ctl = list_entry(info->caching_block_groups.next,

fs/btrfs/fs.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -770,6 +770,9 @@ struct btrfs_fs_info {
770770
u64 data_reloc_bg;
771771
struct mutex zoned_data_reloc_io_lock;
772772

773+
struct btrfs_block_group *active_meta_bg;
774+
struct btrfs_block_group *active_system_bg;
775+
773776
u64 nr_global_roots;
774777

775778
spinlock_t zone_active_bgs_lock;

fs/btrfs/zoned.c

Lines changed: 79 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,9 @@
6565

6666
#define SUPER_INFO_SECTORS ((u64)BTRFS_SUPER_INFO_SIZE >> SECTOR_SHIFT)
6767

68+
static void wait_eb_writebacks(struct btrfs_block_group *block_group);
69+
static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_written);
70+
6871
static inline bool sb_zone_is_full(const struct blk_zone *zone)
6972
{
7073
return (zone->cond == BLK_ZONE_COND_FULL) ||
@@ -1747,6 +1750,62 @@ void btrfs_finish_ordered_zoned(struct btrfs_ordered_extent *ordered)
17471750
}
17481751
}
17491752

1753+
/*
 * Make sure the block group in @ctx->zoned_bg is active before its metadata
 * is written out.
 *
 * @ctx:       write context; provides the writeback control (@ctx->wbc) and
 *             the block group being written (@ctx->zoned_bg).
 * @active_bg: slot that tracks the currently active block group of this
 *             kind; updated (with a reference held for the slot) when the
 *             active block group is switched.
 *
 * Return: true if the block group is already active or was activated here,
 * false if activation failed or if switching away from the previously
 * active block group is not permitted for this writeback.
 *
 * On the switching path zoned_meta_io_lock must be held by the caller; it is
 * dropped and re-taken while waiting for the old block group's writeback.
 */
static bool check_bg_is_active(struct btrfs_eb_write_context *ctx,
1754+
struct btrfs_block_group **active_bg)
1755+
{
1756+
const struct writeback_control *wbc = ctx->wbc;
1757+
struct btrfs_block_group *block_group = ctx->zoned_bg;
1758+
struct btrfs_fs_info *fs_info = block_group->fs_info;
1759+
1760+
/* Fast path: the block group is already flagged as active. */
if (test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags))
1761+
return true;
1762+
1763+
/*
 * Tree-log block group: it is not tracked in *active_bg. If activation
 * fails, call btrfs_zone_finish_one_bg() and retry once, but only when
 * that call returned 1.
 */
if (fs_info->treelog_bg == block_group->start) {
1764+
if (!btrfs_zone_activate(block_group)) {
1765+
int ret_fin = btrfs_zone_finish_one_bg(fs_info);
1766+
1767+
if (ret_fin != 1 || !btrfs_zone_activate(block_group))
1768+
return false;
1769+
}
1770+
} else if (*active_bg != block_group) {
1771+
struct btrfs_block_group *tgt = *active_bg;
1772+
1773+
/* zoned_meta_io_lock protects fs_info->active_{meta,system}_bg. */
1774+
lockdep_assert_held(&fs_info->zoned_meta_io_lock);
1775+
1776+
if (tgt) {
1777+
/*
1778+
* If there is an unsent IO left in the allocated area,
1779+
* we cannot wait for them as it may cause a deadlock.
1780+
*/
1781+
if (tgt->meta_write_pointer < tgt->start + tgt->alloc_offset) {
1782+
if (wbc->sync_mode == WB_SYNC_NONE ||
1783+
(wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync))
1784+
return false;
1785+
}
1786+
1787+
/* Pivot active metadata/system block group. */
1788+
btrfs_zoned_meta_io_unlock(fs_info);
1789+
wait_eb_writebacks(tgt);
1790+
/* NOTE(review): the int result of do_zone_finish() is not checked here. */
do_zone_finish(tgt, true);
1791+
btrfs_zoned_meta_io_lock(fs_info);
1792+
/*
 * The lock was dropped above, so the slot may have been changed in the
 * meantime; drop the slot's reference only if it still points at tgt.
 */
if (*active_bg == tgt) {
1793+
btrfs_put_block_group(tgt);
1794+
*active_bg = NULL;
1795+
}
1796+
}
1797+
if (!btrfs_zone_activate(block_group))
1798+
return false;
1799+
/* Record the newly activated block group and take a reference for the slot. */
if (*active_bg != block_group) {
1800+
ASSERT(*active_bg == NULL);
1801+
*active_bg = block_group;
1802+
btrfs_get_block_group(block_group);
1803+
}
1804+
}
1805+
1806+
return true;
1807+
}
1808+
17501809
/*
17511810
* Check if @ctx->eb is aligned to the write pointer.
17521811
*
@@ -1781,8 +1840,26 @@ int btrfs_check_meta_write_pointer(struct btrfs_fs_info *fs_info,
17811840
ctx->zoned_bg = block_group;
17821841
}
17831842

1784-
if (block_group->meta_write_pointer == eb->start)
1785-
return 0;
1843+
if (block_group->meta_write_pointer == eb->start) {
1844+
struct btrfs_block_group **tgt;
1845+
1846+
if (!test_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &fs_info->flags))
1847+
return 0;
1848+
1849+
if (block_group->flags & BTRFS_BLOCK_GROUP_SYSTEM)
1850+
tgt = &fs_info->active_system_bg;
1851+
else
1852+
tgt = &fs_info->active_meta_bg;
1853+
if (check_bg_is_active(ctx, tgt))
1854+
return 0;
1855+
}
1856+
1857+
/*
1858+
* Since we may release fs_info->zoned_meta_io_lock, someone can already
1859+
* start writing this eb. In that case, we can just bail out.
1860+
*/
1861+
if (block_group->meta_write_pointer > eb->start)
1862+
return -EBUSY;
17861863

17871864
/* If for_sync, this hole will be filled with transaction commit. */
17881865
if (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync)

0 commit comments

Comments
 (0)