Skip to content

Commit 0bb2193

Browse files
author
Christoph Hellwig
committed
xfs: add support for zoned space reservations
For zoned file systems, garbage collection (GC) has to take the iolock and mmaplock after moving data to a new place in order to synchronize with readers. This means that waiting for garbage collection while holding the iolock can deadlock.

To avoid this, the worst-case number of required blocks has to be reserved before taking the iolock. This is done using a new RTAVAILABLE counter that tracks blocks that are free to write into and don't require garbage collection. The new helpers try to take these available blocks, and if there aren't enough available, they wake GC and wait for it. This is done using a list of on-stack reservations to ensure fairness.

Co-developed-by: Hans Holmberg <hans.holmberg@wdc.com>
Signed-off-by: Hans Holmberg <hans.holmberg@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>
1 parent 4e4d520 commit 0bb2193

File tree

9 files changed

+343
-21
lines changed

9 files changed

+343
-21
lines changed

fs/xfs/Makefile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,8 @@ xfs-$(CONFIG_XFS_QUOTA) += xfs_dquot.o \
138138

139139
# xfs_rtbitmap is shared with libxfs
140140
xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o \
141-
xfs_zone_alloc.o
141+
xfs_zone_alloc.o \
142+
xfs_zone_space_resv.o
142143

143144
xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o
144145
xfs-$(CONFIG_SYSCTL) += xfs_sysctl.o

fs/xfs/libxfs/xfs_bmap.c

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
#include "xfs_symlink_remote.h"
4141
#include "xfs_inode_util.h"
4242
#include "xfs_rtgroup.h"
43+
#include "xfs_zone_alloc.h"
4344

4445
struct kmem_cache *xfs_bmap_intent_cache;
4546

@@ -4788,12 +4789,18 @@ xfs_bmap_del_extent_delay(
47884789
da_diff = da_old - da_new;
47894790
fdblocks = da_diff;
47904791

4791-
if (bflags & XFS_BMAPI_REMAP)
4792+
if (bflags & XFS_BMAPI_REMAP) {
47924793
;
4793-
else if (isrt)
4794-
xfs_add_frextents(mp, xfs_blen_to_rtbxlen(mp, del->br_blockcount));
4795-
else
4794+
} else if (isrt) {
4795+
xfs_rtbxlen_t rtxlen;
4796+
4797+
rtxlen = xfs_blen_to_rtbxlen(mp, del->br_blockcount);
4798+
if (xfs_is_zoned_inode(ip))
4799+
xfs_zoned_add_available(mp, rtxlen);
4800+
xfs_add_frextents(mp, rtxlen);
4801+
} else {
47964802
fdblocks += del->br_blockcount;
4803+
}
47974804

47984805
xfs_add_fdblocks(mp, fdblocks);
47994806
xfs_mod_delalloc(ip, -(int64_t)del->br_blockcount, -da_diff);

fs/xfs/libxfs/xfs_types.h

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -244,12 +244,22 @@ enum xfs_free_counter {
244244
*/
245245
XC_FREE_RTEXTENTS,
246246

247+
/*
248+
* Number of RT extents available for use.
249+
*
250+
* This counter only exists for zoned RT devices and indicates the number
251+
* of RT extents that can be directly used by writes. XC_FREE_RTEXTENTS
252+
* also includes blocks that have been written previously and freed, but
253+
* sit in a rtgroup that still needs a zone reset.
254+
*/
255+
XC_FREE_RTAVAILABLE,
247256
XC_FREE_NR,
248257
};
249258

250259
#define XFS_FREECOUNTER_STR \
251260
{ XC_FREE_BLOCKS, "blocks" }, \
252-
{ XC_FREE_RTEXTENTS, "rtextents" }
261+
{ XC_FREE_RTEXTENTS, "rtextents" }, \
262+
{ XC_FREE_RTAVAILABLE, "rtavailable" }
253263

254264
/*
255265
* Type verifier functions

fs/xfs/xfs_mount.c

Lines changed: 21 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -465,29 +465,35 @@ xfs_mount_reset_sbqflags(
465465
static const char *const xfs_free_pool_name[] = {
466466
[XC_FREE_BLOCKS] = "free blocks",
467467
[XC_FREE_RTEXTENTS] = "free rt extents",
468+
[XC_FREE_RTAVAILABLE] = "available rt extents",
468469
};
469470

470471
uint64_t
471472
xfs_default_resblks(
472473
struct xfs_mount *mp,
473474
enum xfs_free_counter ctr)
474475
{
475-
uint64_t resblks;
476-
477-
if (ctr == XC_FREE_RTEXTENTS)
476+
switch (ctr) {
477+
case XC_FREE_BLOCKS:
478+
/*
479+
* Default to 5% or 8192 FSBs of space reserved, whichever is
480+
* smaller.
481+
*
482+
* This is intended to cover concurrent allocation transactions
483+
* when we initially hit ENOSPC. These each require a 4 block
484+
* reservation. Hence by default we cover roughly 2000
485+
* concurrent allocation reservations.
486+
*/
487+
return min(div_u64(mp->m_sb.sb_dblocks, 20), 8192ULL);
488+
case XC_FREE_RTEXTENTS:
489+
case XC_FREE_RTAVAILABLE:
490+
if (IS_ENABLED(CONFIG_XFS_RT) && xfs_has_zoned(mp))
491+
return xfs_zoned_default_resblks(mp, ctr);
478492
return 0;
479-
480-
/*
481-
* We default to 5% or 8192 fsbs of space reserved, whichever is
482-
* smaller. This is intended to cover concurrent allocation
483-
* transactions when we initially hit enospc. These each require a 4
484-
* block reservation. Hence by default we cover roughly 2000 concurrent
485-
* allocation reservations.
486-
*/
487-
resblks = mp->m_sb.sb_dblocks;
488-
do_div(resblks, 20);
489-
resblks = min_t(uint64_t, resblks, 8192);
490-
return resblks;
493+
default:
494+
ASSERT(0);
495+
return 0;
496+
}
491497
}
492498

493499
/* Ensure the summary counts are correct. */

fs/xfs/xfs_trace.h

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -363,6 +363,28 @@ DEFINE_EVENT(xfs_zone_alloc_class, name, \
363363
TP_ARGS(oz, rgbno, len))
364364
DEFINE_ZONE_ALLOC_EVENT(xfs_zone_record_blocks);
365365
DEFINE_ZONE_ALLOC_EVENT(xfs_zone_alloc_blocks);
366+
367+
TRACE_EVENT(xfs_zones_mount,
368+
TP_PROTO(struct xfs_mount *mp),
369+
TP_ARGS(mp),
370+
TP_STRUCT__entry(
371+
__field(dev_t, dev)
372+
__field(xfs_rgnumber_t, rgcount)
373+
__field(uint32_t, blocks)
374+
__field(unsigned int, max_open_zones)
375+
),
376+
TP_fast_assign(
377+
__entry->dev = mp->m_super->s_dev;
378+
__entry->rgcount = mp->m_sb.sb_rgcount;
379+
__entry->blocks = mp->m_groups[XG_TYPE_RTG].blocks;
380+
__entry->max_open_zones = mp->m_max_open_zones;
381+
),
382+
TP_printk("dev %d:%d zoned %u blocks_per_zone %u, max_open %u",
383+
MAJOR(__entry->dev), MINOR(__entry->dev),
384+
__entry->rgcount,
385+
__entry->blocks,
386+
__entry->max_open_zones)
387+
);
366388
#endif /* CONFIG_XFS_RT */
367389

368390
TRACE_EVENT(xfs_inodegc_worker,
@@ -5767,6 +5789,7 @@ TRACE_EVENT(xfs_growfs_check_rtgeom,
57675789

57685790
TRACE_DEFINE_ENUM(XC_FREE_BLOCKS);
57695791
TRACE_DEFINE_ENUM(XC_FREE_RTEXTENTS);
5792+
TRACE_DEFINE_ENUM(XC_FREE_RTAVAILABLE);
57705793

57715794
DECLARE_EVENT_CLASS(xfs_freeblocks_resv_class,
57725795
TP_PROTO(struct xfs_mount *mp, enum xfs_free_counter ctr,

fs/xfs/xfs_zone_alloc.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -922,6 +922,7 @@ xfs_mount_zones(
922922
xfs_info(mp, "%u zones of %u blocks size (%u max open)",
923923
mp->m_sb.sb_rgcount, mp->m_groups[XG_TYPE_RTG].blocks,
924924
mp->m_max_open_zones);
925+
trace_xfs_zones_mount(mp);
925926

926927
if (bdev_is_zoned(bt->bt_bdev)) {
927928
error = blkdev_report_zones(bt->bt_bdev,
@@ -939,6 +940,7 @@ xfs_mount_zones(
939940
}
940941
}
941942

943+
xfs_set_freecounter(mp, XC_FREE_RTAVAILABLE, iz.available);
942944
xfs_set_freecounter(mp, XC_FREE_RTEXTENTS,
943945
iz.available + iz.reclaimable);
944946
return 0;

fs/xfs/xfs_zone_alloc.h

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,30 @@
55
struct iomap_ioend;
66
struct xfs_open_zone;
77

8+
struct xfs_zone_alloc_ctx {
9+
struct xfs_open_zone *open_zone;
10+
xfs_filblks_t reserved_blocks;
11+
};
12+
13+
/*
14+
* Grab any available space, even if it is less than what the caller asked for.
15+
*/
16+
#define XFS_ZR_GREEDY (1U << 0)
17+
/*
18+
* Only grab instantly available space, don't wait or GC.
19+
*/
20+
#define XFS_ZR_NOWAIT (1U << 1)
21+
/*
22+
* Dip into the reserved pool.
23+
*/
24+
#define XFS_ZR_RESERVED (1U << 2)
25+
26+
int xfs_zoned_space_reserve(struct xfs_inode *ip, xfs_filblks_t count_fsb,
27+
unsigned int flags, struct xfs_zone_alloc_ctx *ac);
28+
void xfs_zoned_space_unreserve(struct xfs_inode *ip,
29+
struct xfs_zone_alloc_ctx *ac);
30+
void xfs_zoned_add_available(struct xfs_mount *mp, xfs_filblks_t count_fsb);
31+
832
void xfs_zone_alloc_and_submit(struct iomap_ioend *ioend,
933
struct xfs_open_zone **oz);
1034
int xfs_zone_free_blocks(struct xfs_trans *tp, struct xfs_rtgroup *rtg,
@@ -18,6 +42,9 @@ void xfs_zoned_wake_all(struct xfs_mount *mp);
1842
bool xfs_zone_rgbno_is_valid(struct xfs_rtgroup *rtg, xfs_rgnumber_t rgbno);
1943
void xfs_mark_rtg_boundary(struct iomap_ioend *ioend);
2044

45+
uint64_t xfs_zoned_default_resblks(struct xfs_mount *mp,
46+
enum xfs_free_counter ctr);
47+
2148
#ifdef CONFIG_XFS_RT
2249
int xfs_mount_zones(struct xfs_mount *mp);
2350
void xfs_unmount_zones(struct xfs_mount *mp);

fs/xfs/xfs_zone_priv.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,4 +86,6 @@ struct xfs_zone_info {
8686

8787
struct xfs_open_zone *xfs_open_zone(struct xfs_mount *mp, bool is_gc);
8888

89+
void xfs_zoned_resv_wake_all(struct xfs_mount *mp);
90+
8991
#endif /* _XFS_ZONE_PRIV_H */

0 commit comments

Comments
 (0)