Skip to content

Commit 4e919af

Browse files
committed
xfs: periodically relog deferred intent items
There's a subtle design flaw in the deferred log item code that can lead to pinning the log tail. Taking up the defer ops chain examples from the previous commit, we can get trapped in sequences like this: Caller hands us a transaction t0 with D0-D3 attached. The defer ops chain will look like the following if the transaction rolls succeed: t1: D0(t0), D1(t0), D2(t0), D3(t0) t2: d4(t1), d5(t1), D1(t0), D2(t0), D3(t0) t3: d5(t1), D1(t0), D2(t0), D3(t0) ... t9: d9(t7), D3(t0) t10: D3(t0) t11: d10(t10), d11(t10) t12: d11(t10) In transaction 9, we finish d9 and try to roll to t10 while holding onto an intent item for D3 that we logged in t0. The previous commit changed the order in which we place new defer ops in the defer ops processing chain to reduce the maximum chain length. Now make xfs_defer_finish_noroll capable of relogging the entire chain periodically so that we can always move the log tail forward. Most chains will never get relogged, except for operations that generate very long chains (large extents containing many blocks with different sharing levels) or are on filesystems with small logs and a lot of ongoing metadata updates. Callers are now required to ensure that the transaction reservation is large enough to handle logging done items and new intent items for the maximum possible chain length. Most callers are careful to keep the chain lengths low, so the overhead should be minimal. The decision to relog an intent item is made based on whether the intent was logged in a previous checkpoint, since there's no point in relogging an intent into the same checkpoint. Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Brian Foster <bfoster@redhat.com>
1 parent 27dada0 commit 4e919af

File tree

9 files changed

+168
-0
lines changed

9 files changed

+168
-0
lines changed

fs/xfs/libxfs/xfs_defer.c

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include "xfs_inode_item.h"
1818
#include "xfs_trace.h"
1919
#include "xfs_icache.h"
20+
#include "xfs_log.h"
2021

2122
/*
2223
* Deferred Operations in XFS
@@ -345,6 +346,42 @@ xfs_defer_cancel_list(
345346
}
346347
}
347348

349+
/*
350+
* Prevent a log intent item from pinning the tail of the log by logging a
351+
* done item to release the intent item; and then log a new intent item.
352+
* The caller should provide a fresh transaction and roll it after we're done.
353+
*/
354+
static int
355+
xfs_defer_relog(
356+
struct xfs_trans **tpp,
357+
struct list_head *dfops)
358+
{
359+
struct xfs_defer_pending *dfp;
360+
361+
ASSERT((*tpp)->t_flags & XFS_TRANS_PERM_LOG_RES);
362+
363+
list_for_each_entry(dfp, dfops, dfp_list) {
364+
/*
365+
* If the log intent item for this deferred op is not a part of
366+
* the current log checkpoint, relog the intent item to keep
367+
* the log tail moving forward. We're ok with this being racy
368+
* because an incorrect decision means we'll be a little slower
369+
* at pushing the tail.
370+
*/
371+
if (dfp->dfp_intent == NULL ||
372+
xfs_log_item_in_current_chkpt(dfp->dfp_intent))
373+
continue;
374+
375+
trace_xfs_defer_relog_intent((*tpp)->t_mountp, dfp);
376+
XFS_STATS_INC((*tpp)->t_mountp, defer_relog);
377+
dfp->dfp_intent = xfs_trans_item_relog(dfp->dfp_intent, *tpp);
378+
}
379+
380+
if ((*tpp)->t_flags & XFS_TRANS_DIRTY)
381+
return xfs_defer_trans_roll(tpp);
382+
return 0;
383+
}
384+
348385
/*
349386
* Log an intent-done item for the first pending intent, and finish the work
350387
* items.
@@ -431,6 +468,11 @@ xfs_defer_finish_noroll(
431468
if (error)
432469
goto out_shutdown;
433470

471+
/* Possibly relog intent items to keep the log moving. */
472+
error = xfs_defer_relog(tp, &dop_pending);
473+
if (error)
474+
goto out_shutdown;
475+
434476
dfp = list_first_entry(&dop_pending, struct xfs_defer_pending,
435477
dfp_list);
436478
error = xfs_defer_finish_one(*tp, dfp);

fs/xfs/xfs_bmap_item.c

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -542,13 +542,40 @@ xfs_bui_item_match(
542542
return BUI_ITEM(lip)->bui_format.bui_id == intent_id;
543543
}
544544

545+
/* Relog an intent item to push the log tail forward. */
546+
static struct xfs_log_item *
547+
xfs_bui_item_relog(
548+
struct xfs_log_item *intent,
549+
struct xfs_trans *tp)
550+
{
551+
struct xfs_bud_log_item *budp;
552+
struct xfs_bui_log_item *buip;
553+
struct xfs_map_extent *extp;
554+
unsigned int count;
555+
556+
count = BUI_ITEM(intent)->bui_format.bui_nextents;
557+
extp = BUI_ITEM(intent)->bui_format.bui_extents;
558+
559+
tp->t_flags |= XFS_TRANS_DIRTY;
560+
budp = xfs_trans_get_bud(tp, BUI_ITEM(intent));
561+
set_bit(XFS_LI_DIRTY, &budp->bud_item.li_flags);
562+
563+
buip = xfs_bui_init(tp->t_mountp);
564+
memcpy(buip->bui_format.bui_extents, extp, count * sizeof(*extp));
565+
atomic_set(&buip->bui_next_extent, count);
566+
xfs_trans_add_item(tp, &buip->bui_item);
567+
set_bit(XFS_LI_DIRTY, &buip->bui_item.li_flags);
568+
return &buip->bui_item;
569+
}
570+
545571
static const struct xfs_item_ops xfs_bui_item_ops = {
546572
.iop_size = xfs_bui_item_size,
547573
.iop_format = xfs_bui_item_format,
548574
.iop_unpin = xfs_bui_item_unpin,
549575
.iop_release = xfs_bui_item_release,
550576
.iop_recover = xfs_bui_item_recover,
551577
.iop_match = xfs_bui_item_match,
578+
.iop_relog = xfs_bui_item_relog,
552579
};
553580

554581
/*

fs/xfs/xfs_extfree_item.c

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -642,13 +642,42 @@ xfs_efi_item_match(
642642
return EFI_ITEM(lip)->efi_format.efi_id == intent_id;
643643
}
644644

645+
/* Relog an intent item to push the log tail forward. */
646+
static struct xfs_log_item *
647+
xfs_efi_item_relog(
648+
struct xfs_log_item *intent,
649+
struct xfs_trans *tp)
650+
{
651+
struct xfs_efd_log_item *efdp;
652+
struct xfs_efi_log_item *efip;
653+
struct xfs_extent *extp;
654+
unsigned int count;
655+
656+
count = EFI_ITEM(intent)->efi_format.efi_nextents;
657+
extp = EFI_ITEM(intent)->efi_format.efi_extents;
658+
659+
tp->t_flags |= XFS_TRANS_DIRTY;
660+
efdp = xfs_trans_get_efd(tp, EFI_ITEM(intent), count);
661+
efdp->efd_next_extent = count;
662+
memcpy(efdp->efd_format.efd_extents, extp, count * sizeof(*extp));
663+
set_bit(XFS_LI_DIRTY, &efdp->efd_item.li_flags);
664+
665+
efip = xfs_efi_init(tp->t_mountp, count);
666+
memcpy(efip->efi_format.efi_extents, extp, count * sizeof(*extp));
667+
atomic_set(&efip->efi_next_extent, count);
668+
xfs_trans_add_item(tp, &efip->efi_item);
669+
set_bit(XFS_LI_DIRTY, &efip->efi_item.li_flags);
670+
return &efip->efi_item;
671+
}
672+
645673
static const struct xfs_item_ops xfs_efi_item_ops = {
646674
.iop_size = xfs_efi_item_size,
647675
.iop_format = xfs_efi_item_format,
648676
.iop_unpin = xfs_efi_item_unpin,
649677
.iop_release = xfs_efi_item_release,
650678
.iop_recover = xfs_efi_item_recover,
651679
.iop_match = xfs_efi_item_match,
680+
.iop_relog = xfs_efi_item_relog,
652681
};
653682

654683
/*

fs/xfs/xfs_refcount_item.c

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -560,13 +560,40 @@ xfs_cui_item_match(
560560
return CUI_ITEM(lip)->cui_format.cui_id == intent_id;
561561
}
562562

563+
/* Relog an intent item to push the log tail forward. */
564+
static struct xfs_log_item *
565+
xfs_cui_item_relog(
566+
struct xfs_log_item *intent,
567+
struct xfs_trans *tp)
568+
{
569+
struct xfs_cud_log_item *cudp;
570+
struct xfs_cui_log_item *cuip;
571+
struct xfs_phys_extent *extp;
572+
unsigned int count;
573+
574+
count = CUI_ITEM(intent)->cui_format.cui_nextents;
575+
extp = CUI_ITEM(intent)->cui_format.cui_extents;
576+
577+
tp->t_flags |= XFS_TRANS_DIRTY;
578+
cudp = xfs_trans_get_cud(tp, CUI_ITEM(intent));
579+
set_bit(XFS_LI_DIRTY, &cudp->cud_item.li_flags);
580+
581+
cuip = xfs_cui_init(tp->t_mountp, count);
582+
memcpy(cuip->cui_format.cui_extents, extp, count * sizeof(*extp));
583+
atomic_set(&cuip->cui_next_extent, count);
584+
xfs_trans_add_item(tp, &cuip->cui_item);
585+
set_bit(XFS_LI_DIRTY, &cuip->cui_item.li_flags);
586+
return &cuip->cui_item;
587+
}
588+
563589
static const struct xfs_item_ops xfs_cui_item_ops = {
564590
.iop_size = xfs_cui_item_size,
565591
.iop_format = xfs_cui_item_format,
566592
.iop_unpin = xfs_cui_item_unpin,
567593
.iop_release = xfs_cui_item_release,
568594
.iop_recover = xfs_cui_item_recover,
569595
.iop_match = xfs_cui_item_match,
596+
.iop_relog = xfs_cui_item_relog,
570597
};
571598

572599
/*

fs/xfs/xfs_rmap_item.c

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -583,13 +583,40 @@ xfs_rui_item_match(
583583
return RUI_ITEM(lip)->rui_format.rui_id == intent_id;
584584
}
585585

586+
/* Relog an intent item to push the log tail forward. */
587+
static struct xfs_log_item *
588+
xfs_rui_item_relog(
589+
struct xfs_log_item *intent,
590+
struct xfs_trans *tp)
591+
{
592+
struct xfs_rud_log_item *rudp;
593+
struct xfs_rui_log_item *ruip;
594+
struct xfs_map_extent *extp;
595+
unsigned int count;
596+
597+
count = RUI_ITEM(intent)->rui_format.rui_nextents;
598+
extp = RUI_ITEM(intent)->rui_format.rui_extents;
599+
600+
tp->t_flags |= XFS_TRANS_DIRTY;
601+
rudp = xfs_trans_get_rud(tp, RUI_ITEM(intent));
602+
set_bit(XFS_LI_DIRTY, &rudp->rud_item.li_flags);
603+
604+
ruip = xfs_rui_init(tp->t_mountp, count);
605+
memcpy(ruip->rui_format.rui_extents, extp, count * sizeof(*extp));
606+
atomic_set(&ruip->rui_next_extent, count);
607+
xfs_trans_add_item(tp, &ruip->rui_item);
608+
set_bit(XFS_LI_DIRTY, &ruip->rui_item.li_flags);
609+
return &ruip->rui_item;
610+
}
611+
586612
static const struct xfs_item_ops xfs_rui_item_ops = {
587613
.iop_size = xfs_rui_item_size,
588614
.iop_format = xfs_rui_item_format,
589615
.iop_unpin = xfs_rui_item_unpin,
590616
.iop_release = xfs_rui_item_release,
591617
.iop_recover = xfs_rui_item_recover,
592618
.iop_match = xfs_rui_item_match,
619+
.iop_relog = xfs_rui_item_relog,
593620
};
594621

595622
/*

fs/xfs/xfs_stats.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ int xfs_stats_format(struct xfsstats __percpu *stats, char *buf)
2323
uint64_t xs_xstrat_bytes = 0;
2424
uint64_t xs_write_bytes = 0;
2525
uint64_t xs_read_bytes = 0;
26+
uint64_t defer_relog = 0;
2627

2728
static const struct xstats_entry {
2829
char *desc;
@@ -70,10 +71,13 @@ int xfs_stats_format(struct xfsstats __percpu *stats, char *buf)
7071
xs_xstrat_bytes += per_cpu_ptr(stats, i)->s.xs_xstrat_bytes;
7172
xs_write_bytes += per_cpu_ptr(stats, i)->s.xs_write_bytes;
7273
xs_read_bytes += per_cpu_ptr(stats, i)->s.xs_read_bytes;
74+
defer_relog += per_cpu_ptr(stats, i)->s.defer_relog;
7375
}
7476

7577
len += scnprintf(buf + len, PATH_MAX-len, "xpc %Lu %Lu %Lu\n",
7678
xs_xstrat_bytes, xs_write_bytes, xs_read_bytes);
79+
len += scnprintf(buf + len, PATH_MAX-len, "defer_relog %llu\n",
80+
defer_relog);
7781
len += scnprintf(buf + len, PATH_MAX-len, "debug %u\n",
7882
#if defined(DEBUG)
7983
1);

fs/xfs/xfs_stats.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,7 @@ struct __xfsstats {
137137
uint64_t xs_xstrat_bytes;
138138
uint64_t xs_write_bytes;
139139
uint64_t xs_read_bytes;
140+
uint64_t defer_relog;
140141
};
141142

142143
#define xfsstats_offset(f) (offsetof(struct __xfsstats, f)/sizeof(uint32_t))

fs/xfs/xfs_trace.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2533,6 +2533,7 @@ DEFINE_DEFER_PENDING_EVENT(xfs_defer_create_intent);
25332533
DEFINE_DEFER_PENDING_EVENT(xfs_defer_cancel_list);
25342534
DEFINE_DEFER_PENDING_EVENT(xfs_defer_pending_finish);
25352535
DEFINE_DEFER_PENDING_EVENT(xfs_defer_pending_abort);
2536+
DEFINE_DEFER_PENDING_EVENT(xfs_defer_relog_intent);
25362537

25372538
#define DEFINE_BMAP_FREE_DEFERRED_EVENT DEFINE_PHYS_EXTENT_DEFERRED_EVENT
25382539
DEFINE_BMAP_FREE_DEFERRED_EVENT(xfs_bmap_free_defer);

fs/xfs/xfs_trans.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,8 @@ struct xfs_item_ops {
7575
int (*iop_recover)(struct xfs_log_item *lip,
7676
struct list_head *capture_list);
7777
bool (*iop_match)(struct xfs_log_item *item, uint64_t id);
78+
struct xfs_log_item *(*iop_relog)(struct xfs_log_item *intent,
79+
struct xfs_trans *tp);
7880
};
7981

8082
/* Is this log item a deferred action intent? */
@@ -258,4 +260,12 @@ void xfs_trans_buf_copy_type(struct xfs_buf *dst_bp,
258260

259261
extern kmem_zone_t *xfs_trans_zone;
260262

263+
static inline struct xfs_log_item *
264+
xfs_trans_item_relog(
265+
struct xfs_log_item *lip,
266+
struct xfs_trans *tp)
267+
{
268+
return lip->li_ops->iop_relog(lip, tp);
269+
}
270+
261271
#endif /* __XFS_TRANS_H__ */

0 commit comments

Comments
 (0)