Skip to content

Commit 19aad27

Browse files
damien-lemoal authored and axboe committed
block: Fix reference counting for zone write plugs in error state
When a zone is reset or finished, disk_zone_wplug_set_wp_offset() is called to update the zone write plug write pointer offset and to clear the zone error state (BLK_ZONE_WPLUG_ERROR flag) if it is set. However, this processing fails to drop the reference to the zone write plug that was taken in disk_zone_wplug_set_error() when the error flag was first set.

Furthermore, the error state handling must release the zone write plug lock to first execute a report zones command. When the report zones command races with a reset or finish operation that clears the error, we can end up decrementing the zone write plug reference count twice: once in disk_zone_wplug_set_wp_offset() for the reset/finish operation and one more time in disk_zone_wplugs_work() once disk_zone_wplug_handle_error() completes.

Fix this by introducing disk_zone_wplug_clear_error() as the symmetric function of disk_zone_wplug_set_error(). disk_zone_wplug_clear_error() decrements the zone write plug reference count obtained in disk_zone_wplug_set_error() only if the error handling has not started yet, that is, only if disk_zone_wplugs_work() has not yet taken the zone write plug off the error list. This ensures that either disk_zone_wplug_clear_error() or disk_zone_wplugs_work() drops the zone write plug reference, but never both.

Fixes: dd291d7 ("block: Introduce zone write plugging")
Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Link: https://lore.kernel.org/r/20240501110907.96950-5-dlemoal@kernel.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
1 parent 74b7ae5 commit 19aad27

File tree

1 file changed

+49
-26
lines changed

1 file changed

+49
-26
lines changed

block/blk-zoned.c

Lines changed: 49 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -658,6 +658,54 @@ static void disk_zone_wplug_abort_unaligned(struct gendisk *disk,
658658
bio_list_merge(&zwplug->bio_list, &bl);
659659
}
660660

661+
static inline void disk_zone_wplug_set_error(struct gendisk *disk,
662+
struct blk_zone_wplug *zwplug)
663+
{
664+
unsigned long flags;
665+
666+
if (zwplug->flags & BLK_ZONE_WPLUG_ERROR)
667+
return;
668+
669+
/*
670+
* At this point, we already have a reference on the zone write plug.
671+
* However, since we are going to add the plug to the disk zone write
672+
* plugs work list, increase its reference count. This reference will
673+
* be dropped in disk_zone_wplugs_work() once the error state is
674+
* handled, or in disk_zone_wplug_clear_error() if the zone is reset or
675+
* finished.
676+
*/
677+
zwplug->flags |= BLK_ZONE_WPLUG_ERROR;
678+
atomic_inc(&zwplug->ref);
679+
680+
spin_lock_irqsave(&disk->zone_wplugs_lock, flags);
681+
list_add_tail(&zwplug->link, &disk->zone_wplugs_err_list);
682+
spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags);
683+
}
684+
685+
static inline void disk_zone_wplug_clear_error(struct gendisk *disk,
686+
struct blk_zone_wplug *zwplug)
687+
{
688+
unsigned long flags;
689+
690+
if (!(zwplug->flags & BLK_ZONE_WPLUG_ERROR))
691+
return;
692+
693+
/*
694+
* We are racing with the error handling work which drops the reference
695+
* on the zone write plug after handling the error state. So remove the
696+
* plug from the error list and drop its reference count only if the
697+
* error handling has not yet started, that is, if the zone write plug
698+
* is still listed.
699+
*/
700+
spin_lock_irqsave(&disk->zone_wplugs_lock, flags);
701+
if (!list_empty(&zwplug->link)) {
702+
list_del_init(&zwplug->link);
703+
zwplug->flags &= ~BLK_ZONE_WPLUG_ERROR;
704+
disk_put_zone_wplug(zwplug);
705+
}
706+
spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags);
707+
}
708+
661709
/*
662710
* Set a zone write plug write pointer offset to either 0 (zone reset case)
663711
* or to the zone size (zone finish case). This aborts all plugged BIOs, which
@@ -691,12 +739,7 @@ static void disk_zone_wplug_set_wp_offset(struct gendisk *disk,
691739
* in a good state. So clear the error flag and decrement the
692740
* error count if we were in error state.
693741
*/
694-
if (zwplug->flags & BLK_ZONE_WPLUG_ERROR) {
695-
zwplug->flags &= ~BLK_ZONE_WPLUG_ERROR;
696-
spin_lock(&disk->zone_wplugs_lock);
697-
list_del_init(&zwplug->link);
698-
spin_unlock(&disk->zone_wplugs_lock);
699-
}
742+
disk_zone_wplug_clear_error(disk, zwplug);
700743

701744
/*
702745
* The zone write plug now has no BIO plugged: remove it from the
@@ -885,26 +928,6 @@ void blk_zone_write_plug_attempt_merge(struct request *req)
885928
spin_unlock_irqrestore(&zwplug->lock, flags);
886929
}
887930

888-
static inline void disk_zone_wplug_set_error(struct gendisk *disk,
889-
struct blk_zone_wplug *zwplug)
890-
{
891-
if (!(zwplug->flags & BLK_ZONE_WPLUG_ERROR)) {
892-
unsigned long flags;
893-
894-
/*
895-
* Increase the plug reference count. The reference will be
896-
* dropped in disk_zone_wplugs_work() once the error state
897-
* is handled.
898-
*/
899-
zwplug->flags |= BLK_ZONE_WPLUG_ERROR;
900-
atomic_inc(&zwplug->ref);
901-
902-
spin_lock_irqsave(&disk->zone_wplugs_lock, flags);
903-
list_add_tail(&zwplug->link, &disk->zone_wplugs_err_list);
904-
spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags);
905-
}
906-
}
907-
908931
/*
909932
* Check and prepare a BIO for submission by incrementing the write pointer
910933
* offset of its zone write plug and changing zone append operations into

0 commit comments

Comments
 (0)