Skip to content

Commit c0ffeb6

Browse files
Zheng Qixing authored and YuKuai-huawei committed
md: allow removing faulty rdev during resync
During RAID resync, faulty rdev cannot be removed and will result in
"Device or resource busy" error when attempting hot removal.

Reproduction steps:
  mdadm -Cv /dev/md0 -l1 -n3 -e1.2 /dev/sd{b..d}
  mdadm /dev/md0 -f /dev/sdb
  mdadm /dev/md0 -r /dev/sdb
  -> mdadm: hot remove failed for /dev/sdb: Device or resource busy

After commit 4b10a3b ("md: ensure resync is prioritized over recovery"),
when a device becomes faulty during resync, the md_choose_sync_action()
function returns early without calling remove_and_add_spares(),
preventing faulty device removal.

This patch extracts a helper function remove_spares() to support
removing faulty devices during RAID resync operations.

Fixes: 4b10a3b ("md: ensure resync is prioritized over recovery")
Signed-off-by: Zheng Qixing <zhengqixing@huawei.com>
Reviewed-by: Li Nan <linan122@huawei.com>
Link: https://lore.kernel.org/linux-raid/20250707075412.150301-1-zhengqixing@huaweicloud.com
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
1 parent 3ec8db6 commit c0ffeb6

File tree

1 file changed

+17
-7
lines changed

1 file changed

+17
-7
lines changed

drivers/md/md.c

Lines changed: 17 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -9459,17 +9459,11 @@ static bool md_spares_need_change(struct mddev *mddev)
94599459
return false;
94609460
}
94619461

9462-
static int remove_and_add_spares(struct mddev *mddev,
9463-
struct md_rdev *this)
9462+
static int remove_spares(struct mddev *mddev, struct md_rdev *this)
94649463
{
94659464
struct md_rdev *rdev;
9466-
int spares = 0;
94679465
int removed = 0;
94689466

9469-
if (this && test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
9470-
/* Mustn't remove devices when resync thread is running */
9471-
return 0;
9472-
94739467
rdev_for_each(rdev, mddev) {
94749468
if ((this == NULL || rdev == this) && rdev_removeable(rdev) &&
94759469
!mddev->pers->hot_remove_disk(mddev, rdev)) {
@@ -9483,6 +9477,21 @@ static int remove_and_add_spares(struct mddev *mddev,
94839477
if (removed && mddev->kobj.sd)
94849478
sysfs_notify_dirent_safe(mddev->sysfs_degraded);
94859479

9480+
return removed;
9481+
}
9482+
9483+
static int remove_and_add_spares(struct mddev *mddev,
9484+
struct md_rdev *this)
9485+
{
9486+
struct md_rdev *rdev;
9487+
int spares = 0;
9488+
int removed = 0;
9489+
9490+
if (this && test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
9491+
/* Mustn't remove devices when resync thread is running */
9492+
return 0;
9493+
9494+
removed = remove_spares(mddev, this);
94869495
if (this && removed)
94879496
goto no_add;
94889497

@@ -9525,6 +9534,7 @@ static bool md_choose_sync_action(struct mddev *mddev, int *spares)
95259534

95269535
/* Check if resync is in progress. */
95279536
if (mddev->recovery_cp < MaxSector) {
9537+
remove_spares(mddev, NULL);
95289538
set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
95299539
clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
95309540
return true;

0 commit comments

Comments
 (0)