Skip to content

Commit

Permalink
md/raid10: Allow replacement device to replace old drive.
Browse files Browse the repository at this point in the history
When recovery finishes and spare_active is called, check for a
replacement that might have just become fully synced and mark it
as such, marking the original as failed.

Then when the original is removed, move the replacement into
its position.

This means that 'replacement' can spontaneously become NULL in some
situations.  Make sure we check for those.
It also means that 'rdev' and 'replacement' could appear to be
identical - check for that too.

Signed-off-by: NeilBrown <neilb@suse.de>
  • Loading branch information
neilbrown committed Dec 22, 2011
1 parent 24afd80 commit 4ca40c2
Showing 1 changed file with 61 additions and 11 deletions.
72 changes: 61 additions & 11 deletions drivers/md/raid10.c
Original file line number Diff line number Diff line change
Expand Up @@ -397,14 +397,17 @@ static void raid10_end_write_request(struct bio *bio, int error)
int dec_rdev = 1;
struct r10conf *conf = r10_bio->mddev->private;
int slot, repl;
struct md_rdev *rdev;
struct md_rdev *rdev = NULL;

dev = find_bio_disk(conf, r10_bio, bio, &slot, &repl);

if (repl)
rdev = conf->mirrors[dev].replacement;
else
if (!rdev) {
smp_rmb();
repl = 0;
rdev = conf->mirrors[dev].rdev;
}
/*
* this branch is our 'one mirror IO has finished' event handler:
*/
Expand Down Expand Up @@ -1089,6 +1092,8 @@ static void make_request(struct mddev *mddev, struct bio * bio)
struct md_rdev *rdev = rcu_dereference(conf->mirrors[d].rdev);
struct md_rdev *rrdev = rcu_dereference(
conf->mirrors[d].replacement);
if (rdev == rrdev)
rrdev = NULL;
if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
atomic_inc(&rdev->nr_pending);
blocked_rdev = rdev;
Expand Down Expand Up @@ -1170,9 +1175,15 @@ static void make_request(struct mddev *mddev, struct bio * bio)
rdev_dec_pending(conf->mirrors[d].rdev, mddev);
}
if (r10_bio->devs[j].repl_bio) {
struct md_rdev *rdev;
d = r10_bio->devs[j].devnum;
rdev_dec_pending(
conf->mirrors[d].replacement, mddev);
rdev = conf->mirrors[d].replacement;
if (!rdev) {
/* Race with remove_disk */
smp_mb();
rdev = conf->mirrors[d].rdev;
}
rdev_dec_pending(rdev, mddev);
}
}
allow_barrier(conf);
Expand Down Expand Up @@ -1230,6 +1241,10 @@ static void make_request(struct mddev *mddev, struct bio * bio)
max_sectors);
r10_bio->devs[i].repl_bio = mbio;

/* We are actively writing to the original device
* so it cannot disappear, so the replacement cannot
* become NULL here
*/
mbio->bi_sector = (r10_bio->devs[i].addr+
conf->mirrors[d].replacement->data_offset);
mbio->bi_bdev = conf->mirrors[d].replacement->bdev;
Expand Down Expand Up @@ -1404,9 +1419,27 @@ static int raid10_spare_active(struct mddev *mddev)
*/
for (i = 0; i < conf->raid_disks; i++) {
tmp = conf->mirrors + i;
if (tmp->rdev
&& !test_bit(Faulty, &tmp->rdev->flags)
&& !test_and_set_bit(In_sync, &tmp->rdev->flags)) {
if (tmp->replacement
&& tmp->replacement->recovery_offset == MaxSector
&& !test_bit(Faulty, &tmp->replacement->flags)
&& !test_and_set_bit(In_sync, &tmp->replacement->flags)) {
/* Replacement has just become active */
if (!tmp->rdev
|| !test_and_clear_bit(In_sync, &tmp->rdev->flags))
count++;
if (tmp->rdev) {
/* Replaced device not technically faulty,
* but we need to be sure it gets removed
* and never re-added.
*/
set_bit(Faulty, &tmp->rdev->flags);
sysfs_notify_dirent_safe(
tmp->rdev->sysfs_state);
}
sysfs_notify_dirent_safe(tmp->replacement->sysfs_state);
} else if (tmp->rdev
&& !test_bit(Faulty, &tmp->rdev->flags)
&& !test_and_set_bit(In_sync, &tmp->rdev->flags)) {
count++;
sysfs_notify_dirent(tmp->rdev->sysfs_state);
}
Expand Down Expand Up @@ -1506,6 +1539,7 @@ static int raid10_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
*/
if (!test_bit(Faulty, &rdev->flags) &&
mddev->recovery_disabled != p->recovery_disabled &&
(!p->replacement || p->replacement == rdev) &&
enough(conf, -1)) {
err = -EBUSY;
goto abort;
Expand All @@ -1517,7 +1551,21 @@ static int raid10_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
err = -EBUSY;
*rdevp = rdev;
goto abort;
}
} else if (p->replacement) {
/* We must have just cleared 'rdev' */
p->rdev = p->replacement;
clear_bit(Replacement, &p->replacement->flags);
smp_mb(); /* Make sure other CPUs may see both as identical
* but will never see neither -- if they are careful.
*/
p->replacement = NULL;
clear_bit(WantReplacement, &rdev->flags);
} else
/* We might have just removed the Replacement as faulty
* Clear the flag just in case
*/
clear_bit(WantReplacement, &rdev->flags);

err = md_integrity_register(mddev);

abort:
Expand Down Expand Up @@ -1595,13 +1643,15 @@ static void end_sync_write(struct bio *bio, int error)
int bad_sectors;
int slot;
int repl;
struct md_rdev *rdev;
struct md_rdev *rdev = NULL;

d = find_bio_disk(conf, r10_bio, bio, &slot, &repl);
if (repl)
rdev = conf->mirrors[d].replacement;
else
if (!rdev) {
smp_mb();
rdev = conf->mirrors[d].rdev;
}

if (!uptodate) {
if (repl)
Expand Down Expand Up @@ -2368,7 +2418,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
}
bio = r10_bio->devs[m].repl_bio;
rdev = conf->mirrors[dev].replacement;
if (bio == IO_MADE_GOOD) {
if (rdev && bio == IO_MADE_GOOD) {
rdev_clear_badblocks(
rdev,
r10_bio->devs[m].addr,
Expand Down

0 comments on commit 4ca40c2

Please sign in to comment.