Skip to content

Commit 4e5314b

Browse files
neilbrownLinus Torvalds
authored andcommitted
[PATCH] md: better handling of read errors with raid5.
This patch changes the behaviour of raid5 when it gets a read error. Instead of just failing the device, it tries to find out what should have been there, and writes it over the bad block. For some media errors, this has a reasonable chance of fixing the error. If the write succeeds, and a subsequent read succeeds as well, raid5 decides the address is OK and continues. Instead of failing a drive on read-error, we attempt to re-write the block, and then re-read. If that all works, we allow the device to remain in the array. Signed-off-by: Neil Brown <neilb@suse.de> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
1 parent 703ebe8 commit 4e5314b

File tree

2 files changed

+58
-5
lines changed

2 files changed

+58
-5
lines changed

drivers/md/raid5.c

Lines changed: 56 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -349,7 +349,7 @@ static void shrink_stripes(raid5_conf_t *conf)
349349
conf->slab_cache = NULL;
350350
}
351351

352-
static int raid5_end_read_request (struct bio * bi, unsigned int bytes_done,
352+
static int raid5_end_read_request(struct bio * bi, unsigned int bytes_done,
353353
int error)
354354
{
355355
struct stripe_head *sh = bi->bi_private;
@@ -401,10 +401,27 @@ static int raid5_end_read_request (struct bio * bi, unsigned int bytes_done,
401401
}
402402
#else
403403
set_bit(R5_UPTODATE, &sh->dev[i].flags);
404-
#endif
404+
#endif
405+
if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
406+
printk("R5: read error corrected!!\n");
407+
clear_bit(R5_ReadError, &sh->dev[i].flags);
408+
clear_bit(R5_ReWrite, &sh->dev[i].flags);
409+
}
405410
} else {
406-
md_error(conf->mddev, conf->disks[i].rdev);
407411
clear_bit(R5_UPTODATE, &sh->dev[i].flags);
412+
if (conf->mddev->degraded) {
413+
printk("R5: read error not correctable.\n");
414+
clear_bit(R5_ReadError, &sh->dev[i].flags);
415+
clear_bit(R5_ReWrite, &sh->dev[i].flags);
416+
md_error(conf->mddev, conf->disks[i].rdev);
417+
} else if (test_bit(R5_ReWrite, &sh->dev[i].flags)) {
418+
/* Oh, no!!! */
419+
printk("R5: read error NOT corrected!!\n");
420+
clear_bit(R5_ReadError, &sh->dev[i].flags);
421+
clear_bit(R5_ReWrite, &sh->dev[i].flags);
422+
md_error(conf->mddev, conf->disks[i].rdev);
423+
} else
424+
set_bit(R5_ReadError, &sh->dev[i].flags);
408425
}
409426
rdev_dec_pending(conf->disks[i].rdev, conf->mddev);
410427
#if 0
@@ -966,6 +983,12 @@ static void handle_stripe(struct stripe_head *sh)
966983
if (dev->written) written++;
967984
rdev = conf->disks[i].rdev; /* FIXME, should I be looking rdev */
968985
if (!rdev || !rdev->in_sync) {
986+
/* The ReadError flag will just be confusing now */
987+
clear_bit(R5_ReadError, &dev->flags);
988+
clear_bit(R5_ReWrite, &dev->flags);
989+
}
990+
if (!rdev || !rdev->in_sync
991+
|| test_bit(R5_ReadError, &dev->flags)) {
969992
failed++;
970993
failed_num = i;
971994
} else
@@ -980,6 +1003,14 @@ static void handle_stripe(struct stripe_head *sh)
9801003
if (failed > 1 && to_read+to_write+written) {
9811004
for (i=disks; i--; ) {
9821005
int bitmap_end = 0;
1006+
1007+
if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
1008+
mdk_rdev_t *rdev = conf->disks[i].rdev;
1009+
if (rdev && rdev->in_sync)
1010+
/* multiple read failures in one stripe */
1011+
md_error(conf->mddev, rdev);
1012+
}
1013+
9831014
spin_lock_irq(&conf->device_lock);
9841015
/* fail all writes first */
9851016
bi = sh->dev[i].towrite;
@@ -1015,7 +1046,8 @@ static void handle_stripe(struct stripe_head *sh)
10151046
}
10161047

10171048
/* fail any reads if this device is non-operational */
1018-
if (!test_bit(R5_Insync, &sh->dev[i].flags)) {
1049+
if (!test_bit(R5_Insync, &sh->dev[i].flags) ||
1050+
test_bit(R5_ReadError, &sh->dev[i].flags)) {
10191051
bi = sh->dev[i].toread;
10201052
sh->dev[i].toread = NULL;
10211053
if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
@@ -1274,7 +1306,26 @@ static void handle_stripe(struct stripe_head *sh)
12741306
md_done_sync(conf->mddev, STRIPE_SECTORS,1);
12751307
clear_bit(STRIPE_SYNCING, &sh->state);
12761308
}
1277-
1309+
1310+
/* If the failed drive is just a ReadError, then we might need to progress
1311+
* the repair/check process
1312+
*/
1313+
if (failed == 1 && test_bit(R5_ReadError, &sh->dev[failed_num].flags)
1314+
&& !test_bit(R5_LOCKED, &sh->dev[failed_num].flags)
1315+
&& test_bit(R5_UPTODATE, &sh->dev[failed_num].flags)
1316+
) {
1317+
dev = &sh->dev[failed_num];
1318+
if (!test_bit(R5_ReWrite, &dev->flags)) {
1319+
set_bit(R5_Wantwrite, &dev->flags);
1320+
set_bit(R5_ReWrite, &dev->flags);
1321+
set_bit(R5_LOCKED, &dev->flags);
1322+
} else {
1323+
/* let's read it back */
1324+
set_bit(R5_Wantread, &dev->flags);
1325+
set_bit(R5_LOCKED, &dev->flags);
1326+
}
1327+
}
1328+
12781329
spin_unlock(&sh->lock);
12791330

12801331
while ((bi=return_bi)) {

include/linux/raid/raid5.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,8 @@ struct stripe_head {
154154
#define R5_Wantwrite 5
155155
#define R5_Syncio 6 /* this io need to be accounted as resync io */
156156
#define R5_Overlap 7 /* There is a pending overlapping request on this block */
157+
#define R5_ReadError 8 /* seen a read error here recently */
158+
#define R5_ReWrite 9 /* have tried to over-write the readerror */
157159

158160
/*
159161
* Write method

0 commit comments

Comments
 (0)