Skip to content

Commit 688834e

Browse files
NeilBrown authored and shligit committed
md/failfast: add failfast flag for md to be used by some personalities.
This patch just adds a 'failfast' per-device flag which can be stored in v0.90 or v1.x metadata. The flag is not used yet but the intent is that it can be used for mirrored (raid1/raid10) arrays where low latency is more important than keeping all devices on-line. Setting the flag for a device effectively gives permission for that device to be marked as Faulty and excluded from the array on the first error. The underlying driver will be directed not to retry requests that result in failures. There is a proviso that the device must not be marked Faulty if that would cause the array as a whole to fail; it may only be marked Faulty if the array remains functional but degraded. Failures on read requests will cause the device to be marked as Faulty immediately so that further reads will avoid that device. No attempt will be made to correct read errors by over-writing with the correct data. It is expected that if transient errors, such as cable unplug, are possible, then something in user-space will revalidate failed devices and re-add them when they appear to be working again. Signed-off-by: NeilBrown <neilb@suse.com> Signed-off-by: Shaohua Li <shli@fb.com>
1 parent 3bddb7f commit 688834e

File tree

3 files changed

+39
-1
lines changed

3 files changed

+39
-1
lines changed

drivers/md/md.c

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1164,6 +1164,8 @@ static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev)
11641164
}
11651165
if (desc->state & (1<<MD_DISK_WRITEMOSTLY))
11661166
set_bit(WriteMostly, &rdev->flags);
1167+
if (desc->state & (1<<MD_DISK_FAILFAST))
1168+
set_bit(FailFast, &rdev->flags);
11671169
} else /* MULTIPATH are always insync */
11681170
set_bit(In_sync, &rdev->flags);
11691171
return 0;
@@ -1289,6 +1291,8 @@ static void super_90_sync(struct mddev *mddev, struct md_rdev *rdev)
12891291
}
12901292
if (test_bit(WriteMostly, &rdev2->flags))
12911293
d->state |= (1<<MD_DISK_WRITEMOSTLY);
1294+
if (test_bit(FailFast, &rdev2->flags))
1295+
d->state |= (1<<MD_DISK_FAILFAST);
12921296
}
12931297
/* now set the "removed" and "faulty" bits on any missing devices */
12941298
for (i=0 ; i < mddev->raid_disks ; i++) {
@@ -1673,6 +1677,8 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
16731677
}
16741678
if (sb->devflags & WriteMostly1)
16751679
set_bit(WriteMostly, &rdev->flags);
1680+
if (sb->devflags & FailFast1)
1681+
set_bit(FailFast, &rdev->flags);
16761682
if (le32_to_cpu(sb->feature_map) & MD_FEATURE_REPLACEMENT)
16771683
set_bit(Replacement, &rdev->flags);
16781684
} else /* MULTIPATH are always insync */
@@ -1711,6 +1717,10 @@ static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev)
17111717
sb->chunksize = cpu_to_le32(mddev->chunk_sectors);
17121718
sb->level = cpu_to_le32(mddev->level);
17131719
sb->layout = cpu_to_le32(mddev->layout);
1720+
if (test_bit(FailFast, &rdev->flags))
1721+
sb->devflags |= FailFast1;
1722+
else
1723+
sb->devflags &= ~FailFast1;
17141724

17151725
if (test_bit(WriteMostly, &rdev->flags))
17161726
sb->devflags |= WriteMostly1;
@@ -2557,6 +2567,8 @@ state_show(struct md_rdev *rdev, char *page)
25572567
len += sprintf(page+len, "replacement%s", sep);
25582568
if (test_bit(ExternalBbl, &flags))
25592569
len += sprintf(page+len, "external_bbl%s", sep);
2570+
if (test_bit(FailFast, &flags))
2571+
len += sprintf(page+len, "failfast%s", sep);
25602572

25612573
if (len)
25622574
len -= strlen(sep);
@@ -2579,6 +2591,7 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
25792591
* so that it gets rebuilt based on bitmap
25802592
* write_error - sets WriteErrorSeen
25812593
* -write_error - clears WriteErrorSeen
2594+
* {,-}failfast - set/clear FailFast
25822595
*/
25832596
int err = -EINVAL;
25842597
if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
@@ -2637,6 +2650,12 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
26372650
} else if (cmd_match(buf, "insync") && rdev->raid_disk == -1) {
26382651
set_bit(In_sync, &rdev->flags);
26392652
err = 0;
2653+
} else if (cmd_match(buf, "failfast")) {
2654+
set_bit(FailFast, &rdev->flags);
2655+
err = 0;
2656+
} else if (cmd_match(buf, "-failfast")) {
2657+
clear_bit(FailFast, &rdev->flags);
2658+
err = 0;
26402659
} else if (cmd_match(buf, "-insync") && rdev->raid_disk >= 0 &&
26412660
!test_bit(Journal, &rdev->flags)) {
26422661
if (rdev->mddev->pers == NULL) {
@@ -5942,6 +5961,8 @@ static int get_disk_info(struct mddev *mddev, void __user * arg)
59425961
info.state |= (1<<MD_DISK_JOURNAL);
59435962
if (test_bit(WriteMostly, &rdev->flags))
59445963
info.state |= (1<<MD_DISK_WRITEMOSTLY);
5964+
if (test_bit(FailFast, &rdev->flags))
5965+
info.state |= (1<<MD_DISK_FAILFAST);
59455966
} else {
59465967
info.major = info.minor = 0;
59475968
info.raid_disk = -1;
@@ -6049,6 +6070,10 @@ static int add_new_disk(struct mddev *mddev, mdu_disk_info_t *info)
60496070
set_bit(WriteMostly, &rdev->flags);
60506071
else
60516072
clear_bit(WriteMostly, &rdev->flags);
6073+
if (info->state & (1<<MD_DISK_FAILFAST))
6074+
set_bit(FailFast, &rdev->flags);
6075+
else
6076+
clear_bit(FailFast, &rdev->flags);
60526077

60536078
if (info->state & (1<<MD_DISK_JOURNAL)) {
60546079
struct md_rdev *rdev2;
@@ -6138,6 +6163,8 @@ static int add_new_disk(struct mddev *mddev, mdu_disk_info_t *info)
61386163

61396164
if (info->state & (1<<MD_DISK_WRITEMOSTLY))
61406165
set_bit(WriteMostly, &rdev->flags);
6166+
if (info->state & (1<<MD_DISK_FAILFAST))
6167+
set_bit(FailFast, &rdev->flags);
61416168

61426169
if (!mddev->persistent) {
61436170
pr_debug("md: nonpersistent superblock ...\n");

drivers/md/md.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,12 @@ enum flag_bits {
171171
ExternalBbl, /* External metadata provides bad
172172
* block management for a disk
173173
*/
174+
FailFast, /* Minimal retries should be attempted on
175+
* this device, so use REQ_FAILFAST_DEV.
176+
* Also don't try to repair failed reads.
177+
* It is expected that no bad block log
178+
* is present.
179+
*/
174180
};
175181

176182
static inline int is_badblock(struct md_rdev *rdev, sector_t s, int sectors,

include/uapi/linux/raid/md_p.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,10 @@
8484
#define MD_DISK_CANDIDATE 5 /* disk is added as spare (local) until confirmed
8585
* For clustered environments only.
8686
*/
87+
#define MD_DISK_FAILFAST 10 /* Send REQ_FAILFAST if there are multiple
88+
* devices available - and don't try to
89+
* correct read errors.
90+
*/
8791

8892
#define MD_DISK_WRITEMOSTLY 9 /* disk is "write-mostly" is RAID1 config.
8993
* read requests will only be sent here in
@@ -265,8 +269,9 @@ struct mdp_superblock_1 {
265269
__le32 dev_number; /* permanent identifier of this device - not role in raid */
266270
__le32 cnt_corrected_read; /* number of read errors that were corrected by re-writing */
267271
__u8 device_uuid[16]; /* user-space setable, ignored by kernel */
268-
__u8 devflags; /* per-device flags. Only one defined...*/
272+
__u8 devflags; /* per-device flags. Only two defined...*/
269273
#define WriteMostly1 1 /* mask for writemostly flag in above */
274+
#define FailFast1 2 /* Should avoid retries and fixups and just fail */
270275
/* Bad block log. If there are any bad blocks the feature flag is set.
271276
* If offset and size are non-zero, that space is reserved and available
272277
*/

0 commit comments

Comments
 (0)