Skip to content

Commit 1979dbb

Browse files
Li Nan authored and liu-song-6 committed
md: factor out a helper exceed_read_errors() to check read_errors
Move check_decay_read_errors() to raid1-10.c and factor out a helper exceed_read_errors() to check if read_errors exceeds the limit, so that raid1 can also use it. There are no functional changes. Signed-off-by: Li Nan <linan122@huawei.com> Signed-off-by: Song Liu <song@kernel.org> Link: https://lore.kernel.org/r/20231215023852.3478228-2-linan666@huaweicloud.com
1 parent dc1cc22 commit 1979dbb

File tree

3 files changed

+58
-46
lines changed

3 files changed

+58
-46
lines changed

drivers/md/raid1-10.c

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,3 +173,57 @@ static inline void raid1_prepare_flush_writes(struct bitmap *bitmap)
173173
else
174174
md_bitmap_unplug(bitmap);
175175
}
176+
177+
/*
178+
* Used by fix_read_error() to decay the per rdev read_errors.
179+
* We halve the read error count for every full hour that has elapsed
180+
* since the last recorded read error.
*
* On the first recorded error (rdev->last_read_error == 0) only the
* timestamp is stored and the count is left untouched. In every other
* case rdev->last_read_error is refreshed to the current time before the
* count is decayed. @mddev is not referenced in the body.
181+
*/
182+
static inline void check_decay_read_errors(struct mddev *mddev, struct md_rdev *rdev)
183+
{
184+
long cur_time_mon;
185+
unsigned long hours_since_last;
186+
unsigned int read_errors = atomic_read(&rdev->read_errors);
187+
188+
cur_time_mon = ktime_get_seconds();
189+
190+
if (rdev->last_read_error == 0) {
191+
/* first time we've seen a read error */
192+
rdev->last_read_error = cur_time_mon;
193+
return;
194+
}
195+
/* integer division: partial hours are discarded */
196+
hours_since_last = (long)(cur_time_mon -
197+
rdev->last_read_error) / 3600;
198+
199+
rdev->last_read_error = cur_time_mon;
200+
201+
/*
202+
* if hours_since_last is >= the number of bits in read_errors
203+
* just set read errors to 0. We do this to avoid
204+
* overflowing the shift of read_errors by hours_since_last.
*
* NOTE(review): the count was sampled with atomic_read() at the top
* and is written back with atomic_set() here, so an increment made
* in between would be overwritten - confirm callers serialize this.
205+
*/
206+
if (hours_since_last >= 8 * sizeof(read_errors))
207+
atomic_set(&rdev->read_errors, 0);
208+
else
209+
atomic_set(&rdev->read_errors, read_errors >> hours_since_last);
210+
}
211+
212+
/*
 * Account for one more read error on @rdev and check it against the
 * array's limit.
 *
 * The per-rdev count is first decayed via check_decay_read_errors(), then
 * incremented. If the incremented value is greater than
 * mddev->max_corr_read_errors, two notices are logged, md_error() is
 * called on the device and true is returned; otherwise false is returned.
 *
 * RAID_1_10_NAME is pasted into the log format as a string literal, so it
 * must be defined by the file that includes this one before the #include.
 */
static inline bool exceed_read_errors(struct mddev *mddev, struct md_rdev *rdev)
213+
{
214+
int max_read_errors = atomic_read(&mddev->max_corr_read_errors);
215+
int read_errors;
216+
217+
check_decay_read_errors(mddev, rdev);
/* use the value returned by the increment itself for both the test and the log */
218+
read_errors = atomic_inc_return(&rdev->read_errors);
219+
if (read_errors > max_read_errors) {
220+
pr_notice("md/"RAID_1_10_NAME":%s: %pg: Raid device exceeded read_error threshold [cur %d:max %d]\n",
221+
mdname(mddev), rdev->bdev, read_errors, max_read_errors);
222+
pr_notice("md/"RAID_1_10_NAME":%s: %pg: Failing raid device\n",
223+
mdname(mddev), rdev->bdev);
224+
md_error(mddev, rdev);
225+
return true;
226+
}
227+
228+
return false;
229+
}

drivers/md/raid1.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ static void lower_barrier(struct r1conf *conf, sector_t sector_nr);
4949
#define raid1_log(md, fmt, args...) \
5050
do { if ((md)->queue) blk_add_trace_msg((md)->queue, "raid1 " fmt, ##args); } while (0)
5151

52+
#define RAID_1_10_NAME "raid1"
5253
#include "raid1-10.c"
5354

5455
#define START(node) ((node)->start)

drivers/md/raid10.c

Lines changed: 3 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919
#include <linux/raid/md_p.h>
2020
#include <trace/events/block.h>
2121
#include "md.h"
22+
23+
#define RAID_1_10_NAME "raid10"
2224
#include "raid10.h"
2325
#include "raid0.h"
2426
#include "md-bitmap.h"
@@ -2592,42 +2594,6 @@ static void recovery_request_write(struct mddev *mddev, struct r10bio *r10_bio)
25922594
}
25932595
}
25942596

2595-
/*
2596-
* Used by fix_read_error() to decay the per rdev read_errors.
2597-
* We halve the read error count for every hour that has elapsed
2598-
* since the last recorded read error.
2599-
*
2600-
*/
2601-
static void check_decay_read_errors(struct mddev *mddev, struct md_rdev *rdev)
2602-
{
2603-
long cur_time_mon;
2604-
unsigned long hours_since_last;
2605-
unsigned int read_errors = atomic_read(&rdev->read_errors);
2606-
2607-
cur_time_mon = ktime_get_seconds();
2608-
2609-
if (rdev->last_read_error == 0) {
2610-
/* first time we've seen a read error */
2611-
rdev->last_read_error = cur_time_mon;
2612-
return;
2613-
}
2614-
2615-
hours_since_last = (long)(cur_time_mon -
2616-
rdev->last_read_error) / 3600;
2617-
2618-
rdev->last_read_error = cur_time_mon;
2619-
2620-
/*
2621-
* if hours_since_last is > the number of bits in read_errors
2622-
* just set read errors to 0. We do this to avoid
2623-
* overflowing the shift of read_errors by hours_since_last.
2624-
*/
2625-
if (hours_since_last >= 8 * sizeof(read_errors))
2626-
atomic_set(&rdev->read_errors, 0);
2627-
else
2628-
atomic_set(&rdev->read_errors, read_errors >> hours_since_last);
2629-
}
2630-
26312597
static int r10_sync_page_io(struct md_rdev *rdev, sector_t sector,
26322598
int sectors, struct page *page, enum req_op op)
26332599
{
@@ -2665,7 +2631,6 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
26652631
int sect = 0; /* Offset from r10_bio->sector */
26662632
int sectors = r10_bio->sectors, slot = r10_bio->read_slot;
26672633
struct md_rdev *rdev;
2668-
int max_read_errors = atomic_read(&mddev->max_corr_read_errors);
26692634
int d = r10_bio->devs[slot].devnum;
26702635

26712636
/* still own a reference to this rdev, so it cannot
@@ -2678,15 +2643,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
26782643
more fix_read_error() attempts */
26792644
return;
26802645

2681-
check_decay_read_errors(mddev, rdev);
2682-
atomic_inc(&rdev->read_errors);
2683-
if (atomic_read(&rdev->read_errors) > max_read_errors) {
2684-
pr_notice("md/raid10:%s: %pg: Raid device exceeded read_error threshold [cur %d:max %d]\n",
2685-
mdname(mddev), rdev->bdev,
2686-
atomic_read(&rdev->read_errors), max_read_errors);
2687-
pr_notice("md/raid10:%s: %pg: Failing raid device\n",
2688-
mdname(mddev), rdev->bdev);
2689-
md_error(mddev, rdev);
2646+
if (exceed_read_errors(mddev, rdev)) {
26902647
r10_bio->devs[slot].bio = IO_BLOCKED;
26912648
return;
26922649
}

0 commit comments

Comments
 (0)