Skip to content

Commit dc1cc22

Browse files
Alex Lyakas authored and liu-song-6 committed
md: When assembling the array, consult the superblock of the freshest device
Upon assembling the array, both the kernel and mdadm allow devices to have an event counter difference of 1 and still consider them up-to-date. However, a device whose event count is behind by 1 may in fact not be up-to-date, and an array resync with such a device may cause data corruption. To avoid this, consult the superblock of the freshest device about the status of any device whose event counter is behind by 1. Signed-off-by: Alex Lyakas <alex.lyakas@zadara.com> Signed-off-by: Song Liu <song@kernel.org> Link: https://lore.kernel.org/r/1702470271-16073-1-git-send-email-alex.lyakas@zadara.com
1 parent af140f8 commit dc1cc22

File tree

1 file changed

+44
-10
lines changed

1 file changed

+44
-10
lines changed

drivers/md/md.c

Lines changed: 44 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1206,6 +1206,7 @@ struct super_type {
12061206
struct md_rdev *refdev,
12071207
int minor_version);
12081208
int (*validate_super)(struct mddev *mddev,
1209+
struct md_rdev *freshest,
12091210
struct md_rdev *rdev);
12101211
void (*sync_super)(struct mddev *mddev,
12111212
struct md_rdev *rdev);
@@ -1343,8 +1344,9 @@ static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor
13431344

13441345
/*
13451346
* validate_super for 0.90.0
1347+
* note: we are not using "freshest" for 0.9 superblock
13461348
*/
1347-
static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev)
1349+
static int super_90_validate(struct mddev *mddev, struct md_rdev *freshest, struct md_rdev *rdev)
13481350
{
13491351
mdp_disk_t *desc;
13501352
mdp_super_t *sb = page_address(rdev->sb_page);
@@ -1856,7 +1858,7 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_
18561858
return ret;
18571859
}
18581860

1859-
static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
1861+
static int super_1_validate(struct mddev *mddev, struct md_rdev *freshest, struct md_rdev *rdev)
18601862
{
18611863
struct mdp_superblock_1 *sb = page_address(rdev->sb_page);
18621864
__u64 ev1 = le64_to_cpu(sb->events);
@@ -1952,13 +1954,15 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
19521954
}
19531955
} else if (mddev->pers == NULL) {
19541956
/* Insist of good event counter while assembling, except for
1955-
* spares (which don't need an event count) */
1956-
++ev1;
1957+
* spares (which don't need an event count).
1958+
* Similar to mdadm, we allow event counter difference of 1
1959+
* from the freshest device.
1960+
*/
19571961
if (rdev->desc_nr >= 0 &&
19581962
rdev->desc_nr < le32_to_cpu(sb->max_dev) &&
19591963
(le16_to_cpu(sb->dev_roles[rdev->desc_nr]) < MD_DISK_ROLE_MAX ||
19601964
le16_to_cpu(sb->dev_roles[rdev->desc_nr]) == MD_DISK_ROLE_JOURNAL))
1961-
if (ev1 < mddev->events)
1965+
if (ev1 + 1 < mddev->events)
19621966
return -EINVAL;
19631967
} else if (mddev->bitmap) {
19641968
/* If adding to array with a bitmap, then we can accept an
@@ -1979,8 +1983,38 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
19791983
rdev->desc_nr >= le32_to_cpu(sb->max_dev)) {
19801984
role = MD_DISK_ROLE_SPARE;
19811985
rdev->desc_nr = -1;
1982-
} else
1986+
} else if (mddev->pers == NULL && freshest && ev1 < mddev->events) {
1987+
/*
1988+
* If we are assembling, and our event counter is smaller than the
1989+
* highest event counter, we cannot trust our superblock about the role.
1990+
* It could happen that our rdev was marked as Faulty, and all other
1991+
* superblocks were updated with +1 event counter.
1992+
* Then, before the next superblock update, which typically happens when
1993+
* remove_and_add_spares() removes the device from the array, there was
1994+
* a crash or reboot.
1995+
* If we allow current rdev without consulting the freshest superblock,
1996+
* we could cause data corruption.
1997+
* Note that in this case our event counter is smaller by 1 than the
1998+
* highest, otherwise, this rdev would not be allowed into array;
1999+
* both kernel and mdadm allow event counter difference of 1.
2000+
*/
2001+
struct mdp_superblock_1 *freshest_sb = page_address(freshest->sb_page);
2002+
u32 freshest_max_dev = le32_to_cpu(freshest_sb->max_dev);
2003+
2004+
if (rdev->desc_nr >= freshest_max_dev) {
2005+
/* this is unexpected, better not proceed */
2006+
pr_warn("md: %s: rdev[%pg]: desc_nr(%d) >= freshest(%pg)->sb->max_dev(%u)\n",
2007+
mdname(mddev), rdev->bdev, rdev->desc_nr,
2008+
freshest->bdev, freshest_max_dev);
2009+
return -EUCLEAN;
2010+
}
2011+
2012+
role = le16_to_cpu(freshest_sb->dev_roles[rdev->desc_nr]);
2013+
pr_debug("md: %s: rdev[%pg]: role=%d(0x%x) according to freshest %pg\n",
2014+
mdname(mddev), rdev->bdev, role, role, freshest->bdev);
2015+
} else {
19832016
role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
2017+
}
19842018
switch(role) {
19852019
case MD_DISK_ROLE_SPARE: /* spare */
19862020
break;
@@ -2887,7 +2921,7 @@ static int add_bound_rdev(struct md_rdev *rdev)
28872921
* and should be added immediately.
28882922
*/
28892923
super_types[mddev->major_version].
2890-
validate_super(mddev, rdev);
2924+
validate_super(mddev, NULL/*freshest*/, rdev);
28912925
err = mddev->pers->hot_add_disk(mddev, rdev);
28922926
if (err) {
28932927
md_kick_rdev_from_array(rdev);
@@ -3824,7 +3858,7 @@ static int analyze_sbs(struct mddev *mddev)
38243858
}
38253859

38263860
super_types[mddev->major_version].
3827-
validate_super(mddev, freshest);
3861+
validate_super(mddev, NULL/*freshest*/, freshest);
38283862

38293863
i = 0;
38303864
rdev_for_each_safe(rdev, tmp, mddev) {
@@ -3839,7 +3873,7 @@ static int analyze_sbs(struct mddev *mddev)
38393873
}
38403874
if (rdev != freshest) {
38413875
if (super_types[mddev->major_version].
3842-
validate_super(mddev, rdev)) {
3876+
validate_super(mddev, freshest, rdev)) {
38433877
pr_warn("md: kicking non-fresh %pg from array!\n",
38443878
rdev->bdev);
38453879
md_kick_rdev_from_array(rdev);
@@ -6847,7 +6881,7 @@ int md_add_new_disk(struct mddev *mddev, struct mdu_disk_info_s *info)
68476881
rdev->saved_raid_disk = rdev->raid_disk;
68486882
} else
68496883
super_types[mddev->major_version].
6850-
validate_super(mddev, rdev);
6884+
validate_super(mddev, NULL/*freshest*/, rdev);
68516885
if ((info->state & (1<<MD_DISK_SYNC)) &&
68526886
rdev->raid_disk != info->raid_disk) {
68536887
/* This was a hot-add request, but events doesn't

0 commit comments

Comments
 (0)