@@ -1206,6 +1206,7 @@ struct super_type {
12061206 struct md_rdev * refdev ,
12071207 int minor_version );
12081208 int (* validate_super )(struct mddev * mddev ,
1209+ struct md_rdev * freshest ,
12091210 struct md_rdev * rdev );
12101211 void (* sync_super )(struct mddev * mddev ,
12111212 struct md_rdev * rdev );
@@ -1343,8 +1344,9 @@ static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor
13431344
13441345/*
13451346 * validate_super for 0.90.0
1347+ * note: "freshest" is not used for 0.90.0 superblocks
13461348 */
1347- static int super_90_validate (struct mddev * mddev , struct md_rdev * rdev )
1349+ static int super_90_validate (struct mddev * mddev , struct md_rdev * freshest , struct md_rdev * rdev )
13481350{
13491351 mdp_disk_t * desc ;
13501352 mdp_super_t * sb = page_address (rdev -> sb_page );
@@ -1856,7 +1858,7 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_
18561858 return ret ;
18571859}
18581860
1859- static int super_1_validate (struct mddev * mddev , struct md_rdev * rdev )
1861+ static int super_1_validate (struct mddev * mddev , struct md_rdev * freshest , struct md_rdev * rdev )
18601862{
18611863 struct mdp_superblock_1 * sb = page_address (rdev -> sb_page );
18621864 __u64 ev1 = le64_to_cpu (sb -> events );
@@ -1952,13 +1954,15 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
19521954 }
19531955 } else if (mddev -> pers == NULL ) {
19541956 /* Insist of good event counter while assembling, except for
1955- * spares (which don't need an event count) */
1956- ++ ev1 ;
1957+ * spares (which don't need an event count).
1958+ * Similar to mdadm, we allow event counter difference of 1
1959+ * from the freshest device.
1960+ */
19571961 if (rdev -> desc_nr >= 0 &&
19581962 rdev -> desc_nr < le32_to_cpu (sb -> max_dev ) &&
19591963 (le16_to_cpu (sb -> dev_roles [rdev -> desc_nr ]) < MD_DISK_ROLE_MAX ||
19601964 le16_to_cpu (sb -> dev_roles [rdev -> desc_nr ]) == MD_DISK_ROLE_JOURNAL ))
1961- if (ev1 < mddev -> events )
1965+ if (ev1 + 1 < mddev -> events )
19621966 return - EINVAL ;
19631967 } else if (mddev -> bitmap ) {
19641968 /* If adding to array with a bitmap, then we can accept an
@@ -1979,8 +1983,38 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
19791983 rdev -> desc_nr >= le32_to_cpu (sb -> max_dev )) {
19801984 role = MD_DISK_ROLE_SPARE ;
19811985 rdev -> desc_nr = -1 ;
1982- } else
1986+ } else if (mddev -> pers == NULL && freshest && ev1 < mddev -> events ) {
1987+ /*
1988+ * If we are assembling, and our event counter is smaller than the
1989+ * highest event counter, we cannot trust our superblock about the role.
1990+ * It could happen that our rdev was marked as Faulty, and all other
1991+ * superblocks were updated with +1 event counter.
1992+ * Then, before the next superblock update, which typically happens when
1993+ * remove_and_add_spares() removes the device from the array, there was
1994+ * a crash or reboot.
1995+ * If we allow current rdev without consulting the freshest superblock,
1996+ * we could cause data corruption.
1997+ * Note that in this case our event counter is smaller by 1 than the
1998+ * highest, otherwise, this rdev would not be allowed into array;
1999+ * both kernel and mdadm allow event counter difference of 1.
2000+ */
2001+ struct mdp_superblock_1 * freshest_sb = page_address (freshest -> sb_page );
2002+ u32 freshest_max_dev = le32_to_cpu (freshest_sb -> max_dev );
2003+
2004+ if (rdev -> desc_nr >= freshest_max_dev ) {
2005+ /* this is unexpected, better not proceed */
2006+ pr_warn ("md: %s: rdev[%pg]: desc_nr(%d) >= freshest(%pg)->sb->max_dev(%u)\n" ,
2007+ mdname (mddev ), rdev -> bdev , rdev -> desc_nr ,
2008+ freshest -> bdev , freshest_max_dev );
2009+ return - EUCLEAN ;
2010+ }
2011+
2012+ role = le16_to_cpu (freshest_sb -> dev_roles [rdev -> desc_nr ]);
2013+ pr_debug ("md: %s: rdev[%pg]: role=%d(0x%x) according to freshest %pg\n" ,
2014+ mdname (mddev ), rdev -> bdev , role , role , freshest -> bdev );
2015+ } else {
19832016 role = le16_to_cpu (sb -> dev_roles [rdev -> desc_nr ]);
2017+ }
19842018 switch (role ) {
19852019 case MD_DISK_ROLE_SPARE : /* spare */
19862020 break ;
@@ -2887,7 +2921,7 @@ static int add_bound_rdev(struct md_rdev *rdev)
28872921 * and should be added immediately.
28882922 */
28892923 super_types [mddev -> major_version ].
2890- validate_super (mddev , rdev );
2924+ validate_super (mddev , NULL /*freshest*/ , rdev );
28912925 err = mddev -> pers -> hot_add_disk (mddev , rdev );
28922926 if (err ) {
28932927 md_kick_rdev_from_array (rdev );
@@ -3824,7 +3858,7 @@ static int analyze_sbs(struct mddev *mddev)
38243858 }
38253859
38263860 super_types [mddev -> major_version ].
3827- validate_super (mddev , freshest );
3861+ validate_super (mddev , NULL /*freshest*/ , freshest );
38283862
38293863 i = 0 ;
38303864 rdev_for_each_safe (rdev , tmp , mddev ) {
@@ -3839,7 +3873,7 @@ static int analyze_sbs(struct mddev *mddev)
38393873 }
38403874 if (rdev != freshest ) {
38413875 if (super_types [mddev -> major_version ].
3842- validate_super (mddev , rdev )) {
3876+ validate_super (mddev , freshest , rdev )) {
38433877 pr_warn ("md: kicking non-fresh %pg from array!\n" ,
38443878 rdev -> bdev );
38453879 md_kick_rdev_from_array (rdev );
@@ -6847,7 +6881,7 @@ int md_add_new_disk(struct mddev *mddev, struct mdu_disk_info_s *info)
68476881 rdev -> saved_raid_disk = rdev -> raid_disk ;
68486882 } else
68496883 super_types [mddev -> major_version ].
6850- validate_super (mddev , rdev );
6884+ validate_super (mddev , NULL /*freshest*/ , rdev );
68516885 if ((info -> state & (1 <<MD_DISK_SYNC )) &&
68526886 rdev -> raid_disk != info -> raid_disk ) {
68536887 /* This was a hot-add request, but events doesn't
0 commit comments