@@ -2337,7 +2337,7 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
23372337 for (i = disks ; i -- ; ) {
23382338 struct r5dev * dev = & sh -> dev [i ];
23392339 if (test_and_clear_bit (R5_Overlap , & dev -> flags ))
2340- wake_up ( & sh -> raid_conf -> wait_for_overlap );
2340+ wake_up_bit ( & dev -> flags , R5_Overlap );
23412341 }
23422342 }
23432343 local_unlock (& conf -> percpu -> lock );
@@ -3473,7 +3473,7 @@ static bool stripe_bio_overlaps(struct stripe_head *sh, struct bio *bi,
34733473 * With PPL only writes to consecutive data chunks within a
34743474 * stripe are allowed because for a single stripe_head we can
34753475 * only have one PPL entry at a time, which describes one data
3476- * range. Not really an overlap, but wait_for_overlap can be
3476+ * range. Not really an overlap, but R5_Overlap can be
34773477 * used to handle this.
34783478 */
34793479 sector_t sector ;
@@ -3652,7 +3652,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
36523652 log_stripe_write_finished (sh );
36533653
36543654 if (test_and_clear_bit (R5_Overlap , & sh -> dev [i ].flags ))
3655- wake_up ( & conf -> wait_for_overlap );
3655+ wake_up_bit ( & sh -> dev [ i ]. flags , R5_Overlap );
36563656
36573657 while (bi && bi -> bi_iter .bi_sector <
36583658 sh -> dev [i ].sector + RAID5_STRIPE_SECTORS (conf )) {
@@ -3697,7 +3697,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
36973697 sh -> dev [i ].toread = NULL ;
36983698 spin_unlock_irq (& sh -> stripe_lock );
36993699 if (test_and_clear_bit (R5_Overlap , & sh -> dev [i ].flags ))
3700- wake_up ( & conf -> wait_for_overlap );
3700+ wake_up_bit ( & sh -> dev [ i ]. flags , R5_Overlap );
37013701 if (bi )
37023702 s -> to_read -- ;
37033703 while (bi && bi -> bi_iter .bi_sector <
@@ -3736,7 +3736,7 @@ handle_failed_sync(struct r5conf *conf, struct stripe_head *sh,
37363736 BUG_ON (sh -> batch_head );
37373737 clear_bit (STRIPE_SYNCING , & sh -> state );
37383738 if (test_and_clear_bit (R5_Overlap , & sh -> dev [sh -> pd_idx ].flags ))
3739- wake_up ( & conf -> wait_for_overlap );
3739+ wake_up_bit ( & sh -> dev [ sh -> pd_idx ]. flags , R5_Overlap );
37403740 s -> syncing = 0 ;
37413741 s -> replacing = 0 ;
37423742 /* There is nothing more to do for sync/check/repair.
@@ -4877,7 +4877,6 @@ static void break_stripe_batch_list(struct stripe_head *head_sh,
48774877{
48784878 struct stripe_head * sh , * next ;
48794879 int i ;
4880- int do_wakeup = 0 ;
48814880
48824881 list_for_each_entry_safe (sh , next , & head_sh -> batch_list , batch_list ) {
48834882
@@ -4913,7 +4912,7 @@ static void break_stripe_batch_list(struct stripe_head *head_sh,
49134912 spin_unlock_irq (& sh -> stripe_lock );
49144913 for (i = 0 ; i < sh -> disks ; i ++ ) {
49154914 if (test_and_clear_bit (R5_Overlap , & sh -> dev [i ].flags ))
4916- do_wakeup = 1 ;
4915+ wake_up_bit ( & sh -> dev [ i ]. flags , R5_Overlap ) ;
49174916 sh -> dev [i ].flags = head_sh -> dev [i ].flags &
49184917 (~((1 << R5_WriteError ) | (1 << R5_Overlap )));
49194918 }
@@ -4927,12 +4926,9 @@ static void break_stripe_batch_list(struct stripe_head *head_sh,
49274926 spin_unlock_irq (& head_sh -> stripe_lock );
49284927 for (i = 0 ; i < head_sh -> disks ; i ++ )
49294928 if (test_and_clear_bit (R5_Overlap , & head_sh -> dev [i ].flags ))
4930- do_wakeup = 1 ;
4929+ wake_up_bit ( & head_sh -> dev [ i ]. flags , R5_Overlap ) ;
49314930 if (head_sh -> state & handle_flags )
49324931 set_bit (STRIPE_HANDLE , & head_sh -> state );
4933-
4934- if (do_wakeup )
4935- wake_up (& head_sh -> raid_conf -> wait_for_overlap );
49364932}
49374933
49384934static void handle_stripe (struct stripe_head * sh )
@@ -5198,7 +5194,7 @@ static void handle_stripe(struct stripe_head *sh)
51985194 md_done_sync (conf -> mddev , RAID5_STRIPE_SECTORS (conf ), 1 );
51995195 clear_bit (STRIPE_SYNCING , & sh -> state );
52005196 if (test_and_clear_bit (R5_Overlap , & sh -> dev [sh -> pd_idx ].flags ))
5201- wake_up ( & conf -> wait_for_overlap );
5197+ wake_up_bit ( & sh -> dev [ sh -> pd_idx ]. flags , R5_Overlap );
52025198 }
52035199
52045200 /* If the failed drives are just a ReadError, then we might need
@@ -5261,7 +5257,7 @@ static void handle_stripe(struct stripe_head *sh)
52615257 } else if (s .expanded && !sh -> reconstruct_state && s .locked == 0 ) {
52625258 clear_bit (STRIPE_EXPAND_READY , & sh -> state );
52635259 atomic_dec (& conf -> reshape_stripes );
5264- wake_up (& conf -> wait_for_overlap );
5260+ wake_up (& conf -> wait_for_reshape );
52655261 md_done_sync (conf -> mddev , RAID5_STRIPE_SECTORS (conf ), 1 );
52665262 }
52675263
@@ -5755,12 +5751,11 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi)
57555751 int d ;
57565752 again :
57575753 sh = raid5_get_active_stripe (conf , NULL , logical_sector , 0 );
5758- prepare_to_wait (& conf -> wait_for_overlap , & w ,
5759- TASK_UNINTERRUPTIBLE );
57605754 set_bit (R5_Overlap , & sh -> dev [sh -> pd_idx ].flags );
57615755 if (test_bit (STRIPE_SYNCING , & sh -> state )) {
57625756 raid5_release_stripe (sh );
5763- schedule ();
5757+ wait_on_bit (& sh -> dev [sh -> pd_idx ].flags , R5_Overlap ,
5758+ TASK_UNINTERRUPTIBLE );
57645759 goto again ;
57655760 }
57665761 clear_bit (R5_Overlap , & sh -> dev [sh -> pd_idx ].flags );
@@ -5772,12 +5767,12 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi)
57725767 set_bit (R5_Overlap , & sh -> dev [d ].flags );
57735768 spin_unlock_irq (& sh -> stripe_lock );
57745769 raid5_release_stripe (sh );
5775- schedule ();
5770+ wait_on_bit (& sh -> dev [d ].flags , R5_Overlap ,
5771+ TASK_UNINTERRUPTIBLE );
57765772 goto again ;
57775773 }
57785774 }
57795775 set_bit (STRIPE_DISCARD , & sh -> state );
5780- finish_wait (& conf -> wait_for_overlap , & w );
57815776 sh -> overwrite_disks = 0 ;
57825777 for (d = 0 ; d < conf -> raid_disks ; d ++ ) {
57835778 if (d == sh -> pd_idx || d == sh -> qd_idx )
@@ -5854,7 +5849,6 @@ static int add_all_stripe_bios(struct r5conf *conf,
58545849 struct bio * bi , int forwrite , int previous )
58555850{
58565851 int dd_idx ;
5857- int ret = 1 ;
58585852
58595853 spin_lock_irq (& sh -> stripe_lock );
58605854
@@ -5870,14 +5864,19 @@ static int add_all_stripe_bios(struct r5conf *conf,
58705864
58715865 if (stripe_bio_overlaps (sh , bi , dd_idx , forwrite )) {
58725866 set_bit (R5_Overlap , & dev -> flags );
5873- ret = 0 ;
5874- continue ;
5867+ spin_unlock_irq (& sh -> stripe_lock );
5868+ raid5_release_stripe (sh );
5869+ /* release batch_last before wait to avoid risk of deadlock */
5870+ if (ctx -> batch_last ) {
5871+ raid5_release_stripe (ctx -> batch_last );
5872+ ctx -> batch_last = NULL ;
5873+ }
5874+ md_wakeup_thread (conf -> mddev -> thread );
5875+ wait_on_bit (& dev -> flags , R5_Overlap , TASK_UNINTERRUPTIBLE );
5876+ return 0 ;
58755877 }
58765878 }
58775879
5878- if (!ret )
5879- goto out ;
5880-
58815880 for (dd_idx = 0 ; dd_idx < sh -> disks ; dd_idx ++ ) {
58825881 struct r5dev * dev = & sh -> dev [dd_idx ];
58835882
@@ -5893,9 +5892,8 @@ static int add_all_stripe_bios(struct r5conf *conf,
58935892 RAID5_STRIPE_SHIFT (conf ), ctx -> sectors_to_do );
58945893 }
58955894
5896- out :
58975895 spin_unlock_irq (& sh -> stripe_lock );
5898- return ret ;
5896+ return 1 ;
58995897}
59005898
59015899enum reshape_loc {
@@ -5991,17 +5989,17 @@ static enum stripe_result make_stripe_request(struct mddev *mddev,
59915989 goto out_release ;
59925990 }
59935991
5994- if (test_bit (STRIPE_EXPANDING , & sh -> state ) ||
5995- !add_all_stripe_bios (conf , ctx , sh , bi , rw , previous )) {
5996- /*
5997- * Stripe is busy expanding or add failed due to
5998- * overlap. Flush everything and wait a while.
5999- */
5992+ if (test_bit (STRIPE_EXPANDING , & sh -> state )) {
60005993 md_wakeup_thread (mddev -> thread );
60015994 ret = STRIPE_SCHEDULE_AND_RETRY ;
60025995 goto out_release ;
60035996 }
60045997
5998+ if (!add_all_stripe_bios (conf , ctx , sh , bi , rw , previous )) {
5999+ ret = STRIPE_RETRY ;
6000+ goto out ;
6001+ }
6002+
60056003 if (stripe_can_batch (sh )) {
60066004 stripe_add_to_batch_list (conf , sh , ctx -> batch_last );
60076005 if (ctx -> batch_last )
@@ -6072,6 +6070,7 @@ static sector_t raid5_bio_lowest_chunk_sector(struct r5conf *conf,
60726070static bool raid5_make_request (struct mddev * mddev , struct bio * bi )
60736071{
60746072 DEFINE_WAIT_FUNC (wait , woken_wake_function );
6073+ bool on_wq ;
60756074 struct r5conf * conf = mddev -> private ;
60766075 sector_t logical_sector ;
60776076 struct stripe_request_ctx ctx = {};
@@ -6145,11 +6144,15 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
61456144 * sequential IO pattern. We don't bother with the optimization when
61466145 * reshaping as the performance benefit is not worth the complexity.
61476146 */
6148- if (likely (conf -> reshape_progress == MaxSector ))
6147+ if (likely (conf -> reshape_progress == MaxSector )) {
61496148 logical_sector = raid5_bio_lowest_chunk_sector (conf , bi );
6149+ on_wq = false;
6150+ } else {
6151+ add_wait_queue (& conf -> wait_for_reshape , & wait );
6152+ on_wq = true;
6153+ }
61506154 s = (logical_sector - ctx .first_sector ) >> RAID5_STRIPE_SHIFT (conf );
61516155
6152- add_wait_queue (& conf -> wait_for_overlap , & wait );
61536156 while (1 ) {
61546157 res = make_stripe_request (mddev , conf , & ctx , logical_sector ,
61556158 bi );
@@ -6160,6 +6163,7 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
61606163 continue ;
61616164
61626165 if (res == STRIPE_SCHEDULE_AND_RETRY ) {
6166+ WARN_ON_ONCE (!on_wq );
61636167 /*
61646168 * Must release the reference to batch_last before
61656169 * scheduling and waiting for work to be done,
@@ -6184,7 +6188,8 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
61846188 logical_sector = ctx .first_sector +
61856189 (s << RAID5_STRIPE_SHIFT (conf ));
61866190 }
6187- remove_wait_queue (& conf -> wait_for_overlap , & wait );
6191+ if (unlikely (on_wq ))
6192+ remove_wait_queue (& conf -> wait_for_reshape , & wait );
61886193
61896194 if (ctx .batch_last )
61906195 raid5_release_stripe (ctx .batch_last );
@@ -6337,7 +6342,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
63376342 : (safepos < writepos && readpos > writepos )) ||
63386343 time_after (jiffies , conf -> reshape_checkpoint + 10 * HZ )) {
63396344 /* Cannot proceed until we've updated the superblock... */
6340- wait_event (conf -> wait_for_overlap ,
6345+ wait_event (conf -> wait_for_reshape ,
63416346 atomic_read (& conf -> reshape_stripes )== 0
63426347 || test_bit (MD_RECOVERY_INTR , & mddev -> recovery ));
63436348 if (atomic_read (& conf -> reshape_stripes ) != 0 )
@@ -6363,7 +6368,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
63636368 spin_lock_irq (& conf -> device_lock );
63646369 conf -> reshape_safe = mddev -> reshape_position ;
63656370 spin_unlock_irq (& conf -> device_lock );
6366- wake_up (& conf -> wait_for_overlap );
6371+ wake_up (& conf -> wait_for_reshape );
63676372 sysfs_notify_dirent_safe (mddev -> sysfs_completed );
63686373 }
63696374
@@ -6446,7 +6451,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
64466451 (sector_nr - mddev -> curr_resync_completed ) * 2
64476452 >= mddev -> resync_max - mddev -> curr_resync_completed ) {
64486453 /* Cannot proceed until we've updated the superblock... */
6449- wait_event (conf -> wait_for_overlap ,
6454+ wait_event (conf -> wait_for_reshape ,
64506455 atomic_read (& conf -> reshape_stripes ) == 0
64516456 || test_bit (MD_RECOVERY_INTR , & mddev -> recovery ));
64526457 if (atomic_read (& conf -> reshape_stripes ) != 0 )
@@ -6472,7 +6477,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
64726477 spin_lock_irq (& conf -> device_lock );
64736478 conf -> reshape_safe = mddev -> reshape_position ;
64746479 spin_unlock_irq (& conf -> device_lock );
6475- wake_up (& conf -> wait_for_overlap );
6480+ wake_up (& conf -> wait_for_reshape );
64766481 sysfs_notify_dirent_safe (mddev -> sysfs_completed );
64776482 }
64786483ret :
@@ -6507,7 +6512,7 @@ static inline sector_t raid5_sync_request(struct mddev *mddev, sector_t sector_n
65076512 }
65086513
65096514 /* Allow raid5_quiesce to complete */
6510- wait_event (conf -> wait_for_overlap , conf -> quiesce != 2 );
6515+ wait_event (conf -> wait_for_reshape , conf -> quiesce != 2 );
65116516
65126517 if (test_bit (MD_RECOVERY_RESHAPE , & mddev -> recovery ))
65136518 return reshape_request (mddev , sector_nr , skipped );
@@ -7493,7 +7498,7 @@ static struct r5conf *setup_conf(struct mddev *mddev)
74937498
74947499 init_waitqueue_head (& conf -> wait_for_quiescent );
74957500 init_waitqueue_head (& conf -> wait_for_stripe );
7496- init_waitqueue_head (& conf -> wait_for_overlap );
7501+ init_waitqueue_head (& conf -> wait_for_reshape );
74977502 INIT_LIST_HEAD (& conf -> handle_list );
74987503 INIT_LIST_HEAD (& conf -> loprio_list );
74997504 INIT_LIST_HEAD (& conf -> hold_list );
@@ -8552,7 +8557,7 @@ static void end_reshape(struct r5conf *conf)
85528557 !test_bit (In_sync , & rdev -> flags ))
85538558 rdev -> recovery_offset = MaxSector ;
85548559 spin_unlock_irq (& conf -> device_lock );
8555- wake_up (& conf -> wait_for_overlap );
8560+ wake_up (& conf -> wait_for_reshape );
85568561
85578562 mddev_update_io_opt (conf -> mddev ,
85588563 conf -> raid_disks - conf -> max_degraded );
@@ -8616,13 +8621,13 @@ static void raid5_quiesce(struct mddev *mddev, int quiesce)
86168621 conf -> quiesce = 1 ;
86178622 unlock_all_device_hash_locks_irq (conf );
86188623 /* allow reshape to continue */
8619- wake_up (& conf -> wait_for_overlap );
8624+ wake_up (& conf -> wait_for_reshape );
86208625 } else {
86218626 /* re-enable writes */
86228627 lock_all_device_hash_locks_irq (conf );
86238628 conf -> quiesce = 0 ;
86248629 wake_up (& conf -> wait_for_quiescent );
8625- wake_up (& conf -> wait_for_overlap );
8630+ wake_up (& conf -> wait_for_reshape );
86268631 unlock_all_device_hash_locks_irq (conf );
86278632 }
86288633 log_quiesce (conf , quiesce );
@@ -8941,7 +8946,7 @@ static void raid5_prepare_suspend(struct mddev *mddev)
89418946{
89428947 struct r5conf * conf = mddev -> private ;
89438948
8944- wake_up (& conf -> wait_for_overlap );
8949+ wake_up (& conf -> wait_for_reshape );
89458950}
89468951
89478952static struct md_personality raid6_personality =
0 commit comments