@@ -600,7 +600,7 @@ static int get_rsb_struct(struct dlm_ls *ls, const void *name, int len,
600600{
601601 struct dlm_rsb * r ;
602602
603- r = dlm_allocate_rsb (ls );
603+ r = dlm_allocate_rsb ();
604604 if (!r )
605605 return - ENOMEM ;
606606
@@ -733,11 +733,13 @@ static int find_rsb_dir(struct dlm_ls *ls, const void *name, int len,
733733 }
734734
735735 retry :
736+ error = dlm_search_rsb_tree (& ls -> ls_rsbtbl , name , len , & r );
737+ if (error )
738+ goto do_new ;
736739
737740 /* check if the rsb is active under read lock - likely path */
738741 read_lock_bh (& ls -> ls_rsbtbl_lock );
739- error = dlm_search_rsb_tree (& ls -> ls_rsbtbl , name , len , & r );
740- if (error ) {
742+ if (!rsb_flag (r , RSB_HASHED )) {
741743 read_unlock_bh (& ls -> ls_rsbtbl_lock );
742744 goto do_new ;
743745 }
@@ -918,11 +920,13 @@ static int find_rsb_nodir(struct dlm_ls *ls, const void *name, int len,
918920 int error ;
919921
920922 retry :
923+ error = dlm_search_rsb_tree (& ls -> ls_rsbtbl , name , len , & r );
924+ if (error )
925+ goto do_new ;
921926
922927 /* check if the rsb is in active state under read lock - likely path */
923928 read_lock_bh (& ls -> ls_rsbtbl_lock );
924- error = dlm_search_rsb_tree (& ls -> ls_rsbtbl , name , len , & r );
925- if (error ) {
929+ if (!rsb_flag (r , RSB_HASHED )) {
926930 read_unlock_bh (& ls -> ls_rsbtbl_lock );
927931 goto do_new ;
928932 }
@@ -1151,7 +1155,7 @@ static void __dlm_master_lookup(struct dlm_ls *ls, struct dlm_rsb *r, int our_no
11511155 r -> res_dir_nodeid = our_nodeid ;
11521156 }
11531157
1154- if (fix_master && dlm_is_removed (ls , r -> res_master_nodeid )) {
1158+ if (fix_master && r -> res_master_nodeid && dlm_is_removed (ls , r -> res_master_nodeid )) {
11551159 /* Recovery uses this function to set a new master when
11561160 * the previous master failed. Setting NEW_MASTER will
11571161 * force dlm_recover_masters to call recover_master on this
@@ -1276,43 +1280,45 @@ static int _dlm_master_lookup(struct dlm_ls *ls, int from_nodeid, const char *na
12761280 }
12771281
12781282 retry :
1283+ error = dlm_search_rsb_tree (& ls -> ls_rsbtbl , name , len , & r );
1284+ if (error )
1285+ goto not_found ;
12791286
12801287 /* check if the rsb is active under read lock - likely path */
12811288 read_lock_bh (& ls -> ls_rsbtbl_lock );
1282- error = dlm_search_rsb_tree (& ls -> ls_rsbtbl , name , len , & r );
1283- if (!error ) {
1284- if (rsb_flag (r , RSB_INACTIVE )) {
1285- read_unlock_bh (& ls -> ls_rsbtbl_lock );
1286- goto do_inactive ;
1287- }
1288-
1289- /* because the rsb is active, we need to lock_rsb before
1290- * checking/changing re_master_nodeid
1291- */
1289+ if (!rsb_flag (r , RSB_HASHED )) {
1290+ read_unlock_bh (& ls -> ls_rsbtbl_lock );
1291+ goto not_found ;
1292+ }
12921293
1293- hold_rsb ( r );
1294+ if ( rsb_flag ( r , RSB_INACTIVE )) {
12941295 read_unlock_bh (& ls -> ls_rsbtbl_lock );
1295- lock_rsb (r );
1296+ goto do_inactive ;
1297+ }
12961298
1297- __dlm_master_lookup (ls , r , our_nodeid , from_nodeid , false,
1298- flags , r_nodeid , result );
1299+ /* because the rsb is active, we need to lock_rsb before
1300+ * checking/changing res_master_nodeid
1301+ */
12991302
1300- /* the rsb was active */
1301- unlock_rsb ( r );
1302- put_rsb (r );
1303+ hold_rsb ( r );
1304+ read_unlock_bh ( & ls -> ls_rsbtbl_lock );
1305+ lock_rsb (r );
13031306
1304- return 0 ;
1305- } else {
1306- read_unlock_bh (& ls -> ls_rsbtbl_lock );
1307- goto not_found ;
1308- }
1307+ __dlm_master_lookup (ls , r , our_nodeid , from_nodeid , false,
1308+ flags , r_nodeid , result );
1309+
1310+ /* the rsb was active */
1311+ unlock_rsb (r );
1312+ put_rsb (r );
1313+
1314+ return 0 ;
13091315
13101316 do_inactive :
1311- /* unlikely path - relookup under write */
1317+ /* unlikely path - check if still part of ls_rsbtbl */
13121318 write_lock_bh (& ls -> ls_rsbtbl_lock );
13131319
1314- error = dlm_search_rsb_tree ( & ls -> ls_rsbtbl , name , len , & r );
1315- if (! error ) {
1320+ /* see comment in find_rsb_dir */
1321+ if (rsb_flag ( r , RSB_HASHED ) ) {
13161322 if (!rsb_flag (r , RSB_INACTIVE )) {
13171323 write_unlock_bh (& ls -> ls_rsbtbl_lock );
13181324 /* something as changed, very unlikely but
@@ -1403,14 +1409,14 @@ void dlm_dump_rsb_name(struct dlm_ls *ls, const char *name, int len)
14031409 struct dlm_rsb * r = NULL ;
14041410 int error ;
14051411
1406- read_lock_bh ( & ls -> ls_rsbtbl_lock );
1412+ rcu_read_lock ( );
14071413 error = dlm_search_rsb_tree (& ls -> ls_rsbtbl , name , len , & r );
14081414 if (!error )
14091415 goto out ;
14101416
14111417 dlm_dump_rsb (r );
14121418 out :
1413- read_unlock_bh ( & ls -> ls_rsbtbl_lock );
1419+ rcu_read_unlock ( );
14141420}
14151421
14161422static void deactivate_rsb (struct kref * kref )
@@ -1442,18 +1448,6 @@ static void deactivate_rsb(struct kref *kref)
14421448 }
14431449}
14441450
1445- /* See comment for unhold_lkb */
1446-
1447- static void unhold_rsb (struct dlm_rsb * r )
1448- {
1449- int rv ;
1450-
1451- /* inactive rsbs are not ref counted */
1452- WARN_ON (rsb_flag (r , RSB_INACTIVE ));
1453- rv = kref_put (& r -> res_ref , deactivate_rsb );
1454- DLM_ASSERT (!rv , dlm_dump_rsb (r ););
1455- }
1456-
14571451void free_inactive_rsb (struct dlm_rsb * r )
14581452{
14591453 WARN_ON_ONCE (!rsb_flag (r , RSB_INACTIVE ));
@@ -1497,7 +1491,7 @@ static int _create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret,
14971491 limit .max = end ;
14981492 limit .min = start ;
14991493
1500- lkb = dlm_allocate_lkb (ls );
1494+ lkb = dlm_allocate_lkb ();
15011495 if (!lkb )
15021496 return - ENOMEM ;
15031497
@@ -1533,11 +1527,21 @@ static int find_lkb(struct dlm_ls *ls, uint32_t lkid, struct dlm_lkb **lkb_ret)
15331527{
15341528 struct dlm_lkb * lkb ;
15351529
1536- read_lock_bh ( & ls -> ls_lkbxa_lock );
1530+ rcu_read_lock ( );
15371531 lkb = xa_load (& ls -> ls_lkbxa , lkid );
1538- if (lkb )
1539- kref_get (& lkb -> lkb_ref );
1540- read_unlock_bh (& ls -> ls_lkbxa_lock );
1532+ if (lkb ) {
1533+ /* check if lkb is still part of lkbxa under lkbxa_lock as
1534+ * the lkb_ref is tied to the lkbxa data structure, see
1535+ * __put_lkb().
1536+ */
1537+ read_lock_bh (& ls -> ls_lkbxa_lock );
1538+ if (kref_read (& lkb -> lkb_ref ))
1539+ kref_get (& lkb -> lkb_ref );
1540+ else
1541+ lkb = NULL ;
1542+ read_unlock_bh (& ls -> ls_lkbxa_lock );
1543+ }
1544+ rcu_read_unlock ();
15411545
15421546 * lkb_ret = lkb ;
15431547 return lkb ? 0 : - ENOENT ;
@@ -1675,10 +1679,8 @@ static void del_lkb(struct dlm_rsb *r, struct dlm_lkb *lkb)
16751679
16761680static void move_lkb (struct dlm_rsb * r , struct dlm_lkb * lkb , int sts )
16771681{
1678- hold_lkb (lkb );
16791682 del_lkb (r , lkb );
16801683 add_lkb (r , lkb , sts );
1681- unhold_lkb (lkb );
16821684}
16831685
16841686static int msg_reply_type (int mstype )
@@ -4323,16 +4325,27 @@ static void receive_remove(struct dlm_ls *ls, const struct dlm_message *ms)
43234325 memset (name , 0 , sizeof (name ));
43244326 memcpy (name , ms -> m_extra , len );
43254327
4326- write_lock_bh (& ls -> ls_rsbtbl_lock );
4327-
4328+ rcu_read_lock ();
43284329 rv = dlm_search_rsb_tree (& ls -> ls_rsbtbl , name , len , & r );
43294330 if (rv ) {
4331+ rcu_read_unlock ();
43304332 /* should not happen */
43314333 log_error (ls , "%s from %d not found %s" , __func__ ,
43324334 from_nodeid , name );
4335+ return ;
4336+ }
4337+
4338+ write_lock_bh (& ls -> ls_rsbtbl_lock );
4339+ if (!rsb_flag (r , RSB_HASHED )) {
4340+ rcu_read_unlock ();
43334341 write_unlock_bh (& ls -> ls_rsbtbl_lock );
4342+ /* should not happen */
4343+ log_error (ls , "%s from %d got removed during removal %s" ,
4344+ __func__ , from_nodeid , name );
43344345 return ;
43354346 }
4347+ /* at this stage the rsb can only be freed here */
4348+ rcu_read_unlock ();
43364349
43374350 if (!rsb_flag (r , RSB_INACTIVE )) {
43384351 if (r -> res_master_nodeid != from_nodeid ) {
@@ -5297,7 +5310,7 @@ int dlm_recover_waiters_post(struct dlm_ls *ls)
52975310 case DLM_MSG_LOOKUP :
52985311 case DLM_MSG_REQUEST :
52995312 _request_lock (r , lkb );
5300- if (is_master (r ))
5313+ if (r -> res_nodeid != -1 && is_master (r ))
53015314 confirm_master (r , 0 );
53025315 break ;
53035316 case DLM_MSG_CONVERT :
@@ -5409,9 +5422,8 @@ void dlm_recover_purge(struct dlm_ls *ls, const struct list_head *root_list)
54095422 return ;
54105423
54115424 list_for_each_entry (r , root_list , res_root_list ) {
5412- hold_rsb (r );
54135425 lock_rsb (r );
5414- if (is_master (r )) {
5426+ if (r -> res_nodeid != -1 && is_master (r )) {
54155427 purge_dead_list (ls , r , & r -> res_grantqueue ,
54165428 nodeid_gone , & lkb_count );
54175429 purge_dead_list (ls , r , & r -> res_convertqueue ,
@@ -5420,7 +5432,7 @@ void dlm_recover_purge(struct dlm_ls *ls, const struct list_head *root_list)
54205432 nodeid_gone , & lkb_count );
54215433 }
54225434 unlock_rsb (r );
5423- unhold_rsb ( r );
5435+
54245436 cond_resched ();
54255437 }
54265438
0 commit comments