Skip to content

Commit 72a36a8

Browse files
Hans Wippel authored and davem330 committed
net/smc: use client and server LGR pending locks for SMC-R
If SMC client and server connections are both established at the same time, smc_connect_rdma() cannot send a CLC confirm message while smc_listen_work() is waiting for one due to lock contention. This can result in timeouts in smc_clc_wait_msg() and failed SMC connections.

In case of SMC-R, there are two types of LGRs (client and server LGRs) which can be protected by separate locks. So, this patch splits the LGR pending lock into two separate locks for client and server to avoid the locking issue for SMC-R.

Signed-off-by: Hans Wippel <hwippel@linux.ibm.com>
Signed-off-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent 62c7139 commit 72a36a8

File tree

1 file changed

+28
-16
lines changed

1 file changed

+28
-16
lines changed

net/smc/af_smc.c

Lines changed: 28 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -42,8 +42,11 @@
4242
#include "smc_rx.h"
4343
#include "smc_close.h"
4444

45-
static DEFINE_MUTEX(smc_create_lgr_pending); /* serialize link group
46-
* creation
45+
static DEFINE_MUTEX(smc_server_lgr_pending); /* serialize link group
46+
* creation on server
47+
*/
48+
static DEFINE_MUTEX(smc_client_lgr_pending); /* serialize link group
49+
* creation on client
4750
*/
4851

4952
static void smc_tcp_listen_work(struct work_struct *);
@@ -477,7 +480,12 @@ static int smc_connect_abort(struct smc_sock *smc, int reason_code,
477480
{
478481
if (local_contact == SMC_FIRST_CONTACT)
479482
smc_lgr_forget(smc->conn.lgr);
480-
mutex_unlock(&smc_create_lgr_pending);
483+
if (smc->conn.lgr->is_smcd)
484+
/* there is only one lgr role for SMC-D; use server lock */
485+
mutex_unlock(&smc_server_lgr_pending);
486+
else
487+
mutex_unlock(&smc_client_lgr_pending);
488+
481489
smc_conn_free(&smc->conn);
482490
return reason_code;
483491
}
@@ -562,7 +570,7 @@ static int smc_connect_rdma(struct smc_sock *smc,
562570
struct smc_link *link;
563571
int reason_code = 0;
564572

565-
mutex_lock(&smc_create_lgr_pending);
573+
mutex_lock(&smc_client_lgr_pending);
566574
local_contact = smc_conn_create(smc, false, aclc->hdr.flag, ibdev,
567575
ibport, ntoh24(aclc->qpn), &aclc->lcl,
568576
NULL, 0);
@@ -573,7 +581,8 @@ static int smc_connect_rdma(struct smc_sock *smc,
573581
reason_code = SMC_CLC_DECL_SYNCERR; /* synchr. error */
574582
else
575583
reason_code = SMC_CLC_DECL_INTERR; /* other error */
576-
return smc_connect_abort(smc, reason_code, 0);
584+
mutex_unlock(&smc_client_lgr_pending);
585+
return reason_code;
577586
}
578587
link = &smc->conn.lgr->lnk[SMC_SINGLE_LINK];
579588

@@ -617,7 +626,7 @@ static int smc_connect_rdma(struct smc_sock *smc,
617626
return smc_connect_abort(smc, reason_code,
618627
local_contact);
619628
}
620-
mutex_unlock(&smc_create_lgr_pending);
629+
mutex_unlock(&smc_client_lgr_pending);
621630

622631
smc_copy_sock_settings_to_clc(smc);
623632
if (smc->sk.sk_state == SMC_INIT)
@@ -634,11 +643,14 @@ static int smc_connect_ism(struct smc_sock *smc,
634643
int local_contact = SMC_FIRST_CONTACT;
635644
int rc = 0;
636645

637-
mutex_lock(&smc_create_lgr_pending);
646+
/* there is only one lgr role for SMC-D; use server lock */
647+
mutex_lock(&smc_server_lgr_pending);
638648
local_contact = smc_conn_create(smc, true, aclc->hdr.flag, NULL, 0, 0,
639649
NULL, ismdev, aclc->gid);
640-
if (local_contact < 0)
641-
return smc_connect_abort(smc, SMC_CLC_DECL_MEM, 0);
650+
if (local_contact < 0) {
651+
mutex_unlock(&smc_server_lgr_pending);
652+
return SMC_CLC_DECL_MEM;
653+
}
642654

643655
/* Create send and receive buffers */
644656
if (smc_buf_create(smc, true))
@@ -652,7 +664,7 @@ static int smc_connect_ism(struct smc_sock *smc,
652664
rc = smc_clc_send_confirm(smc);
653665
if (rc)
654666
return smc_connect_abort(smc, rc, local_contact);
655-
mutex_unlock(&smc_create_lgr_pending);
667+
mutex_unlock(&smc_server_lgr_pending);
656668

657669
smc_copy_sock_settings_to_clc(smc);
658670
if (smc->sk.sk_state == SMC_INIT)
@@ -1251,7 +1263,7 @@ static void smc_listen_work(struct work_struct *work)
12511263
return;
12521264
}
12531265

1254-
mutex_lock(&smc_create_lgr_pending);
1266+
mutex_lock(&smc_server_lgr_pending);
12551267
smc_close_init(new_smc);
12561268
smc_rx_init(new_smc);
12571269
smc_tx_init(new_smc);
@@ -1273,7 +1285,7 @@ static void smc_listen_work(struct work_struct *work)
12731285
&local_contact) ||
12741286
smc_listen_rdma_reg(new_smc, local_contact))) {
12751287
/* SMC not supported, decline */
1276-
mutex_unlock(&smc_create_lgr_pending);
1288+
mutex_unlock(&smc_server_lgr_pending);
12771289
smc_listen_decline(new_smc, SMC_CLC_DECL_MODEUNSUPP,
12781290
local_contact);
12791291
return;
@@ -1282,29 +1294,29 @@ static void smc_listen_work(struct work_struct *work)
12821294
/* send SMC Accept CLC message */
12831295
rc = smc_clc_send_accept(new_smc, local_contact);
12841296
if (rc) {
1285-
mutex_unlock(&smc_create_lgr_pending);
1297+
mutex_unlock(&smc_server_lgr_pending);
12861298
smc_listen_decline(new_smc, rc, local_contact);
12871299
return;
12881300
}
12891301

12901302
/* SMC-D does not need this lock any more */
12911303
if (ism_supported)
1292-
mutex_unlock(&smc_create_lgr_pending);
1304+
mutex_unlock(&smc_server_lgr_pending);
12931305

12941306
/* receive SMC Confirm CLC message */
12951307
reason_code = smc_clc_wait_msg(new_smc, &cclc, sizeof(cclc),
12961308
SMC_CLC_CONFIRM, CLC_WAIT_TIME);
12971309
if (reason_code) {
12981310
if (!ism_supported)
1299-
mutex_unlock(&smc_create_lgr_pending);
1311+
mutex_unlock(&smc_server_lgr_pending);
13001312
smc_listen_decline(new_smc, reason_code, local_contact);
13011313
return;
13021314
}
13031315

13041316
/* finish worker */
13051317
if (!ism_supported) {
13061318
rc = smc_listen_rdma_finish(new_smc, &cclc, local_contact);
1307-
mutex_unlock(&smc_create_lgr_pending);
1319+
mutex_unlock(&smc_server_lgr_pending);
13081320
if (rc)
13091321
return;
13101322
}

0 commit comments

Comments
 (0)