Skip to content

Commit c6f02eb

Browse files
karstengr authored and davem330 committed
net/smc: switch connections to alternate link
Add smc_switch_conns() to switch all connections from a link that is going down. Find another link to switch the connections to, and switch each connection to the new link. smc_switch_cursor() updates the cursors of a connection to the state of the last successfully sent CDC message. When there is no link to switch to, terminate the link group. Call smc_switch_conns() when a link is going down. Since the link of a connection can now change, adapt the CDC and TX functions to detect and handle link switches. Signed-off-by: Karsten Graul <kgraul@linux.ibm.com> Reviewed-by: Ursula Braun <ubraun@linux.ibm.com> Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent f0ec4f1 commit c6f02eb

File tree

6 files changed

+162
-9
lines changed

6 files changed

+162
-9
lines changed

net/smc/smc_cdc.c

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,11 +56,11 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd,
5656
}
5757

5858
int smc_cdc_get_free_slot(struct smc_connection *conn,
59+
struct smc_link *link,
5960
struct smc_wr_buf **wr_buf,
6061
struct smc_rdma_wr **wr_rdma_buf,
6162
struct smc_cdc_tx_pend **pend)
6263
{
63-
struct smc_link *link = conn->lnk;
6464
int rc;
6565

6666
rc = smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf,
@@ -119,13 +119,27 @@ static int smcr_cdc_get_slot_and_msg_send(struct smc_connection *conn)
119119
{
120120
struct smc_cdc_tx_pend *pend;
121121
struct smc_wr_buf *wr_buf;
122+
struct smc_link *link;
123+
bool again = false;
122124
int rc;
123125

124-
rc = smc_cdc_get_free_slot(conn, &wr_buf, NULL, &pend);
126+
again:
127+
link = conn->lnk;
128+
rc = smc_cdc_get_free_slot(conn, link, &wr_buf, NULL, &pend);
125129
if (rc)
126130
return rc;
127131

128132
spin_lock_bh(&conn->send_lock);
133+
if (link != conn->lnk) {
134+
/* link of connection changed, try again one time*/
135+
spin_unlock_bh(&conn->send_lock);
136+
smc_wr_tx_put_slot(link,
137+
(struct smc_wr_tx_pend_priv *)pend);
138+
if (again)
139+
return -ENOLINK;
140+
again = true;
141+
goto again;
142+
}
129143
rc = smc_cdc_msg_send(conn, wr_buf, pend);
130144
spin_unlock_bh(&conn->send_lock);
131145
return rc;

net/smc/smc_cdc.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,7 @@ struct smc_cdc_tx_pend {
304304
};
305305

306306
int smc_cdc_get_free_slot(struct smc_connection *conn,
307+
struct smc_link *link,
307308
struct smc_wr_buf **wr_buf,
308309
struct smc_rdma_wr **wr_rdma_buf,
309310
struct smc_cdc_tx_pend **pend);

net/smc/smc_core.c

Lines changed: 130 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -432,6 +432,135 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
432432
return rc;
433433
}
434434

435+
/* Compute the write space of a connection relative to peer_rmbe_size.
 *
 * Takes local copies of the producer cursor (local_tx_ctrl.prod) and the
 * consumer cursor (local_rx_ctrl.cons) and returns peer_rmbe_size minus
 * the smc_curs_diff() of the two.
 *
 * Returns the computed space as an int.
 */
static int smc_write_space(struct smc_connection *conn)
436+
{
437+
int buffer_len = conn->peer_rmbe_size;
438+
union smc_host_cursor prod;
439+
union smc_host_cursor cons;
440+
int space;
441+
442+
/* work on local copies of both cursors */
smc_curs_copy(&prod, &conn->local_tx_ctrl.prod, conn);
443+
smc_curs_copy(&cons, &conn->local_rx_ctrl.cons, conn);
444+
/* determine rx_buf space */
445+
space = buffer_len - smc_curs_diff(buffer_len, &cons, &prod);
446+
return space;
447+
}
448+
449+
/* Reset the TX cursors of a connection after its link was switched.
 *
 * tx_curs_sent is set back to tx_curs_fin and the producer cursor in
 * local_tx_ctrl is set back to local_tx_ctrl_fin (per the commit
 * description: the state of the last successfully sent CDC message).
 * If the consumer cursor is already ahead of that state, all affected
 * cursors and sndbuf_space are moved forward by the difference.
 * Afterwards peer_rmbe_space is recalculated and, unless the socket is in
 * SMC_INIT or SMC_CLOSED state, tx_work is scheduled and sk_data_ready()
 * is invoked.
 *
 * The visible caller, smc_switch_conns(), invokes this with
 * conn->send_lock held.
 *
 * Returns rc. NOTE(review): rc is initialized to 0 and never reassigned
 * as long as the CDC send below is still marked "tbd", so this currently
 * always returns 0.
 */
static int smc_switch_cursor(struct smc_sock *smc)
450+
{
451+
struct smc_connection *conn = &smc->conn;
452+
union smc_host_cursor cons, fin;
453+
int rc = 0;
454+
int diff;
455+
456+
/* tx_curs_sent := tx_curs_fin */
smc_curs_copy(&conn->tx_curs_sent, &conn->tx_curs_fin, conn);
457+
smc_curs_copy(&fin, &conn->local_tx_ctrl_fin, conn);
458+
/* set prod cursor to old state, enforce tx_rdma_writes() */
459+
smc_curs_copy(&conn->local_tx_ctrl.prod, &fin, conn);
460+
smc_curs_copy(&cons, &conn->local_rx_ctrl.cons, conn);
461+
462+
if (smc_curs_comp(conn->peer_rmbe_size, &cons, &fin) < 0) {
463+
/* cons cursor advanced more than fin, and prod was set
464+
 * to fin above, so now prod is smaller than cons. Fix that.
465+
 */
466+
diff = smc_curs_diff(conn->peer_rmbe_size, &fin, &cons);
467+
/* send buffer cursors are advanced within sndbuf_desc->len */
smc_curs_add(conn->sndbuf_desc->len,
468+
&conn->tx_curs_sent, diff);
469+
smc_curs_add(conn->sndbuf_desc->len,
470+
&conn->tx_curs_fin, diff);
471+
472+
/* account the skipped bytes as send buffer space again */
smp_mb__before_atomic();
473+
atomic_add(diff, &conn->sndbuf_space);
474+
smp_mb__after_atomic();
475+
476+
/* peer buffer cursors are advanced within peer_rmbe_size */
smc_curs_add(conn->peer_rmbe_size,
477+
&conn->local_tx_ctrl.prod, diff);
478+
smc_curs_add(conn->peer_rmbe_size,
479+
&conn->local_tx_ctrl_fin, diff);
480+
}
481+
/* recalculate, value is used by tx_rdma_writes() */
482+
atomic_set(&smc->conn.peer_rmbe_space, smc_write_space(conn));
483+
484+
if (smc->sk.sk_state != SMC_INIT &&
485+
smc->sk.sk_state != SMC_CLOSED) {
486+
/* tbd: call rc = smc_cdc_get_slot_and_msg_send(conn); */
487+
/* rc is still 0 here until the call above is enabled */
if (!rc) {
488+
schedule_delayed_work(&conn->tx_work, 0);
489+
smc->sk.sk_data_ready(&smc->sk);
490+
}
491+
}
492+
return rc;
493+
}
494+
495+
/* Switch all connections of a link group that use @from_lnk to another
 * active link of the same link group.
 *
 * @lgr:        link group whose connections are examined
 * @from_lnk:   link the connections are moved away from
 * @is_dev_err: if true, candidate links with the same smcibdev and
 *              ibport as @from_lnk are not considered
 *
 * The first link in lgr->lnk[] that is in state SMC_LNK_ACTIVE, is not
 * @from_lnk and passes the @is_dev_err filter becomes the target link.
 * If there is none, termination of the link group is scheduled.
 *
 * Returns the target link, or NULL if no target link was found or if
 * smc_switch_cursor() reported an error (in which case a link-down is
 * scheduled for the target link via smcr_link_down_cond_sched()).
 */
struct smc_link *smc_switch_conns(struct smc_link_group *lgr,
496+
struct smc_link *from_lnk, bool is_dev_err)
497+
{
498+
struct smc_link *to_lnk = NULL;
499+
struct smc_connection *conn;
500+
struct smc_sock *smc;
501+
struct rb_node *node;
502+
int i, rc = 0;
503+
504+
/* link is inactive, wake up tx waiters */
505+
smc_wr_wakeup_tx_wait(from_lnk);
506+
507+
/* pick the first suitable active link as switch target */
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
508+
if (lgr->lnk[i].state != SMC_LNK_ACTIVE ||
509+
i == from_lnk->link_idx)
510+
continue;
511+
/* on a device error skip links on the same device and port */
if (is_dev_err && from_lnk->smcibdev == lgr->lnk[i].smcibdev &&
512+
from_lnk->ibport == lgr->lnk[i].ibport) {
513+
continue;
514+
}
515+
to_lnk = &lgr->lnk[i];
516+
break;
517+
}
518+
if (!to_lnk) {
519+
/* no link to switch to: terminate the link group */
smc_lgr_terminate_sched(lgr);
520+
return NULL;
521+
}
522+
/* The tree walk is restarted from here after each connection that
 * needed a cursor switch, because conns_lock is dropped for that.
 * Switched connections no longer match from_lnk and are skipped.
 */
again:
523+
read_lock_bh(&lgr->conns_lock);
524+
for (node = rb_first(&lgr->conns_all); node; node = rb_next(node)) {
525+
conn = rb_entry(node, struct smc_connection, alert_node);
526+
if (conn->lnk != from_lnk)
527+
continue;
528+
smc = container_of(conn, struct smc_sock, conn);
529+
/* conn->lnk not yet set in SMC_INIT state */
530+
if (smc->sk.sk_state == SMC_INIT)
531+
continue;
532+
/* in these states only the link pointer is replaced, without
 * a cursor switch and without dropping conns_lock
 */
if (smc->sk.sk_state == SMC_CLOSED ||
533+
smc->sk.sk_state == SMC_PEERCLOSEWAIT1 ||
534+
smc->sk.sk_state == SMC_PEERCLOSEWAIT2 ||
535+
smc->sk.sk_state == SMC_APPFINCLOSEWAIT ||
536+
smc->sk.sk_state == SMC_APPCLOSEWAIT1 ||
537+
smc->sk.sk_state == SMC_APPCLOSEWAIT2 ||
538+
smc->sk.sk_state == SMC_PEERFINCLOSEWAIT ||
539+
smc->sk.sk_state == SMC_PEERABORTWAIT ||
540+
smc->sk.sk_state == SMC_PROCESSABORT) {
541+
spin_lock_bh(&conn->send_lock);
542+
conn->lnk = to_lnk;
543+
spin_unlock_bh(&conn->send_lock);
544+
continue;
545+
}
546+
/* hold a socket reference while conns_lock is released */
sock_hold(&smc->sk);
547+
read_unlock_bh(&lgr->conns_lock);
548+
/* avoid race with smcr_tx_sndbuf_nonempty() */
549+
spin_lock_bh(&conn->send_lock);
550+
conn->lnk = to_lnk;
551+
rc = smc_switch_cursor(smc);
552+
spin_unlock_bh(&conn->send_lock);
553+
sock_put(&smc->sk);
554+
if (rc) {
555+
/* switch failed: schedule link-down for the target link */
smcr_link_down_cond_sched(to_lnk);
556+
return NULL;
557+
}
558+
goto again;
559+
}
560+
read_unlock_bh(&lgr->conns_lock);
561+
return to_lnk;
562+
}
563+
435564
static void smcr_buf_unuse(struct smc_buf_desc *rmb_desc,
436565
struct smc_link_group *lgr)
437566
{
@@ -943,8 +1072,7 @@ static void smcr_link_down(struct smc_link *lnk)
9431072
return;
9441073

9451074
smc_ib_modify_qp_reset(lnk);
946-
to_lnk = NULL;
947-
/* tbd: call to_lnk = smc_switch_conns(lgr, lnk, true); */
1075+
to_lnk = smc_switch_conns(lgr, lnk, true);
9481076
if (!to_lnk) { /* no backup link available */
9491077
smcr_link_clear(lnk);
9501078
return;

net/smc/smc_core.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -380,6 +380,8 @@ void smcr_link_clear(struct smc_link *lnk);
380380
int smcr_buf_map_lgr(struct smc_link *lnk);
381381
int smcr_buf_reg_lgr(struct smc_link *lnk);
382382
int smcr_link_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc);
383+
struct smc_link *smc_switch_conns(struct smc_link_group *lgr,
384+
struct smc_link *from_lnk, bool is_dev_err);
383385
void smcr_link_down_cond(struct smc_link *lnk);
384386
void smcr_link_down_cond_sched(struct smc_link *lnk);
385387

net/smc/smc_llc.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -933,7 +933,7 @@ static void smc_llc_delete_asym_link(struct smc_link_group *lgr)
933933
return; /* no asymmetric link */
934934
if (!smc_link_downing(&lnk_asym->state))
935935
return;
936-
/* tbd: lnk_new = smc_switch_conns(lgr, lnk_asym, false); */
936+
lnk_new = smc_switch_conns(lgr, lnk_asym, false);
937937
smc_wr_tx_wait_no_pending_sends(lnk_asym);
938938
if (!lnk_new)
939939
goto out_free;
@@ -1195,7 +1195,7 @@ static void smc_llc_process_cli_delete_link(struct smc_link_group *lgr)
11951195
smc_llc_send_message(lnk, &qentry->msg); /* response */
11961196

11971197
if (smc_link_downing(&lnk_del->state)) {
1198-
/* tbd: call smc_switch_conns(lgr, lnk_del, false); */
1198+
smc_switch_conns(lgr, lnk_del, false);
11991199
smc_wr_tx_wait_no_pending_sends(lnk_del);
12001200
}
12011201
smcr_link_clear(lnk_del);
@@ -1245,7 +1245,7 @@ static void smc_llc_process_srv_delete_link(struct smc_link_group *lgr)
12451245
goto out; /* asymmetric link already deleted */
12461246

12471247
if (smc_link_downing(&lnk_del->state)) {
1248-
/* tbd: call smc_switch_conns(lgr, lnk_del, false); */
1248+
smc_switch_conns(lgr, lnk_del, false);
12491249
smc_wr_tx_wait_no_pending_sends(lnk_del);
12501250
}
12511251
if (!list_empty(&lgr->list)) {

net/smc/smc_tx.c

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -482,12 +482,13 @@ static int smc_tx_rdma_writes(struct smc_connection *conn,
482482
static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn)
483483
{
484484
struct smc_cdc_producer_flags *pflags = &conn->local_tx_ctrl.prod_flags;
485+
struct smc_link *link = conn->lnk;
485486
struct smc_rdma_wr *wr_rdma_buf;
486487
struct smc_cdc_tx_pend *pend;
487488
struct smc_wr_buf *wr_buf;
488489
int rc;
489490

490-
rc = smc_cdc_get_free_slot(conn, &wr_buf, &wr_rdma_buf, &pend);
491+
rc = smc_cdc_get_free_slot(conn, link, &wr_buf, &wr_rdma_buf, &pend);
491492
if (rc < 0) {
492493
if (rc == -EBUSY) {
493494
struct smc_sock *smc =
@@ -505,10 +506,17 @@ static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn)
505506
}
506507

507508
spin_lock_bh(&conn->send_lock);
509+
if (link != conn->lnk) {
510+
/* link of connection changed, tx_work will restart */
511+
smc_wr_tx_put_slot(link,
512+
(struct smc_wr_tx_pend_priv *)pend);
513+
rc = -ENOLINK;
514+
goto out_unlock;
515+
}
508516
if (!pflags->urg_data_present) {
509517
rc = smc_tx_rdma_writes(conn, wr_rdma_buf);
510518
if (rc) {
511-
smc_wr_tx_put_slot(conn->lnk,
519+
smc_wr_tx_put_slot(link,
512520
(struct smc_wr_tx_pend_priv *)pend);
513521
goto out_unlock;
514522
}

0 commit comments

Comments
 (0)