
Commit f605f26

Bob Pearson authored and jgunthorpe committed
RDMA/rxe: Protect QP state with qp->state_lock
Currently the rxe driver makes little effort to make the changes to qp state (which includes qp->attr.qp_state, qp->attr.sq_draining and qp->valid) atomic between different client threads and IO threads. In particular a common template is for an RDMA application to call ib_modify_qp() to move a qp to ERR state and then wait until all the packet and work queues have drained before calling ib_destroy_qp(). None of these state changes are protected by locks to assure that the changes are executed atomically and that memory barriers are included. This has been observed to lead to incorrect behavior around qp cleanup.

This patch continues the work of the previous patches in this series and adds locking code around qp state changes and lookups.

Link: https://lore.kernel.org/r/20230405042611.6467-5-rpearsonhpe@gmail.com
Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
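For context, here is a minimal sketch of the "move the qp to ERR, wait for the queues to drain, then destroy" template that the commit message describes, written as a kernel consumer of the verbs API might. It is illustrative only and not part of this patch: wait_for_flush_completions() is a hypothetical placeholder for however the consumer waits for its flushed work completions (in-kernel ULPs often use the core helper ib_drain_qp() for this step).

#include <rdma/ib_verbs.h>

/* Hypothetical helper standing in for "wait until all the packet and
 * work queues have drained", e.g. polling the CQs for flush-error CQEs.
 */
static void wait_for_flush_completions(struct ib_qp *qp);

static void example_qp_teardown(struct ib_qp *qp)
{
	struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };

	ib_modify_qp(qp, &attr, IB_QP_STATE);	/* move the qp to ERR */
	wait_for_flush_completions(qp);		/* placeholder drain step */
	ib_destroy_qp(qp);
}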
1 parent 7b560b8 commit f605f26

File tree: 7 files changed, +317 / -218 lines

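The diffs below all apply one convention: every read or update of qp->valid, qp->attr.qp_state or qp->attr.sq_draining happens while holding qp->state_lock, and the lock is dropped before sleeping, rescheduling or returning. A minimal sketch of that access pattern, assuming the existing rxe helpers seen in the diffs (qp_state() and the qp->state_lock spinlock); the function itself is illustrative and not part of the patch:

/* Illustrative only, not code from this patch: qp state is inspected
 * under qp->state_lock, using the _bh variant because the same fields
 * are also touched from tasklet/softirq context.
 */
static bool example_qp_is_usable(struct rxe_qp *qp)
{
	bool usable;

	spin_lock_bh(&qp->state_lock);
	usable = qp->valid && qp_state(qp) != IB_QPS_RESET &&
		 qp_state(qp) != IB_QPS_ERR;
	spin_unlock_bh(&qp->state_lock);

	return usable;
}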

drivers/infiniband/sw/rxe/rxe_comp.c

Lines changed: 30 additions & 18 deletions
@@ -118,10 +118,12 @@ void retransmit_timer(struct timer_list *t)
 
 	rxe_dbg_qp(qp, "retransmit timer fired\n");
 
+	spin_lock_bh(&qp->state_lock);
 	if (qp->valid) {
 		qp->comp.timeout = 1;
 		rxe_sched_task(&qp->comp.task);
 	}
+	spin_unlock_bh(&qp->state_lock);
 }
 
 void rxe_comp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb)
@@ -479,9 +481,8 @@ static void do_complete(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
 
 static void comp_check_sq_drain_done(struct rxe_qp *qp)
 {
+	spin_lock_bh(&qp->state_lock);
 	if (unlikely(qp_state(qp) == IB_QPS_SQD)) {
-		/* state_lock used by requester & completer */
-		spin_lock_bh(&qp->state_lock);
 		if (qp->attr.sq_draining && qp->comp.psn == qp->req.psn) {
 			qp->attr.sq_draining = 0;
 			spin_unlock_bh(&qp->state_lock);
@@ -497,8 +498,8 @@ static void comp_check_sq_drain_done(struct rxe_qp *qp)
 			}
 			return;
 		}
-		spin_unlock_bh(&qp->state_lock);
 	}
+	spin_unlock_bh(&qp->state_lock);
 }
 
 static inline enum comp_state complete_ack(struct rxe_qp *qp,
@@ -614,6 +615,26 @@ static void free_pkt(struct rxe_pkt_info *pkt)
 	ib_device_put(dev);
 }
 
+/* reset the retry timer if
+ *     - QP is type RC
+ *     - there is a packet sent by the requester that
+ *       might be acked (we still might get spurious
+ *       timeouts but try to keep them as few as possible)
+ *     - the timeout parameter is set
+ *     - the QP is alive
+ */
+static void reset_retry_timer(struct rxe_qp *qp)
+{
+	if (qp_type(qp) == IB_QPT_RC && qp->qp_timeout_jiffies) {
+		spin_lock_bh(&qp->state_lock);
+		if (qp_state(qp) >= IB_QPS_RTS &&
+		    psn_compare(qp->req.psn, qp->comp.psn) > 0)
+			mod_timer(&qp->retrans_timer,
+				  jiffies + qp->qp_timeout_jiffies);
+		spin_unlock_bh(&qp->state_lock);
+	}
+}
+
 int rxe_completer(struct rxe_qp *qp)
 {
 	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
@@ -623,14 +644,17 @@ int rxe_completer(struct rxe_qp *qp)
 	enum comp_state state;
 	int ret;
 
+	spin_lock_bh(&qp->state_lock);
 	if (!qp->valid || qp_state(qp) == IB_QPS_ERR ||
-	    qp_state(qp) == IB_QPS_RESET) {
+			  qp_state(qp) == IB_QPS_RESET) {
 		bool notify = qp->valid && (qp_state(qp) == IB_QPS_ERR);
 
 		drain_resp_pkts(qp);
 		flush_send_queue(qp, notify);
+		spin_unlock_bh(&qp->state_lock);
 		goto exit;
 	}
+	spin_unlock_bh(&qp->state_lock);
 
 	if (qp->comp.timeout) {
 		qp->comp.timeout_retry = 1;
@@ -718,20 +742,7 @@ int rxe_completer(struct rxe_qp *qp)
 			break;
 		}
 
-		/* re reset the timeout counter if
-		 * (1) QP is type RC
-		 * (2) the QP is alive
-		 * (3) there is a packet sent by the requester that
-		 *     might be acked (we still might get spurious
-		 *     timeouts but try to keep them as few as possible)
-		 * (4) the timeout parameter is set
-		 */
-		if ((qp_type(qp) == IB_QPT_RC) &&
-		    (qp_state(qp) >= IB_QPS_RTS) &&
-		    (psn_compare(qp->req.psn, qp->comp.psn) > 0) &&
-		    qp->qp_timeout_jiffies)
-			mod_timer(&qp->retrans_timer,
-				  jiffies + qp->qp_timeout_jiffies);
+		reset_retry_timer(qp);
 		goto exit;
 
 	case COMPST_ERROR_RETRY:
@@ -793,6 +804,7 @@ int rxe_completer(struct rxe_qp *qp)
 			 */
 			qp->req.wait_for_rnr_timer = 1;
 			rxe_dbg_qp(qp, "set rnr nak timer\n");
+			// TODO who protects from destroy_qp??
 			mod_timer(&qp->rnr_nak_timer,
 				  jiffies + rnrnak_jiffies(aeth_syn(pkt)
 					   & ~AETH_TYPE_MASK));

drivers/infiniband/sw/rxe/rxe_net.c

Lines changed: 3 additions & 0 deletions
@@ -413,11 +413,14 @@ int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
 	int is_request = pkt->mask & RXE_REQ_MASK;
 	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
 
+	spin_lock_bh(&qp->state_lock);
 	if ((is_request && (qp_state(qp) < IB_QPS_RTS)) ||
 	    (!is_request && (qp_state(qp) < IB_QPS_RTR))) {
+		spin_unlock_bh(&qp->state_lock);
 		rxe_dbg_qp(qp, "Packet dropped. QP is not in ready state\n");
 		goto drop;
 	}
+	spin_unlock_bh(&qp->state_lock);
 
 	rxe_icrc_generate(skb, pkt);
 
drivers/infiniband/sw/rxe/rxe_qp.c

Lines changed: 85 additions & 68 deletions
@@ -325,8 +325,10 @@ int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd,
 	if (err)
 		goto err2;
 
+	spin_lock_bh(&qp->state_lock);
 	qp->attr.qp_state = IB_QPS_RESET;
 	qp->valid = 1;
+	spin_unlock_bh(&qp->state_lock);
 
 	return 0;
 
@@ -377,27 +379,9 @@ int rxe_qp_to_init(struct rxe_qp *qp, struct ib_qp_init_attr *init)
 	return 0;
 }
 
-/* called by the modify qp verb, this routine checks all the parameters before
- * making any changes
- */
 int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp,
 		    struct ib_qp_attr *attr, int mask)
 {
-	enum ib_qp_state cur_state = (mask & IB_QP_CUR_STATE) ?
-					attr->cur_qp_state : qp->attr.qp_state;
-	enum ib_qp_state new_state = (mask & IB_QP_STATE) ?
-					attr->qp_state : cur_state;
-
-	if (!ib_modify_qp_is_ok(cur_state, new_state, qp_type(qp), mask)) {
-		rxe_dbg_qp(qp, "invalid mask or state\n");
-		goto err1;
-	}
-
-	if (mask & IB_QP_STATE && cur_state == IB_QPS_SQD) {
-		if (qp->attr.sq_draining && new_state != IB_QPS_ERR)
-			goto err1;
-	}
-
 	if (mask & IB_QP_PORT) {
 		if (!rdma_is_port_valid(&rxe->ib_dev, attr->port_num)) {
 			rxe_dbg_qp(qp, "invalid port %d\n", attr->port_num);
@@ -508,22 +492,96 @@ static void rxe_qp_reset(struct rxe_qp *qp)
 /* move the qp to the error state */
 void rxe_qp_error(struct rxe_qp *qp)
 {
+	spin_lock_bh(&qp->state_lock);
 	qp->attr.qp_state = IB_QPS_ERR;
 
 	/* drain work and packet queues */
 	rxe_sched_task(&qp->resp.task);
 	rxe_sched_task(&qp->comp.task);
 	rxe_sched_task(&qp->req.task);
+	spin_unlock_bh(&qp->state_lock);
+}
+
+static void rxe_qp_sqd(struct rxe_qp *qp, struct ib_qp_attr *attr,
+		       int mask)
+{
+	spin_lock_bh(&qp->state_lock);
+	qp->attr.sq_draining = 1;
+	rxe_sched_task(&qp->comp.task);
+	rxe_sched_task(&qp->req.task);
+	spin_unlock_bh(&qp->state_lock);
+}
+
+/* caller should hold qp->state_lock */
+static int __qp_chk_state(struct rxe_qp *qp, struct ib_qp_attr *attr,
+			  int mask)
+{
+	enum ib_qp_state cur_state;
+	enum ib_qp_state new_state;
+
+	cur_state = (mask & IB_QP_CUR_STATE) ?
+				attr->cur_qp_state : qp->attr.qp_state;
+	new_state = (mask & IB_QP_STATE) ?
+				attr->qp_state : cur_state;
+
+	if (!ib_modify_qp_is_ok(cur_state, new_state, qp_type(qp), mask))
+		return -EINVAL;
+
+	if (mask & IB_QP_STATE && cur_state == IB_QPS_SQD) {
+		if (qp->attr.sq_draining && new_state != IB_QPS_ERR)
+			return -EINVAL;
+	}
+
+	return 0;
 }
 
+static const char *const qps2str[] = {
+	[IB_QPS_RESET] = "RESET",
+	[IB_QPS_INIT] = "INIT",
+	[IB_QPS_RTR] = "RTR",
+	[IB_QPS_RTS] = "RTS",
+	[IB_QPS_SQD] = "SQD",
+	[IB_QPS_SQE] = "SQE",
+	[IB_QPS_ERR] = "ERR",
+};
+
 /* called by the modify qp verb */
 int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask,
 		     struct ib_udata *udata)
 {
-	enum ib_qp_state cur_state = (mask & IB_QP_CUR_STATE) ?
-					attr->cur_qp_state : qp->attr.qp_state;
 	int err;
 
+	if (mask & IB_QP_CUR_STATE)
+		qp->attr.cur_qp_state = attr->qp_state;
+
+	if (mask & IB_QP_STATE) {
+		spin_lock_bh(&qp->state_lock);
+		err = __qp_chk_state(qp, attr, mask);
+		if (!err) {
+			qp->attr.qp_state = attr->qp_state;
+			rxe_dbg_qp(qp, "state -> %s\n",
+					qps2str[attr->qp_state]);
+		}
+		spin_unlock_bh(&qp->state_lock);
+
+		if (err)
+			return err;
+
+		switch (attr->qp_state) {
+		case IB_QPS_RESET:
+			rxe_qp_reset(qp);
+			break;
+		case IB_QPS_SQD:
+			rxe_qp_sqd(qp, attr, mask);
+			break;
+		case IB_QPS_ERR:
+			rxe_qp_error(qp);
+			break;
+		default:
+			break;
+		}
+	}
+
 	if (mask & IB_QP_MAX_QP_RD_ATOMIC) {
 		int max_rd_atomic = attr->max_rd_atomic ?
 			roundup_pow_of_two(attr->max_rd_atomic) : 0;
@@ -545,9 +603,6 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask,
 			return err;
 	}
 
-	if (mask & IB_QP_CUR_STATE)
-		qp->attr.cur_qp_state = attr->qp_state;
-
 	if (mask & IB_QP_EN_SQD_ASYNC_NOTIFY)
 		qp->attr.en_sqd_async_notify = attr->en_sqd_async_notify;
 
@@ -627,48 +682,6 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask,
 	if (mask & IB_QP_DEST_QPN)
 		qp->attr.dest_qp_num = attr->dest_qp_num;
 
-	if (mask & IB_QP_STATE) {
-		qp->attr.qp_state = attr->qp_state;
-
-		switch (attr->qp_state) {
-		case IB_QPS_RESET:
-			rxe_dbg_qp(qp, "state -> RESET\n");
-			rxe_qp_reset(qp);
-			break;
-
-		case IB_QPS_INIT:
-			rxe_dbg_qp(qp, "state -> INIT\n");
-			break;
-
-		case IB_QPS_RTR:
-			rxe_dbg_qp(qp, "state -> RTR\n");
-			break;
-
-		case IB_QPS_RTS:
-			rxe_dbg_qp(qp, "state -> RTS\n");
-			break;
-
-		case IB_QPS_SQD:
-			rxe_dbg_qp(qp, "state -> SQD\n");
-			if (cur_state != IB_QPS_SQD) {
-				qp->attr.sq_draining = 1;
-				rxe_sched_task(&qp->comp.task);
-				rxe_sched_task(&qp->req.task);
-			}
-			break;
-
-		case IB_QPS_SQE:
-			rxe_dbg_qp(qp, "state -> SQE !!?\n");
-			/* Not possible from modify_qp. */
-			break;
-
-		case IB_QPS_ERR:
-			rxe_dbg_qp(qp, "state -> ERR\n");
-			rxe_qp_error(qp);
-			break;
-		}
-	}
-
 	return 0;
 }
 
@@ -695,10 +708,12 @@ int rxe_qp_to_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask)
 	/* Applications that get this state typically spin on it.
 	 * Yield the processor
 	 */
-	if (qp->attr.sq_draining)
+	spin_lock_bh(&qp->state_lock);
+	if (qp->attr.sq_draining) {
+		spin_unlock_bh(&qp->state_lock);
 		cond_resched();
-
-	rxe_dbg_qp(qp, "attr->sq_draining = %d\n", attr->sq_draining);
+	}
+	spin_unlock_bh(&qp->state_lock);
 
 	return 0;
 }
@@ -722,7 +737,9 @@ static void rxe_qp_do_cleanup(struct work_struct *work)
 {
 	struct rxe_qp *qp = container_of(work, typeof(*qp), cleanup_work.work);
 
+	spin_lock_bh(&qp->state_lock);
 	qp->valid = 0;
+	spin_unlock_bh(&qp->state_lock);
 	qp->qp_timeout_jiffies = 0;
 
 	if (qp_type(qp) == IB_QPT_RC) {

drivers/infiniband/sw/rxe/rxe_recv.c

Lines changed: 8 additions & 2 deletions
@@ -38,13 +38,19 @@ static int check_type_state(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
 			return -EINVAL;
 	}
 
+	spin_lock_bh(&qp->state_lock);
 	if (pkt->mask & RXE_REQ_MASK) {
-		if (unlikely(qp_state(qp) < IB_QPS_RTR))
+		if (unlikely(qp_state(qp) < IB_QPS_RTR)) {
+			spin_unlock_bh(&qp->state_lock);
 			return -EINVAL;
+		}
 	} else {
-		if (unlikely(qp_state(qp) < IB_QPS_RTS))
+		if (unlikely(qp_state(qp) < IB_QPS_RTS)) {
+			spin_unlock_bh(&qp->state_lock);
 			return -EINVAL;
+		}
 	}
+	spin_unlock_bh(&qp->state_lock);
 
 	return 0;
 }
