Skip to content

Commit 9dd0524

Browse files
larrch authored and jgunthorpe committed
RDMA/hns: Allocate one more recv SGE for HIP08
The RQ/SRQ of HIP08 needs one special SGE to stop receiving reliably. So the driver needs to allocate at least one SGE when creating an RQ/SRQ and ensure that at least one SGE is filled with the special value during post_recv.

Besides, the kernel driver should only do this for kernel ULPs. For userspace ULPs, the userspace driver will allocate the reserved SGE in its buffer, and the kernel driver just needs to pin the corresponding size of memory based on the userspace driver's requirements.

Link: https://lore.kernel.org/r/1611997090-48820-2-git-send-email-liweihang@huawei.com
Signed-off-by: Lang Cheng <chenglang@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
1 parent 899aba8 commit 9dd0524

File tree

5 files changed: 93 additions and 19 deletions

drivers/infiniband/hw/hns/hns_roce_device.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,8 @@
6565
#define HNS_ROCE_CQE_WCMD_EMPTY_BIT 0x2
6666
#define HNS_ROCE_MIN_CQE_CNT 16
6767

68+
#define HNS_ROCE_RESERVED_SGE 1
69+
6870
#define HNS_ROCE_MAX_IRQ_NUM 128
6971

7072
#define HNS_ROCE_SGE_IN_WQE 2
@@ -395,6 +397,7 @@ struct hns_roce_wq {
395397
spinlock_t lock;
396398
u32 wqe_cnt; /* WQE num */
397399
u32 max_gs;
400+
u32 rsv_sge;
398401
int offset;
399402
int wqe_shift; /* WQE size */
400403
u32 head;
@@ -498,6 +501,7 @@ struct hns_roce_srq {
498501
unsigned long srqn;
499502
u32 wqe_cnt;
500503
int max_gs;
504+
u32 rsv_sge;
501505
int wqe_shift;
502506
void __iomem *db_reg_l;
503507

drivers/infiniband/hw/hns/hns_roce_hw_v2.c

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -741,6 +741,7 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp,
741741
unsigned long flags;
742742
void *wqe = NULL;
743743
u32 wqe_idx;
744+
u32 max_sge;
744745
int nreq;
745746
int ret;
746747
int i;
@@ -754,6 +755,7 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp,
754755
goto out;
755756
}
756757

758+
max_sge = hr_qp->rq.max_gs - hr_qp->rq.rsv_sge;
757759
for (nreq = 0; wr; ++nreq, wr = wr->next) {
758760
if (unlikely(hns_roce_wq_overflow(&hr_qp->rq, nreq,
759761
hr_qp->ibqp.recv_cq))) {
@@ -764,9 +766,9 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp,
764766

765767
wqe_idx = (hr_qp->rq.head + nreq) & (hr_qp->rq.wqe_cnt - 1);
766768

767-
if (unlikely(wr->num_sge > hr_qp->rq.max_gs)) {
769+
if (unlikely(wr->num_sge > max_sge)) {
768770
ibdev_err(ibdev, "num_sge = %d >= max_sge = %u.\n",
769-
wr->num_sge, hr_qp->rq.max_gs);
771+
wr->num_sge, max_sge);
770772
ret = -EINVAL;
771773
*bad_wr = wr;
772774
goto out;
@@ -781,9 +783,10 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp,
781783
dseg++;
782784
}
783785

784-
if (wr->num_sge < hr_qp->rq.max_gs) {
786+
if (hr_qp->rq.rsv_sge) {
785787
dseg->lkey = cpu_to_le32(HNS_ROCE_INVALID_LKEY);
786788
dseg->addr = 0;
789+
dseg->len = cpu_to_le32(HNS_ROCE_INVALID_SGE_LENGTH);
787790
}
788791

789792
/* rq support inline data */
@@ -879,16 +882,21 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq,
879882
__le32 *srq_idx;
880883
int ret = 0;
881884
int wqe_idx;
885+
u32 max_sge;
882886
void *wqe;
883887
int nreq;
884888
int i;
885889

886890
spin_lock_irqsave(&srq->lock, flags);
887891

888892
ind = srq->head & (srq->wqe_cnt - 1);
893+
max_sge = srq->max_gs - srq->rsv_sge;
889894

890895
for (nreq = 0; wr; ++nreq, wr = wr->next) {
891-
if (unlikely(wr->num_sge >= srq->max_gs)) {
896+
if (unlikely(wr->num_sge > max_sge)) {
897+
ibdev_err(&hr_dev->ib_dev,
898+
"srq: num_sge = %d, max_sge = %u.\n",
899+
wr->num_sge, max_sge);
892900
ret = -EINVAL;
893901
*bad_wr = wr;
894902
break;
@@ -916,9 +924,9 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq,
916924
dseg[i].addr = cpu_to_le64(wr->sg_list[i].addr);
917925
}
918926

919-
if (wr->num_sge < srq->max_gs) {
920-
dseg[i].len = 0;
921-
dseg[i].lkey = cpu_to_le32(0x100);
927+
if (srq->rsv_sge) {
928+
dseg[i].len = cpu_to_le32(HNS_ROCE_INVALID_SGE_LENGTH);
929+
dseg[i].lkey = cpu_to_le32(HNS_ROCE_INVALID_LKEY);
922930
dseg[i].addr = 0;
923931
}
924932

@@ -1999,10 +2007,12 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev)
19992007
caps->max_sq_sg = le16_to_cpu(resp_a->max_sq_sg);
20002008
caps->max_sq_inline = le16_to_cpu(resp_a->max_sq_inline);
20012009
caps->max_rq_sg = le16_to_cpu(resp_a->max_rq_sg);
2010+
caps->max_rq_sg = roundup_pow_of_two(caps->max_rq_sg);
20022011
caps->max_extend_sg = le32_to_cpu(resp_a->max_extend_sg);
20032012
caps->num_qpc_timer = le16_to_cpu(resp_a->num_qpc_timer);
20042013
caps->num_cqc_timer = le16_to_cpu(resp_a->num_cqc_timer);
20052014
caps->max_srq_sges = le16_to_cpu(resp_a->max_srq_sges);
2015+
caps->max_srq_sges = roundup_pow_of_two(caps->max_srq_sges);
20062016
caps->num_aeq_vectors = resp_a->num_aeq_vectors;
20072017
caps->num_other_vectors = resp_a->num_other_vectors;
20082018
caps->max_sq_desc_sz = resp_a->max_sq_desc_sz;
@@ -5071,7 +5081,7 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
50715081
done:
50725082
qp_attr->cur_qp_state = qp_attr->qp_state;
50735083
qp_attr->cap.max_recv_wr = hr_qp->rq.wqe_cnt;
5074-
qp_attr->cap.max_recv_sge = hr_qp->rq.max_gs;
5084+
qp_attr->cap.max_recv_sge = hr_qp->rq.max_gs - hr_qp->rq.rsv_sge;
50755085

50765086
if (!ibqp->uobject) {
50775087
qp_attr->cap.max_send_wr = hr_qp->sq.wqe_cnt;
@@ -5383,7 +5393,7 @@ static int hns_roce_v2_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
53835393

53845394
attr->srq_limit = limit_wl;
53855395
attr->max_wr = srq->wqe_cnt - 1;
5386-
attr->max_sge = srq->max_gs;
5396+
attr->max_sge = srq->max_gs - srq->rsv_sge;
53875397

53885398
out:
53895399
hns_roce_free_cmd_mailbox(hr_dev, mailbox);

drivers/infiniband/hw/hns/hns_roce_hw_v2.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,8 @@
9696
#define HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ PAGE_SIZE
9797
#define HNS_ROCE_V2_PAGE_SIZE_SUPPORTED 0xFFFFF000
9898
#define HNS_ROCE_V2_MAX_INNER_MTPT_NUM 2
99-
#define HNS_ROCE_INVALID_LKEY 0x100
99+
#define HNS_ROCE_INVALID_LKEY 0x0
100+
#define HNS_ROCE_INVALID_SGE_LENGTH 0x80000000
100101
#define HNS_ROCE_CMQ_TX_TIMEOUT 30000
101102
#define HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE 2
102103
#define HNS_ROCE_V2_RSV_QPS 8

drivers/infiniband/hw/hns/hns_roce_qp.c

Lines changed: 31 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -413,9 +413,32 @@ static void free_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
413413
spin_unlock(&hr_dev->qp_table.bank_lock);
414414
}
415415

416+
static u32 proc_rq_sge(struct hns_roce_dev *dev, struct hns_roce_qp *hr_qp,
417+
bool user)
418+
{
419+
u32 max_sge = dev->caps.max_rq_sg;
420+
421+
if (dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
422+
return max_sge;
423+
424+
/* Reserve SGEs only for HIP08 in kernel; The userspace driver will
425+
* calculate number of max_sge with reserved SGEs when allocating wqe
426+
* buf, so there is no need to do this again in kernel. But the number
427+
* may exceed the capacity of SGEs recorded in the firmware, so the
428+
* kernel driver should just adapt the value accordingly.
429+
*/
430+
if (user)
431+
max_sge = roundup_pow_of_two(max_sge + 1);
432+
else
433+
hr_qp->rq.rsv_sge = 1;
434+
435+
return max_sge;
436+
}
437+
416438
static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap,
417-
struct hns_roce_qp *hr_qp, int has_rq)
439+
struct hns_roce_qp *hr_qp, int has_rq, bool user)
418440
{
441+
u32 max_sge = proc_rq_sge(hr_dev, hr_qp, user);
419442
u32 cnt;
420443

421444
/* If srq exist, set zero for relative number of rq */
@@ -431,8 +454,9 @@ static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap,
431454

432455
/* Check the validity of QP support capacity */
433456
if (!cap->max_recv_wr || cap->max_recv_wr > hr_dev->caps.max_wqes ||
434-
cap->max_recv_sge > hr_dev->caps.max_rq_sg) {
435-
ibdev_err(&hr_dev->ib_dev, "RQ config error, depth=%u, sge=%d\n",
457+
cap->max_recv_sge > max_sge) {
458+
ibdev_err(&hr_dev->ib_dev,
459+
"RQ config error, depth = %u, sge = %u\n",
436460
cap->max_recv_wr, cap->max_recv_sge);
437461
return -EINVAL;
438462
}
@@ -444,7 +468,8 @@ static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap,
444468
return -EINVAL;
445469
}
446470

447-
hr_qp->rq.max_gs = roundup_pow_of_two(max(1U, cap->max_recv_sge));
471+
hr_qp->rq.max_gs = roundup_pow_of_two(max(1U, cap->max_recv_sge) +
472+
hr_qp->rq.rsv_sge);
448473

449474
if (hr_dev->caps.max_rq_sg <= HNS_ROCE_SGE_IN_WQE)
450475
hr_qp->rq.wqe_shift = ilog2(hr_dev->caps.max_rq_desc_sz);
@@ -459,7 +484,7 @@ static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap,
459484
hr_qp->rq_inl_buf.wqe_cnt = 0;
460485

461486
cap->max_recv_wr = cnt;
462-
cap->max_recv_sge = hr_qp->rq.max_gs;
487+
cap->max_recv_sge = hr_qp->rq.max_gs - hr_qp->rq.rsv_sge;
463488

464489
return 0;
465490
}
@@ -918,7 +943,7 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
918943
hr_qp->sq_signal_bits = IB_SIGNAL_REQ_WR;
919944

920945
ret = set_rq_size(hr_dev, &init_attr->cap, hr_qp,
921-
hns_roce_qp_has_rq(init_attr));
946+
hns_roce_qp_has_rq(init_attr), !!udata);
922947
if (ret) {
923948
ibdev_err(ibdev, "failed to set user RQ size, ret = %d.\n",
924949
ret);

drivers/infiniband/hw/hns/hns_roce_srq.c

Lines changed: 37 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
* Copyright (c) 2018 Hisilicon Limited.
44
*/
55

6+
#include <linux/pci.h>
67
#include <rdma/ib_umem.h>
78
#include "hns_roce_device.h"
89
#include "hns_roce_cmd.h"
@@ -277,6 +278,28 @@ static void free_srq_wrid(struct hns_roce_srq *srq)
277278
srq->wrid = NULL;
278279
}
279280

281+
static u32 proc_srq_sge(struct hns_roce_dev *dev, struct hns_roce_srq *hr_srq,
282+
bool user)
283+
{
284+
u32 max_sge = dev->caps.max_srq_sges;
285+
286+
if (dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
287+
return max_sge;
288+
289+
/* Reserve SGEs only for HIP08 in kernel; The userspace driver will
290+
* calculate number of max_sge with reserved SGEs when allocating wqe
291+
* buf, so there is no need to do this again in kernel. But the number
292+
* may exceed the capacity of SGEs recorded in the firmware, so the
293+
* kernel driver should just adapt the value accordingly.
294+
*/
295+
if (user)
296+
max_sge = roundup_pow_of_two(max_sge + 1);
297+
else
298+
hr_srq->rsv_sge = 1;
299+
300+
return max_sge;
301+
}
302+
280303
int hns_roce_create_srq(struct ib_srq *ib_srq,
281304
struct ib_srq_init_attr *init_attr,
282305
struct ib_udata *udata)
@@ -286,23 +309,32 @@ int hns_roce_create_srq(struct ib_srq *ib_srq,
286309
struct hns_roce_srq *srq = to_hr_srq(ib_srq);
287310
struct ib_device *ibdev = &hr_dev->ib_dev;
288311
struct hns_roce_ib_create_srq ucmd = {};
312+
u32 max_sge;
289313
int ret;
290314
u32 cqn;
291315

292316
if (init_attr->srq_type != IB_SRQT_BASIC &&
293317
init_attr->srq_type != IB_SRQT_XRC)
294318
return -EOPNOTSUPP;
295319

296-
/* Check the actual SRQ wqe and SRQ sge num */
320+
max_sge = proc_srq_sge(hr_dev, srq, !!udata);
321+
297322
if (init_attr->attr.max_wr >= hr_dev->caps.max_srq_wrs ||
298-
init_attr->attr.max_sge > hr_dev->caps.max_srq_sges)
323+
init_attr->attr.max_sge > max_sge) {
324+
ibdev_err(&hr_dev->ib_dev,
325+
"SRQ config error, depth = %u, sge = %d\n",
326+
init_attr->attr.max_wr, init_attr->attr.max_sge);
299327
return -EINVAL;
328+
}
300329

301330
mutex_init(&srq->mutex);
302331
spin_lock_init(&srq->lock);
303332

304333
srq->wqe_cnt = roundup_pow_of_two(init_attr->attr.max_wr + 1);
305-
srq->max_gs = init_attr->attr.max_sge;
334+
srq->max_gs =
335+
roundup_pow_of_two(init_attr->attr.max_sge + srq->rsv_sge);
336+
init_attr->attr.max_wr = srq->wqe_cnt;
337+
init_attr->attr.max_sge = srq->max_gs;
306338

307339
if (udata) {
308340
ret = ib_copy_from_udata(&ucmd, udata,
@@ -349,6 +381,8 @@ int hns_roce_create_srq(struct ib_srq *ib_srq,
349381

350382
srq->event = hns_roce_ib_srq_event;
351383
resp.srqn = srq->srqn;
384+
srq->max_gs = init_attr->attr.max_sge;
385+
init_attr->attr.max_sge = srq->max_gs - srq->rsv_sge;
352386

353387
if (udata) {
354388
ret = ib_copy_to_udata(udata, &resp,

0 commit comments

Comments
 (0)