Skip to content

Commit

Permalink
RDMA/rxe: Extend rxe_init_packet() to support frags
Browse files Browse the repository at this point in the history
Add a subroutine rxe_can_use_sg() to determine if a packet is
a candidate for a fragmented skb. Add a global variable rxe_use_sg
to control whether to support nonlinear skbs. Modify rxe_init_packet()
to test if the packet should use a fragmented skb. Fixup calls to
rxe_init_packet() to use the new API but disable creating nonlinear
skbs for now.

This is in preparation for using fragmented skbs.

Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
  • Loading branch information
Bob Pearson authored and intel-lab-lkp committed Oct 27, 2022
1 parent 0bd35d8 commit 8312fa3
Show file tree
Hide file tree
Showing 7 changed files with 91 additions and 15 deletions.
3 changes: 3 additions & 0 deletions drivers/infiniband/sw/rxe/rxe.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ MODULE_AUTHOR("Bob Pearson, Frank Zago, John Groves, Kamal Heib");
MODULE_DESCRIPTION("Soft RDMA transport");
MODULE_LICENSE("Dual BSD/GPL");

/* if true allow using fragmented skbs */
bool rxe_use_sg;

/* free resources for a rxe device all objects created for this device must
* have been destroyed
*/
Expand Down
3 changes: 3 additions & 0 deletions drivers/infiniband/sw/rxe/rxe.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@
#include "rxe_verbs.h"
#include "rxe_loc.h"

/* if true allow using fragmented skbs */
extern bool rxe_use_sg;

/*
* Version 1 and Version 2 are identical on 64 bit machines, but on 32 bit
* machines Version 2 has a different struct layout.
Expand Down
2 changes: 1 addition & 1 deletion drivers/infiniband/sw/rxe/rxe_loc.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ void rxe_mw_cleanup(struct rxe_pool_elem *elem);

/* rxe_net.c */
struct sk_buff *rxe_init_packet(struct rxe_qp *qp, struct rxe_av *av,
struct rxe_pkt_info *pkt);
struct rxe_pkt_info *pkt, bool *is_frag);
int rxe_prepare(struct rxe_av *av, struct rxe_pkt_info *pkt,
struct sk_buff *skb);
int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
Expand Down
12 changes: 8 additions & 4 deletions drivers/infiniband/sw/rxe/rxe_mr.c
Original file line number Diff line number Diff line change
Expand Up @@ -541,7 +541,7 @@ int rxe_num_dma_frags(const struct rxe_pd *pd, const struct rxe_dma_info *dma,
struct rxe_mr *mr = NULL;
int bytes;
u64 iova;
int ret;
int nf;
int num_frags = 0;

if (length == 0)
Expand Down Expand Up @@ -572,18 +572,22 @@ int rxe_num_dma_frags(const struct rxe_pd *pd, const struct rxe_dma_info *dma,
bytes = min_t(int, length, sge->length - buf_offset);
if (bytes > 0) {
iova = sge->addr + buf_offset;
ret = rxe_num_mr_frags(mr, iova, length);
if (ret < 0) {
nf = rxe_num_mr_frags(mr, iova, length);
if (nf < 0) {
rxe_put(mr);
return ret;
return nf;
}

num_frags += nf;
buf_offset += bytes;
resid -= bytes;
length -= bytes;
}
}

if (mr)
rxe_put(mr);

return num_frags;
}

Expand Down
79 changes: 71 additions & 8 deletions drivers/infiniband/sw/rxe/rxe_net.c
Original file line number Diff line number Diff line change
Expand Up @@ -442,15 +442,68 @@ int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
return err;
}

/**
* rxe_can_use_sg() - determine if packet is a candidate for fragmenting
* @rxe: the rxe device
* @pkt: packet info
*
* Limit to packets with:
* rxe_use_sg set
* qp is RC
* ndev supports SG
* #sges less than #frags for sends
*
* Returns: true if conditions are met else 0
*/
static bool rxe_can_use_sg(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
{
struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
int length = pkt->paylen - rxe_opcode[pkt->opcode].length
- RXE_ICRC_SIZE;
int nf;

if (!rxe_use_sg)
return false;
if (qp_type(pkt->qp) != IB_QPT_RC)
return false;
if (!(rxe->ndev->features & NETIF_F_SG))
return false;

/* check we don't have a pathological sge list with lots of
* short segments. Recall we need one extra frag for icrc.
*/
if (pkt->mask & RXE_SEND_MASK) {
nf = rxe_num_dma_frags(qp->pd, &pkt->wqe->dma, length);
return (nf >= 0 && nf <= MAX_SKB_FRAGS - 1) ? true : false;
}

return true;
}

#define RXE_MIN_SKB_SIZE (256)

/**
* rxe_init_packet - allocate and initialize a new skb
* @qp: the queue pair
* @av: remote address vector
* @pkt: packet info
* @frag: optional return value for fragmented skb
* on call if frag == NULL do not use fragmented skb
* on return if not NULL set *frag to 1
* if packet will be fragmented else 0
*
* Returns: an skb on success else NULL
*/
struct sk_buff *rxe_init_packet(struct rxe_qp *qp, struct rxe_av *av,
struct rxe_pkt_info *pkt)
struct rxe_pkt_info *pkt, bool *frag)
{
struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
unsigned int hdr_len;
struct sk_buff *skb = NULL;
struct net_device *ndev;
const struct ib_gid_attr *attr;
const int port_num = 1;
int skb_size;

attr = rdma_get_gid_attr(&rxe->ib_dev, port_num, av->grh.sgid_index);
if (IS_ERR(attr))
Expand All @@ -469,9 +522,19 @@ struct sk_buff *rxe_init_packet(struct rxe_qp *qp, struct rxe_av *av,
rcu_read_unlock();
goto out;
}
skb = alloc_skb(pkt->paylen + hdr_len + LL_RESERVED_SPACE(ndev),
GFP_ATOMIC);

skb_size = LL_RESERVED_SPACE(ndev) + hdr_len + pkt->paylen;
if (frag) {
if (rxe_use_sg && (skb_size > RXE_MIN_SKB_SIZE) &&
rxe_can_use_sg(qp, pkt)) {
skb_size = RXE_MIN_SKB_SIZE;
*frag = true;
} else {
*frag = false;
}
}

skb = alloc_skb(skb_size, GFP_ATOMIC);
if (unlikely(!skb)) {
rcu_read_unlock();
goto out;
Expand All @@ -480,18 +543,18 @@ struct sk_buff *rxe_init_packet(struct rxe_qp *qp, struct rxe_av *av,
skb_reserve(skb, hdr_len + LL_RESERVED_SPACE(ndev));

/* FIXME: hold reference to this netdev until life of this skb. */
skb->dev = ndev;
skb->dev = ndev;
rcu_read_unlock();

if (av->network_type == RXE_NETWORK_TYPE_IPV4)
skb->protocol = htons(ETH_P_IP);
else
skb->protocol = htons(ETH_P_IPV6);

pkt->rxe = rxe;
pkt->port_num = port_num;
pkt->hdr = skb_put(skb, pkt->paylen);
pkt->mask |= RXE_GRH_MASK;
if (frag && *frag)
pkt->hdr = skb_put(skb, rxe_opcode[pkt->opcode].length);
else
pkt->hdr = skb_put(skb, pkt->paylen);

out:
rdma_put_gid_attr(attr);
Expand Down
2 changes: 1 addition & 1 deletion drivers/infiniband/sw/rxe/rxe_req.c
Original file line number Diff line number Diff line change
Expand Up @@ -491,7 +491,7 @@ static struct sk_buff *rxe_init_req_packet(struct rxe_qp *qp,
pad + RXE_ICRC_SIZE;

/* init skb */
skb = rxe_init_packet(qp, av, pkt);
skb = rxe_init_packet(qp, av, pkt, NULL);
if (unlikely(!skb))
goto err_out;

Expand Down
5 changes: 4 additions & 1 deletion drivers/infiniband/sw/rxe/rxe_resp.c
Original file line number Diff line number Diff line change
Expand Up @@ -665,6 +665,7 @@ static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp,
u32 psn,
u8 syndrome)
{
struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
struct sk_buff *skb;
int paylen;
int pad;
Expand All @@ -673,13 +674,15 @@ static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp,
pad = (-payload) & 0x3;
paylen = rxe_opcode[opcode].length + payload + pad + RXE_ICRC_SIZE;

ack->rxe = rxe;
ack->qp = qp;
ack->opcode = opcode;
ack->mask = rxe_opcode[opcode].mask;
ack->paylen = paylen;
ack->psn = psn;
ack->port_num = 1;

skb = rxe_init_packet(qp, &qp->pri_av, ack);
skb = rxe_init_packet(qp, &qp->pri_av, ack, NULL);
if (!skb)
return NULL;

Expand Down

0 comments on commit 8312fa3

Please sign in to comment.