Skip to content

Commit 5e0d2ee

Browse files
Tariq ToukanSaeed Mahameed
authored andcommitted
net/mlx5e: XDP, Support Enhanced Multi-Packet TX WQE
Add support for the HW feature of multi-packet WQE in XDP xmit flow. The conventional TX descriptor (WQE, Work Queue Element) serves a single packet. Our HW has support for multi-packet WQE (MPWQE) in which a single descriptor serves multiple TX packets. This reduces both the PCI overhead and the CPU cycles spent on writing descriptors. In this patch we add support for the HW feature, which is supported starting from ConnectX-5. Performance: Tested packet rate for UDP 64Byte multi-stream over ConnectX-5 NICs. CPU: Intel(R) Xeon(R) CPU E5-2680 v3 @ 2.50GHz XDP_TX: We see a huge gain on single port ConnectX-5, and reach the 100 Mpps milestone. * Single-port HCA: Before: 70 Mpps After: 100 Mpps (+42.8%) * Dual-port HCA: Before: 51.7 Mpps After: 57.3 Mpps (+10.8%) * In both cases we tested traffic on one port. For now, on dual-port HCAs we see only a small gain; we are working to overcome this bottleneck, but for the moment we can reach the desired numbers seen on single-port HCAs only with experimental firmware on dual-port HCAs. XDP_REDIRECT: Redirect from (A) ConnectX-5 to (B) ConnectX-5. Due to a setup limitation, (A) and (B) are on different NUMA nodes, so absolute performance numbers are not optimal. Note: Below is the transmit rate of (B), not the redirect rate of (A) which is in some cases higher. * (B) is single-port: Before: 77 Mpps After: 90 Mpps (+16.8%) * (B) is dual-port: Before: 61 Mpps After: 72 Mpps (+18%) Signed-off-by: Tariq Toukan <tariqt@mellanox.com> Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
1 parent 1feeab8 commit 5e0d2ee

File tree

5 files changed

+174
-27
lines changed

5 files changed

+174
-27
lines changed

drivers/net/ethernet/mellanox/mlx5/core/en.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -416,6 +416,16 @@ struct mlx5e_xdp_wqe_info {
416416
u8 num_ds;
417417
};
418418

419+
/* Per-SQ state of the multi-packet WQE (MPWQE) session currently being
 * built by the XDP transmit path.
 */
struct mlx5e_xdp_mpwqe {
420+
/* Current MPWQE session; wqe is NULL while no session is open */
421+
struct mlx5e_tx_wqe *wqe;
422+
/* DS entries used so far in the session WQE: starts at
 * MLX5E_XDP_TX_EMPTY_DS_COUNT and grows by one per added data segment
 */
u8 ds_count;
423+
/* DS capacity of this session; the session is completed as soon as
 * ds_count reaches this value
 */
u8 max_ds_count;
424+
};
425+
426+
/* Forward declaration: the function-pointer type below is needed by the
 * definition of struct mlx5e_xdpsq itself.
 */
struct mlx5e_xdpsq;
427+
/* Per-SQ XDP frame transmit handler, stored in mlx5e_xdpsq::xmit_xdp_frame.
 * Returns true when the frame was queued on the SQ, false otherwise.
 */
typedef bool (*mlx5e_fp_xmit_xdp_frame)(struct mlx5e_xdpsq*,
428+
struct mlx5e_xdp_info*);
419429
struct mlx5e_xdpsq {
420430
/* data path */
421431

@@ -428,12 +438,14 @@ struct mlx5e_xdpsq {
428438
u32 xdpi_fifo_pc ____cacheline_aligned_in_smp;
429439
u16 pc;
430440
struct mlx5_wqe_ctrl_seg *doorbell_cseg;
441+
struct mlx5e_xdp_mpwqe mpwqe;
431442

432443
struct mlx5e_cq cq;
433444

434445
/* read only */
435446
struct mlx5_wq_cyc wq;
436447
struct mlx5e_xdpsq_stats *stats;
448+
mlx5e_fp_xmit_xdp_frame xmit_xdp_frame;
437449
struct {
438450
struct mlx5e_xdp_wqe_info *wqe_info;
439451
struct mlx5e_xdp_info_fifo xdpi_fifo;

drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c

Lines changed: 108 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_dma_info *di,
4747
xdpi.xdpf->len, PCI_DMA_TODEVICE);
4848
xdpi.di = *di;
4949

50-
return mlx5e_xmit_xdp_frame(sq, &xdpi);
50+
return sq->xmit_xdp_frame(sq, &xdpi);
5151
}
5252

5353
/* returns true if packet was consumed by xdp */
@@ -102,7 +102,98 @@ bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
102102
}
103103
}
104104

105-
bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi)
105+
/* Open a new MPWQE session on @sq: fetch (and zero) the WQE at the current
 * producer position, reset ds_count to the size of an empty WQE, and
 * compute how many DS entries the session may hold (max_ds_count).
 */
static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq)
106+
{
107+
struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
108+
struct mlx5_wq_cyc *wq = &sq->wq;
109+
u8 wqebbs;
110+
u16 pi;
111+
112+
mlx5e_xdpsq_fetch_wqe(sq, &session->wqe);
113+
114+
prefetchw(session->wqe->data);
115+
session->ds_count = MLX5E_XDP_TX_EMPTY_DS_COUNT;
116+
117+
pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
118+
119+
/* The product MLX5_SEND_WQE_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS
120+
* (16 * 4 == 64) does not fit in the 6-bit DS field of Ctrl Segment.
121+
* We use a bound lower than MLX5_SEND_WQE_MAX_WQEBBS to let a
122+
* full-session WQE be cache-aligned.
123+
*/
124+
#if L1_CACHE_BYTES < 128
125+
#define MLX5E_XDP_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 1)
126+
#else
127+
#define MLX5E_XDP_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 2)
128+
#endif
129+
130+
/* NOTE(review): mlx5_wq_cyc_get_contig_wqebbs() presumably returns the
 * number of WQEBBs available from pi before the cyclic queue wraps, so
 * that a session never spans the wrap-around point -- confirm against
 * its definition.
 */
wqebbs = min_t(u16, mlx5_wq_cyc_get_contig_wqebbs(wq, pi),
131+
MLX5E_XDP_MPW_MAX_WQEBBS);
132+
133+
session->max_ds_count = MLX5_SEND_WQEBB_NUM_DS * wqebbs;
134+
}
135+
136+
/* Close the current MPWQE session on @sq: fill the control segment, record
 * the WQE size in wqe_info, advance the producer counter and remember the
 * control segment for the next doorbell.
 *
 * session->wqe is dereferenced unconditionally, so a session must be open
 * when this is called.
 */
static void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq)
137+
{
138+
struct mlx5_wq_cyc *wq = &sq->wq;
139+
struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
140+
struct mlx5_wqe_ctrl_seg *cseg = &session->wqe->ctrl;
141+
u16 ds_count = session->ds_count;
142+
/* Taken before sq->pc is advanced below */
u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
143+
struct mlx5e_xdp_wqe_info *wi = &sq->db.wqe_info[pi];
144+
145+
cseg->opmod_idx_opcode =
146+
cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_ENHANCED_MPSW);
147+
cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_count);
148+
149+
/* WQEBBs consumed by the whole session WQE, partial last one included */
wi->num_wqebbs = DIV_ROUND_UP(ds_count, MLX5_SEND_WQEBB_NUM_DS);
150+
/* Only the data segments added to the session, without the empty-WQE part */
wi->num_ds = ds_count - MLX5E_XDP_TX_EMPTY_DS_COUNT;
151+
152+
sq->pc += wi->num_wqebbs;
153+
154+
sq->doorbell_cseg = cseg;
155+
156+
session->wqe = NULL; /* Close session */
157+
}
158+
159+
/* MPWQE flavour of the XDP frame transmit handler.
 *
 * Adds the frame described by @xdpi as one data segment of the current
 * MPWQE session on @sq, opening a session first if none is open and
 * completing it once it reaches max_ds_count.
 *
 * Returns true if the frame was queued (and @xdpi pushed to the xdpi fifo),
 * false if it was rejected because it is longer than sq->hw_mtu or because
 * the SQ has no room for a new session.
 */
static bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq,
160+
struct mlx5e_xdp_info *xdpi)
161+
{
162+
struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
163+
struct mlx5e_xdpsq_stats *stats = sq->stats;
164+
165+
dma_addr_t dma_addr = xdpi->dma_addr;
166+
struct xdp_frame *xdpf = xdpi->xdpf;
167+
unsigned int dma_len = xdpf->len;
168+
169+
if (unlikely(sq->hw_mtu < dma_len)) {
170+
stats->err++;
171+
return false;
172+
}
173+
174+
if (unlikely(!session->wqe)) {
175+
/* Room is checked for MLX5_SEND_WQE_MAX_WQEBBS, an upper bound on
 * the size a session can grow to.
 */
if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc,
176+
MLX5_SEND_WQE_MAX_WQEBBS))) {
177+
/* SQ is full, ring doorbell */
178+
mlx5e_xmit_xdp_doorbell(sq);
179+
stats->full++;
180+
return false;
181+
}
182+
183+
mlx5e_xdp_mpwqe_session_start(sq);
184+
}
185+
186+
/* NOTE(review): dma_len is narrowed to the helper's u16 parameter;
 * this assumes the hw_mtu check above keeps it within 16 bits -- confirm.
 */
mlx5e_xdp_mpwqe_add_dseg(sq, dma_addr, dma_len);
187+
188+
if (unlikely(session->ds_count == session->max_ds_count))
189+
mlx5e_xdp_mpwqe_complete(sq);
190+
191+
mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi);
192+
stats->xmit++;
193+
return true;
194+
}
195+
196+
static bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi)
106197
{
107198
struct mlx5_wq_cyc *wq = &sq->wq;
108199
u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
@@ -304,16 +395,19 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
304395

305396
xdpi.xdpf = xdpf;
306397

307-
if (unlikely(!mlx5e_xmit_xdp_frame(sq, &xdpi))) {
398+
if (unlikely(!sq->xmit_xdp_frame(sq, &xdpi))) {
308399
dma_unmap_single(sq->pdev, xdpi.dma_addr,
309400
xdpf->len, DMA_TO_DEVICE);
310401
xdp_return_frame_rx_napi(xdpf);
311402
drops++;
312403
}
313404
}
314405

315-
if (flags & XDP_XMIT_FLUSH)
406+
if (flags & XDP_XMIT_FLUSH) {
407+
if (sq->mpwqe.wqe)
408+
mlx5e_xdp_mpwqe_complete(sq);
316409
mlx5e_xmit_xdp_doorbell(sq);
410+
}
317411

318412
return n - drops;
319413
}
@@ -322,10 +416,20 @@ void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq)
322416
{
323417
struct mlx5e_xdpsq *xdpsq = &rq->xdpsq;
324418

419+
if (xdpsq->mpwqe.wqe)
420+
mlx5e_xdp_mpwqe_complete(xdpsq);
421+
325422
mlx5e_xmit_xdp_doorbell(xdpsq);
326423

327424
if (xdpsq->redirect_flush) {
328425
xdp_do_flush_map();
329426
xdpsq->redirect_flush = false;
330427
}
331428
}
429+
430+
/* Select the XDP frame transmit handler of @sq: the multi-packet WQE
 * variant when @is_mpw is true, the single-packet one otherwise.
 */
void mlx5e_set_xmit_fp(struct mlx5e_xdpsq *sq, bool is_mpw)
431+
{
432+
sq->xmit_xdp_frame = is_mpw ?
433+
mlx5e_xmit_xdp_frame_mpwqe : mlx5e_xmit_xdp_frame;
434+
}
435+

drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,15 +37,16 @@
3737
#define MLX5E_XDP_MAX_MTU ((int)(PAGE_SIZE - \
3838
MLX5_SKB_FRAG_SZ(XDP_PACKET_HEADROOM)))
3939
#define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN)
40-
#define MLX5E_XDP_TX_DS_COUNT \
41-
((sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS) + 1 /* SG DS */)
40+
#define MLX5E_XDP_TX_EMPTY_DS_COUNT \
41+
(sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS)
42+
#define MLX5E_XDP_TX_DS_COUNT (MLX5E_XDP_TX_EMPTY_DS_COUNT + 1 /* SG DS */)
4243

4344
bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
4445
void *va, u16 *rx_headroom, u32 *len);
4546
bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq);
4647
void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq);
48+
void mlx5e_set_xmit_fp(struct mlx5e_xdpsq *sq, bool is_mpw);
4749
void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq);
48-
bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi);
4950
int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
5051
u32 flags);
5152

@@ -57,6 +58,28 @@ static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_xdpsq *sq)
5758
}
5859
}
5960

61+
/* Append one data segment (@dma_addr, @dma_len) to the open MPWQE session
 * of @sq and bump the session's ds_count.
 *
 * session->wqe is used without a NULL check, so a session must be open.
 */
static inline void
62+
mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq, dma_addr_t dma_addr, u16 dma_len)
63+
{
64+
struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
65+
/* Treat the WQE as an array of DS-sized slots and take the next free
 * one; the post-increment reserves it.
 */
struct mlx5_wqe_data_seg *dseg =
66+
(struct mlx5_wqe_data_seg *)session->wqe + session->ds_count++;
67+
68+
dseg->addr = cpu_to_be64(dma_addr);
69+
dseg->byte_count = cpu_to_be32(dma_len);
70+
dseg->lkey = sq->mkey_be;
71+
}
72+
73+
/* Store in *@wqe a pointer to the WQE at the current producer position of
 * @sq, after zeroing its first sizeof(struct mlx5e_tx_wqe) bytes.
 * The producer counter itself is not advanced here.
 */
static inline void mlx5e_xdpsq_fetch_wqe(struct mlx5e_xdpsq *sq,
74+
struct mlx5e_tx_wqe **wqe)
75+
{
76+
struct mlx5_wq_cyc *wq = &sq->wq;
77+
u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
78+
79+
*wqe = mlx5_wq_cyc_get_wqe(wq, pi);
80+
memset(*wqe, 0, sizeof(**wqe));
81+
}
82+
6083
static inline void
6184
mlx5e_xdpi_fifo_push(struct mlx5e_xdp_info_fifo *fifo,
6285
struct mlx5e_xdp_info *xi)

drivers/net/ethernet/mellanox/mlx5/core/en_main.c

Lines changed: 27 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ struct mlx5e_rq_param {
6161
/* Parameters used when creating a send queue. */
struct mlx5e_sq_param {
6262
u32 sqc[MLX5_ST_SZ_DW(sqc)];
6363
struct mlx5_wq_param wq;
64+
/* Set for XDP SQs from the enhanced_multi_pkt_send_wqe capability;
 * selects the multi-packet WQE transmit handler when the SQ is opened.
 */
bool is_mpw;
6465
};
6566

6667
struct mlx5e_cq_param {
@@ -1586,11 +1587,8 @@ static int mlx5e_open_xdpsq(struct mlx5e_channel *c,
15861587
struct mlx5e_xdpsq *sq,
15871588
bool is_redirect)
15881589
{
1589-
unsigned int ds_cnt = MLX5E_XDP_TX_DS_COUNT;
15901590
struct mlx5e_create_sq_param csp = {};
1591-
unsigned int inline_hdr_sz = 0;
15921591
int err;
1593-
int i;
15941592

15951593
err = mlx5e_alloc_xdpsq(c, params, param, sq, is_redirect);
15961594
if (err)
@@ -1606,27 +1604,35 @@ static int mlx5e_open_xdpsq(struct mlx5e_channel *c,
16061604
if (err)
16071605
goto err_free_xdpsq;
16081606

1609-
if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) {
1610-
inline_hdr_sz = MLX5E_XDP_MIN_INLINE;
1611-
ds_cnt++;
1612-
}
1607+
mlx5e_set_xmit_fp(sq, param->is_mpw);
1608+
1609+
if (!param->is_mpw) {
1610+
unsigned int ds_cnt = MLX5E_XDP_TX_DS_COUNT;
1611+
unsigned int inline_hdr_sz = 0;
1612+
int i;
16131613

1614-
/* Pre initialize fixed WQE fields */
1615-
for (i = 0; i < mlx5_wq_cyc_get_size(&sq->wq); i++) {
1616-
struct mlx5e_xdp_wqe_info *wi = &sq->db.wqe_info[i];
1617-
struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(&sq->wq, i);
1618-
struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
1619-
struct mlx5_wqe_eth_seg *eseg = &wqe->eth;
1620-
struct mlx5_wqe_data_seg *dseg;
1614+
if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) {
1615+
inline_hdr_sz = MLX5E_XDP_MIN_INLINE;
1616+
ds_cnt++;
1617+
}
1618+
1619+
/* Pre initialize fixed WQE fields */
1620+
for (i = 0; i < mlx5_wq_cyc_get_size(&sq->wq); i++) {
1621+
struct mlx5e_xdp_wqe_info *wi = &sq->db.wqe_info[i];
1622+
struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(&sq->wq, i);
1623+
struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
1624+
struct mlx5_wqe_eth_seg *eseg = &wqe->eth;
1625+
struct mlx5_wqe_data_seg *dseg;
16211626

1622-
cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
1623-
eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz);
1627+
cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
1628+
eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz);
16241629

1625-
dseg = (struct mlx5_wqe_data_seg *)cseg + (ds_cnt - 1);
1626-
dseg->lkey = sq->mkey_be;
1630+
dseg = (struct mlx5_wqe_data_seg *)cseg + (ds_cnt - 1);
1631+
dseg->lkey = sq->mkey_be;
16271632

1628-
wi->num_wqebbs = 1;
1629-
wi->num_ds = 1;
1633+
wi->num_wqebbs = 1;
1634+
wi->num_ds = 1;
1635+
}
16301636
}
16311637

16321638
return 0;
@@ -2335,6 +2341,7 @@ static void mlx5e_build_xdpsq_param(struct mlx5e_priv *priv,
23352341

23362342
mlx5e_build_sq_param_common(priv, param);
23372343
MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size);
2344+
param->is_mpw = MLX5_CAP_ETH(priv->mdev, enhanced_multi_pkt_send_wqe);
23382345
}
23392346

23402347
static void mlx5e_build_channel_param(struct mlx5e_priv *priv,

include/linux/mlx5/device.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -421,6 +421,7 @@ enum {
421421
MLX5_OPCODE_ATOMIC_MASKED_FA = 0x15,
422422
MLX5_OPCODE_BIND_MW = 0x18,
423423
MLX5_OPCODE_CONFIG_CMD = 0x1f,
424+
MLX5_OPCODE_ENHANCED_MPSW = 0x29,
424425

425426
MLX5_RECV_OPCODE_RDMA_WRITE_IMM = 0x00,
426427
MLX5_RECV_OPCODE_SEND = 0x01,

0 commit comments

Comments
 (0)