
Commit 1943d8b

jbrandeb authored and Jeff Kirsher committed
i40e/i40evf: enable hardware feature head write back
The hardware supports a feature that avoids marking each completed descriptor in the ring with a DD bit; instead, it writes to a memory location the position up to which the driver should clean. Enable this feature.

Change-ID: I5da4e0681f0b581a6401c950a81808792267fe57
Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Signed-off-by: Mitch Williams <mitch.a.williams@intel.com>
Signed-off-by: Catherine Sullivan <catherine.sullivan@intel.com>
Tested-by: Kavindya Deegala <kavindya.s.deegala@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
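With head write-back enabled, the hardware reports the cleaned-up-to position in a u32 that sits immediately after the last descriptor of the ring: the patch programs tx_ctx.head_wb_addr to ring->dma + ring->count * sizeof(struct i40e_tx_desc) and reads the value back through the new i40e_get_head() helper. Below is a minimal, standalone C sketch of that layout, using made-up types (fake_tx_desc, fake_ring) in place of the driver's structures; it is illustrative only, not the driver code.

    /* Sketch of the head write-back layout used by this patch (illustrative only).
     * The ring buffer is laid out as:
     *   [desc 0][desc 1]...[desc count-1][u32 head write-back]
     * and the whole allocation is then rounded up to 4 KiB.
     */
    #include <stdint.h>

    struct fake_tx_desc { uint64_t qw0, qw1; };  /* stands in for the 16-byte struct i40e_tx_desc */

    struct fake_ring {
            void *desc;       /* CPU address of the descriptor ring */
            uint16_t count;   /* number of descriptors in the ring */
    };

    /* Mirrors what i40e_get_head() does: read the head index the hardware
     * wrote just past the last descriptor. The real driver wraps the read
     * in le32_to_cpu() so it also works on big-endian hosts.
     */
    static inline uint32_t sketch_get_head(const struct fake_ring *ring)
    {
            const void *head = (const struct fake_tx_desc *)ring->desc + ring->count;

            return *(const volatile uint32_t *)head;
    }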
1 parent 6c167f5 commit 1943d8b

File tree

4 files changed (+88, -12 lines)


drivers/net/ethernet/intel/i40e/i40e_main.c

Lines changed: 5 additions & 0 deletions
@@ -2181,6 +2181,11 @@ static int i40e_configure_tx_ring(struct i40e_ring *ring)
 	tx_ctx.fd_ena = !!(vsi->back->flags & (I40E_FLAG_FD_SB_ENABLED |
 					       I40E_FLAG_FD_ATR_ENABLED));
 	tx_ctx.timesync_ena = !!(vsi->back->flags & I40E_FLAG_PTP);
+	/* FDIR VSI tx ring can still use RS bit and writebacks */
+	if (vsi->type != I40E_VSI_FDIR)
+		tx_ctx.head_wb_ena = 1;
+	tx_ctx.head_wb_addr = ring->dma +
+			      (ring->count * sizeof(struct i40e_tx_desc));
 
 	/* As part of VSI creation/update, FW allocates certain
 	 * Tx arbitration queue sets for each TC enabled for

drivers/net/ethernet/intel/i40e/i40e_txrx.c

Lines changed: 40 additions & 6 deletions
@@ -618,6 +618,20 @@ static bool i40e_check_tx_hang(struct i40e_ring *tx_ring)
 	return ret;
 }
 
+/**
+ * i40e_get_head - Retrieve head from head writeback
+ * @tx_ring: tx ring to fetch head of
+ *
+ * Returns value of Tx ring head based on value stored
+ * in head write-back location
+ **/
+static inline u32 i40e_get_head(struct i40e_ring *tx_ring)
+{
+	void *head = (struct i40e_tx_desc *)tx_ring->desc + tx_ring->count;
+
+	return le32_to_cpu(*(volatile __le32 *)head);
+}
+
 /**
  * i40e_clean_tx_irq - Reclaim resources after transmit completes
  * @tx_ring: tx ring to clean
@@ -629,6 +643,7 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
 {
 	u16 i = tx_ring->next_to_clean;
 	struct i40e_tx_buffer *tx_buf;
+	struct i40e_tx_desc *tx_head;
 	struct i40e_tx_desc *tx_desc;
 	unsigned int total_packets = 0;
 	unsigned int total_bytes = 0;
@@ -637,6 +652,8 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
 	tx_desc = I40E_TX_DESC(tx_ring, i);
 	i -= tx_ring->count;
 
+	tx_head = I40E_TX_DESC(tx_ring, i40e_get_head(tx_ring));
+
 	do {
 		struct i40e_tx_desc *eop_desc = tx_buf->next_to_watch;
 
@@ -647,9 +664,8 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
 		/* prevent any other reads prior to eop_desc */
 		read_barrier_depends();
 
-		/* if the descriptor isn't done, no work yet to do */
-		if (!(eop_desc->cmd_type_offset_bsz &
-		      cpu_to_le64(I40E_TX_DESC_DTYPE_DESC_DONE)))
+		/* we have caught up to head, no work left to do */
+		if (tx_head == tx_desc)
 			break;
 
 		/* clear next_to_watch to prevent false hangs */
@@ -905,6 +921,10 @@ int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring)
 
 	/* round up to nearest 4K */
 	tx_ring->size = tx_ring->count * sizeof(struct i40e_tx_desc);
+	/* add u32 for head writeback, align after this takes care of
+	 * guaranteeing this is at least one cache line in size
+	 */
+	tx_ring->size += sizeof(u32);
 	tx_ring->size = ALIGN(tx_ring->size, 4096);
 	tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
 					   &tx_ring->dma, GFP_KERNEL);
@@ -2042,9 +2062,23 @@ static void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
 		tx_bi = &tx_ring->tx_bi[i];
 	}
 
-	tx_desc->cmd_type_offset_bsz =
-		build_ctob(td_cmd, td_offset, size, td_tag) |
-		cpu_to_le64((u64)I40E_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT);
+	/* Place RS bit on last descriptor of any packet that spans across the
+	 * 4th descriptor (WB_STRIDE aka 0x3) in a 64B cacheline.
+	 */
+#define WB_STRIDE 0x3
+	if (((i & WB_STRIDE) != WB_STRIDE) &&
+	    (first <= &tx_ring->tx_bi[i]) &&
+	    (first >= &tx_ring->tx_bi[i & ~WB_STRIDE])) {
+		tx_desc->cmd_type_offset_bsz =
+			build_ctob(td_cmd, td_offset, size, td_tag) |
+			cpu_to_le64((u64)I40E_TX_DESC_CMD_EOP <<
+				    I40E_TXD_QW1_CMD_SHIFT);
+	} else {
+		tx_desc->cmd_type_offset_bsz =
+			build_ctob(td_cmd, td_offset, size, td_tag) |
+			cpu_to_le64((u64)I40E_TXD_CMD <<
+				    I40E_TXD_QW1_CMD_SHIFT);
+	}
 
 	netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev,
 						 tx_ring->queue_index),
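Because completion is now reported through the head write-back location rather than per-descriptor DD bits, the transmit path also limits how often it asks the hardware for a descriptor write-back: the WB_STRIDE test above sets the RS bit only when the packet's last descriptor lands on the 4th slot of a 64-byte cacheline stride or the packet spans more than one stride. The following is a simplified, standalone sketch of that decision with hypothetical index arguments (first_idx, last_idx) standing in for the driver's tx_bi pointer comparisons; it ignores ring wrap-around and is not the driver code.

    /* Sketch of the WB_STRIDE decision in i40e_tx_map() (illustrative only). */
    #include <stdbool.h>
    #include <stdint.h>

    #define WB_STRIDE 0x3  /* 16-byte descriptors: 4 per 64-byte cacheline */

    /* Return true when the RS bit should be set on the packet's last descriptor. */
    static bool wb_stride_wants_rs(uint16_t first_idx, uint16_t last_idx)
    {
            /* RS is skipped only when the last descriptor is not the 4th of its
             * stride and the packet starts inside the same stride it ends in.
             */
            bool last_on_stride = (last_idx & WB_STRIDE) == WB_STRIDE;
            bool fits_in_stride = (first_idx <= last_idx) &&
                                  (first_idx >= (uint16_t)(last_idx & ~WB_STRIDE));

            return last_on_stride || !fits_in_stride;
    }

For example, a single-descriptor packet ending at index 5 would skip the RS bit, while one ending at index 3 or 7, or one spanning indices 2 through 5, would set it.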

drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c

Lines changed: 3 additions & 0 deletions
@@ -230,6 +230,9 @@ static int i40e_config_vsi_tx_queue(struct i40e_vf *vf, u16 vsi_idx,
 	tx_ctx.qlen = info->ring_len;
 	tx_ctx.rdylist = le16_to_cpu(pf->vsi[vsi_idx]->info.qs_handle[0]);
 	tx_ctx.rdylist_act = 0;
+	tx_ctx.head_wb_ena = 1;
+	tx_ctx.head_wb_addr = info->dma_ring_addr +
+			      (info->ring_len * sizeof(struct i40e_tx_desc));
 
 	/* clear the context in the HMC */
 	ret = i40e_clear_lan_tx_queue_context(hw, pf_queue_id);

drivers/net/ethernet/intel/i40evf/i40e_txrx.c

Lines changed: 40 additions & 6 deletions
@@ -169,6 +169,20 @@ static bool i40e_check_tx_hang(struct i40e_ring *tx_ring)
 	return ret;
 }
 
+/**
+ * i40e_get_head - Retrieve head from head writeback
+ * @tx_ring: tx ring to fetch head of
+ *
+ * Returns value of Tx ring head based on value stored
+ * in head write-back location
+ **/
+static inline u32 i40e_get_head(struct i40e_ring *tx_ring)
+{
+	void *head = (struct i40e_tx_desc *)tx_ring->desc + tx_ring->count;
+
+	return le32_to_cpu(*(volatile __le32 *)head);
+}
+
 /**
  * i40e_clean_tx_irq - Reclaim resources after transmit completes
  * @tx_ring: tx ring to clean
@@ -180,6 +194,7 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
 {
 	u16 i = tx_ring->next_to_clean;
 	struct i40e_tx_buffer *tx_buf;
+	struct i40e_tx_desc *tx_head;
 	struct i40e_tx_desc *tx_desc;
 	unsigned int total_packets = 0;
 	unsigned int total_bytes = 0;
@@ -188,6 +203,8 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
 	tx_desc = I40E_TX_DESC(tx_ring, i);
 	i -= tx_ring->count;
 
+	tx_head = I40E_TX_DESC(tx_ring, i40e_get_head(tx_ring));
+
 	do {
 		struct i40e_tx_desc *eop_desc = tx_buf->next_to_watch;
 
@@ -198,9 +215,8 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
 		/* prevent any other reads prior to eop_desc */
 		read_barrier_depends();
 
-		/* if the descriptor isn't done, no work yet to do */
-		if (!(eop_desc->cmd_type_offset_bsz &
-		      cpu_to_le64(I40E_TX_DESC_DTYPE_DESC_DONE)))
+		/* we have caught up to head, no work left to do */
+		if (tx_head == tx_desc)
 			break;
 
 		/* clear next_to_watch to prevent false hangs */
@@ -432,6 +448,10 @@ int i40evf_setup_tx_descriptors(struct i40e_ring *tx_ring)
 
 	/* round up to nearest 4K */
 	tx_ring->size = tx_ring->count * sizeof(struct i40e_tx_desc);
+	/* add u32 for head writeback, align after this takes care of
+	 * guaranteeing this is at least one cache line in size
+	 */
+	tx_ring->size += sizeof(u32);
 	tx_ring->size = ALIGN(tx_ring->size, 4096);
 	tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
 					   &tx_ring->dma, GFP_KERNEL);
@@ -1377,9 +1397,23 @@ static void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
 		tx_bi = &tx_ring->tx_bi[i];
 	}
 
-	tx_desc->cmd_type_offset_bsz =
-		build_ctob(td_cmd, td_offset, size, td_tag) |
-		cpu_to_le64((u64)I40E_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT);
+	/* Place RS bit on last descriptor of any packet that spans across the
+	 * 4th descriptor (WB_STRIDE aka 0x3) in a 64B cacheline.
+	 */
+#define WB_STRIDE 0x3
+	if (((i & WB_STRIDE) != WB_STRIDE) &&
+	    (first <= &tx_ring->tx_bi[i]) &&
+	    (first >= &tx_ring->tx_bi[i & ~WB_STRIDE])) {
+		tx_desc->cmd_type_offset_bsz =
+			build_ctob(td_cmd, td_offset, size, td_tag) |
+			cpu_to_le64((u64)I40E_TX_DESC_CMD_EOP <<
+				    I40E_TXD_QW1_CMD_SHIFT);
+	} else {
+		tx_desc->cmd_type_offset_bsz =
+			build_ctob(td_cmd, td_offset, size, td_tag) |
+			cpu_to_le64((u64)I40E_TXD_CMD <<
+				    I40E_TXD_QW1_CMD_SHIFT);
+	}
 
 	netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev,
 						 tx_ring->queue_index),
