Commit ee40681

Nelson Chang authored and davem330 committed
net: ethernet: mediatek: add HW LRO functions of PDMA RX rings
This patch adds hardware large receive offload (LRO) functions for the PDMA RX rings:

1) The PDMA has four RX rings in total: one is the normal ring, and the others can be configured as LRO rings.
2) Only TCP/IP RX flows can be offloaded. The hardware can match up to four destination IP addresses; an RX flow whose destination IP matches one of them is a candidate for offload.
3) At most three RX flows can be offloaded, and each offloaded flow is mapped to its own RX ring.
4) If there are more than three candidate RX flows, the hardware chooses three of them by comparing their throughput.

Signed-off-by: Nelson Chang <nelson.chang@mediatek.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent 0fbc81b commit ee40681

File tree

2 files changed: +265 −25 lines changed

drivers/net/ethernet/mediatek/mtk_eth_soc.c

Lines changed: 193 additions & 22 deletions
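
For orientation before the diff, here is a minimal sketch (ours, not part of the patch) of the per-ring state the new code relies on. Every field name below appears in the diff; the exact types and the full layout of struct mtk_rx_ring in mtk_eth_soc.h are assumptions.

/* Hypothetical reconstruction of the per-ring state after this patch;
 * field names are taken from the diff, types are assumed. */
struct mtk_rx_ring {
	struct mtk_rx_dma *dma;	/* descriptor array in coherent DMA memory */
	u8 **data;		/* one receive buffer per descriptor */
	dma_addr_t phys;	/* DMA address of the descriptor array */
	int frag_size;		/* from mtk_max_frag_size(rx_data_len) */
	int buf_size;		/* from mtk_max_buf_size(frag_size) */
	int dma_size;		/* MTK_DMA_SIZE, or MTK_HW_LRO_DMA_SIZE for LRO rings */
	u16 calc_idx;		/* CPU index, written back through crx_idx_reg */
	bool calc_idx_update;	/* a CPU-index write is pending for this ring */
	u32 crx_idx_reg;	/* MTK_PRX_CRX_IDX_CFG(ring_no) */
};

Ring 0 remains the normal ring; when eth->hwlro is set, rings 1 through MTK_MAX_RX_RING_NUM - 1 are allocated as LRO rings with larger buffers (MTK_MAX_LRO_RX_LENGTH) and shorter descriptor rings (MTK_HW_LRO_DMA_SIZE).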
@@ -820,11 +820,51 @@ static int mtk_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	return NETDEV_TX_OK;
 }
 
+static struct mtk_rx_ring *mtk_get_rx_ring(struct mtk_eth *eth)
+{
+	int i;
+	struct mtk_rx_ring *ring;
+	int idx;
+
+	if (!eth->hwlro)
+		return &eth->rx_ring[0];
+
+	for (i = 0; i < MTK_MAX_RX_RING_NUM; i++) {
+		ring = &eth->rx_ring[i];
+		idx = NEXT_RX_DESP_IDX(ring->calc_idx, ring->dma_size);
+		if (ring->dma[idx].rxd2 & RX_DMA_DONE) {
+			ring->calc_idx_update = true;
+			return ring;
+		}
+	}
+
+	return NULL;
+}
+
+static void mtk_update_rx_cpu_idx(struct mtk_eth *eth)
+{
+	struct mtk_rx_ring *ring;
+	int i;
+
+	if (!eth->hwlro) {
+		ring = &eth->rx_ring[0];
+		mtk_w32(eth, ring->calc_idx, ring->crx_idx_reg);
+	} else {
+		for (i = 0; i < MTK_MAX_RX_RING_NUM; i++) {
+			ring = &eth->rx_ring[i];
+			if (ring->calc_idx_update) {
+				ring->calc_idx_update = false;
+				mtk_w32(eth, ring->calc_idx, ring->crx_idx_reg);
+			}
+		}
+	}
+}
+
 static int mtk_poll_rx(struct napi_struct *napi, int budget,
 		       struct mtk_eth *eth)
 {
-	struct mtk_rx_ring *ring = &eth->rx_ring;
-	int idx = ring->calc_idx;
+	struct mtk_rx_ring *ring;
+	int idx;
 	struct sk_buff *skb;
 	u8 *data, *new_data;
 	struct mtk_rx_dma *rxd, trxd;
@@ -836,7 +876,11 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
 		dma_addr_t dma_addr;
 		int mac = 0;
 
-		idx = NEXT_RX_DESP_IDX(idx);
+		ring = mtk_get_rx_ring(eth);
+		if (unlikely(!ring))
+			goto rx_done;
+
+		idx = NEXT_RX_DESP_IDX(ring->calc_idx, ring->dma_size);
 		rxd = &ring->dma[idx];
 		data = ring->data[idx];
 
@@ -907,12 +951,13 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
 		done++;
 	}
 
+rx_done:
 	if (done) {
 		/* make sure that all changes to the dma ring are flushed before
 		 * we continue
 		 */
 		wmb();
-		mtk_w32(eth, ring->calc_idx, MTK_PRX_CRX_IDX0);
+		mtk_update_rx_cpu_idx(eth);
 	}
 
 	return done;
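
The poll path above defers the CPU-index write: mtk_get_rx_ring() sets calc_idx_update on a ring whenever it finds a completed descriptor, and a single mtk_update_rx_cpu_idx() call after the budget loop flushes every marked ring. A stand-alone model of this mark-dirty/flush-once pattern (illustrative only; the names are ours, not the driver's):

#include <stdbool.h>

struct ring_model {
	unsigned int cpu_idx;	/* index the CPU has consumed up to */
	bool dirty;		/* set while polling, cleared on flush */
};

/* Write back the CPU index of each ring that saw work during the poll,
 * skipping rings that had nothing to report. */
static void flush_cpu_indices(struct ring_model *rings, int n,
			      void (*write_reg)(int ring, unsigned int val))
{
	int i;

	for (i = 0; i < n; i++) {
		if (rings[i].dirty) {
			rings[i].dirty = false;
			write_reg(i, rings[i].cpu_idx);
		}
	}
}

This keeps register traffic at one write per dirty ring per poll, no matter how many of the four rings the hardware is filling.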
@@ -1135,32 +1180,41 @@ static void mtk_tx_clean(struct mtk_eth *eth)
 	}
 }
 
-static int mtk_rx_alloc(struct mtk_eth *eth)
+static int mtk_rx_alloc(struct mtk_eth *eth, int ring_no, int rx_flag)
 {
-	struct mtk_rx_ring *ring = &eth->rx_ring;
+	struct mtk_rx_ring *ring = &eth->rx_ring[ring_no];
+	int rx_data_len, rx_dma_size;
 	int i;
 
-	ring->frag_size = mtk_max_frag_size(ETH_DATA_LEN);
+	if (rx_flag == MTK_RX_FLAGS_HWLRO) {
+		rx_data_len = MTK_MAX_LRO_RX_LENGTH;
+		rx_dma_size = MTK_HW_LRO_DMA_SIZE;
+	} else {
+		rx_data_len = ETH_DATA_LEN;
+		rx_dma_size = MTK_DMA_SIZE;
+	}
+
+	ring->frag_size = mtk_max_frag_size(rx_data_len);
 	ring->buf_size = mtk_max_buf_size(ring->frag_size);
-	ring->data = kcalloc(MTK_DMA_SIZE, sizeof(*ring->data),
+	ring->data = kcalloc(rx_dma_size, sizeof(*ring->data),
 			     GFP_KERNEL);
 	if (!ring->data)
 		return -ENOMEM;
 
-	for (i = 0; i < MTK_DMA_SIZE; i++) {
+	for (i = 0; i < rx_dma_size; i++) {
 		ring->data[i] = netdev_alloc_frag(ring->frag_size);
 		if (!ring->data[i])
 			return -ENOMEM;
 	}
 
 	ring->dma = dma_alloc_coherent(eth->dev,
-				       MTK_DMA_SIZE * sizeof(*ring->dma),
+				       rx_dma_size * sizeof(*ring->dma),
 				       &ring->phys,
 				       GFP_ATOMIC | __GFP_ZERO);
 	if (!ring->dma)
 		return -ENOMEM;
 
-	for (i = 0; i < MTK_DMA_SIZE; i++) {
+	for (i = 0; i < rx_dma_size; i++) {
 		dma_addr_t dma_addr = dma_map_single(eth->dev,
 				ring->data[i] + NET_SKB_PAD,
 				ring->buf_size,
@@ -1171,27 +1225,30 @@ static int mtk_rx_alloc(struct mtk_eth *eth)
 
 		ring->dma[i].rxd2 = RX_DMA_PLEN0(ring->buf_size);
 	}
-	ring->calc_idx = MTK_DMA_SIZE - 1;
+	ring->dma_size = rx_dma_size;
+	ring->calc_idx_update = false;
+	ring->calc_idx = rx_dma_size - 1;
+	ring->crx_idx_reg = MTK_PRX_CRX_IDX_CFG(ring_no);
 	/* make sure that all changes to the dma ring are flushed before we
 	 * continue
 	 */
 	wmb();
 
-	mtk_w32(eth, eth->rx_ring.phys, MTK_PRX_BASE_PTR0);
-	mtk_w32(eth, MTK_DMA_SIZE, MTK_PRX_MAX_CNT0);
-	mtk_w32(eth, eth->rx_ring.calc_idx, MTK_PRX_CRX_IDX0);
-	mtk_w32(eth, MTK_PST_DRX_IDX0, MTK_PDMA_RST_IDX);
+	mtk_w32(eth, ring->phys, MTK_PRX_BASE_PTR_CFG(ring_no));
+	mtk_w32(eth, rx_dma_size, MTK_PRX_MAX_CNT_CFG(ring_no));
+	mtk_w32(eth, ring->calc_idx, ring->crx_idx_reg);
+	mtk_w32(eth, MTK_PST_DRX_IDX_CFG(ring_no), MTK_PDMA_RST_IDX);
 
 	return 0;
 }
 
-static void mtk_rx_clean(struct mtk_eth *eth)
+static void mtk_rx_clean(struct mtk_eth *eth, int ring_no)
 {
-	struct mtk_rx_ring *ring = &eth->rx_ring;
+	struct mtk_rx_ring *ring = &eth->rx_ring[ring_no];
 	int i;
 
 	if (ring->data && ring->dma) {
-		for (i = 0; i < MTK_DMA_SIZE; i++) {
+		for (i = 0; i < ring->dma_size; i++) {
 			if (!ring->data[i])
 				continue;
 			if (!ring->dma[i].rxd1)
@@ -1208,13 +1265,98 @@ static void mtk_rx_clean(struct mtk_eth *eth)
 
 	if (ring->dma) {
 		dma_free_coherent(eth->dev,
-				  MTK_DMA_SIZE * sizeof(*ring->dma),
+				  ring->dma_size * sizeof(*ring->dma),
 				  ring->dma,
 				  ring->phys);
 		ring->dma = NULL;
 	}
 }
 
+static int mtk_hwlro_rx_init(struct mtk_eth *eth)
+{
+	int i;
+	u32 ring_ctrl_dw1 = 0, ring_ctrl_dw2 = 0, ring_ctrl_dw3 = 0;
+	u32 lro_ctrl_dw0 = 0, lro_ctrl_dw3 = 0;
+
+	/* set LRO rings to auto-learn modes */
+	ring_ctrl_dw2 |= MTK_RING_AUTO_LERAN_MODE;
+
+	/* validate LRO ring */
+	ring_ctrl_dw2 |= MTK_RING_VLD;
+
+	/* set AGE timer (unit: 20us) */
+	ring_ctrl_dw2 |= MTK_RING_AGE_TIME_H;
+	ring_ctrl_dw1 |= MTK_RING_AGE_TIME_L;
+
+	/* set max AGG timer (unit: 20us) */
+	ring_ctrl_dw2 |= MTK_RING_MAX_AGG_TIME;
+
+	/* set max LRO AGG count */
+	ring_ctrl_dw2 |= MTK_RING_MAX_AGG_CNT_L;
+	ring_ctrl_dw3 |= MTK_RING_MAX_AGG_CNT_H;
+
+	for (i = 1; i < MTK_MAX_RX_RING_NUM; i++) {
+		mtk_w32(eth, ring_ctrl_dw1, MTK_LRO_CTRL_DW1_CFG(i));
+		mtk_w32(eth, ring_ctrl_dw2, MTK_LRO_CTRL_DW2_CFG(i));
+		mtk_w32(eth, ring_ctrl_dw3, MTK_LRO_CTRL_DW3_CFG(i));
+	}
+
+	/* IPv4 checksum update enable */
+	lro_ctrl_dw0 |= MTK_L3_CKS_UPD_EN;
+
+	/* switch priority comparison to packet count mode */
+	lro_ctrl_dw0 |= MTK_LRO_ALT_PKT_CNT_MODE;
+
+	/* bandwidth threshold setting */
+	mtk_w32(eth, MTK_HW_LRO_BW_THRE, MTK_PDMA_LRO_CTRL_DW2);
+
+	/* auto-learn score delta setting */
+	mtk_w32(eth, MTK_HW_LRO_REPLACE_DELTA, MTK_PDMA_LRO_ALT_SCORE_DELTA);
+
+	/* set refresh timer for altering flows to 1 sec. (unit: 20us) */
+	mtk_w32(eth, (MTK_HW_LRO_TIMER_UNIT << 16) | MTK_HW_LRO_REFRESH_TIME,
+		MTK_PDMA_LRO_ALT_REFRESH_TIMER);
+
+	/* set HW LRO mode & the max aggregation count for rx packets */
+	lro_ctrl_dw3 |= MTK_ADMA_MODE | (MTK_HW_LRO_MAX_AGG_CNT & 0xff);
+
+	/* the minimal remaining room of SDL0 in RXD for lro aggregation */
+	lro_ctrl_dw3 |= MTK_LRO_MIN_RXD_SDL;
+
+	/* enable HW LRO */
+	lro_ctrl_dw0 |= MTK_LRO_EN;
+
+	mtk_w32(eth, lro_ctrl_dw3, MTK_PDMA_LRO_CTRL_DW3);
+	mtk_w32(eth, lro_ctrl_dw0, MTK_PDMA_LRO_CTRL_DW0);
+
+	return 0;
+}
+
+static void mtk_hwlro_rx_uninit(struct mtk_eth *eth)
+{
+	int i;
+	u32 val;
+
+	/* relinquish lro rings, flush aggregated packets */
+	mtk_w32(eth, MTK_LRO_RING_RELINQUISH_REQ, MTK_PDMA_LRO_CTRL_DW0);
+
+	/* wait for relinquishments done */
+	for (i = 0; i < 10; i++) {
+		val = mtk_r32(eth, MTK_PDMA_LRO_CTRL_DW0);
+		if (val & MTK_LRO_RING_RELINQUISH_DONE) {
+			msleep(20);
+			continue;
+		}
+	}
+
+	/* invalidate lro rings */
+	for (i = 1; i < MTK_MAX_RX_RING_NUM; i++)
+		mtk_w32(eth, 0, MTK_LRO_CTRL_DW2_CFG(i));
+
+	/* disable HW LRO */
+	mtk_w32(eth, 0, MTK_PDMA_LRO_CTRL_DW0);
+}
+
 /* wait for DMA to finish whatever it is doing before we start using it again */
 static int mtk_dma_busy_wait(struct mtk_eth *eth)
 {
@@ -1235,6 +1377,7 @@ static int mtk_dma_busy_wait(struct mtk_eth *eth)
 static int mtk_dma_init(struct mtk_eth *eth)
 {
 	int err;
+	u32 i;
 
 	if (mtk_dma_busy_wait(eth))
 		return -EBUSY;
@@ -1250,10 +1393,21 @@ static int mtk_dma_init(struct mtk_eth *eth)
 	if (err)
 		return err;
 
-	err = mtk_rx_alloc(eth);
+	err = mtk_rx_alloc(eth, 0, MTK_RX_FLAGS_NORMAL);
 	if (err)
 		return err;
 
+	if (eth->hwlro) {
+		for (i = 1; i < MTK_MAX_RX_RING_NUM; i++) {
+			err = mtk_rx_alloc(eth, i, MTK_RX_FLAGS_HWLRO);
+			if (err)
+				return err;
+		}
+		err = mtk_hwlro_rx_init(eth);
+		if (err)
+			return err;
+	}
+
 	/* Enable random early drop and set drop threshold automatically */
 	mtk_w32(eth, FC_THRES_DROP_MODE | FC_THRES_DROP_EN | FC_THRES_MIN,
 		MTK_QDMA_FC_THRES);
@@ -1278,7 +1432,14 @@ static void mtk_dma_free(struct mtk_eth *eth)
 		eth->phy_scratch_ring = 0;
 	}
 	mtk_tx_clean(eth);
-	mtk_rx_clean(eth);
+	mtk_rx_clean(eth, 0);
+
+	if (eth->hwlro) {
+		mtk_hwlro_rx_uninit(eth);
+		for (i = 1; i < MTK_MAX_RX_RING_NUM; i++)
+			mtk_rx_clean(eth, i);
+	}
+
 	kfree(eth->scratch_head);
 }
 
@@ -1873,6 +2034,9 @@ static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np)
 	mac->hw = eth;
 	mac->of_node = np;
 
+	memset(mac->hwlro_ip, 0, sizeof(mac->hwlro_ip));
+	mac->hwlro_ip_cnt = 0;
+
 	mac->hw_stats = devm_kzalloc(eth->dev,
 				     sizeof(*mac->hw_stats),
 				     GFP_KERNEL);
@@ -1889,6 +2053,11 @@ static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np)
 	eth->netdev[id]->watchdog_timeo = 5 * HZ;
 	eth->netdev[id]->netdev_ops = &mtk_netdev_ops;
 	eth->netdev[id]->base_addr = (unsigned long)eth->base;
+
+	eth->netdev[id]->hw_features = MTK_HW_FEATURES;
+	if (eth->hwlro)
+		eth->netdev[id]->hw_features |= NETIF_F_LRO;
+
 	eth->netdev[id]->vlan_features = MTK_HW_FEATURES &
 			~(NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX);
 	eth->netdev[id]->features |= MTK_HW_FEATURES;
@@ -1941,6 +2110,8 @@ static int mtk_probe(struct platform_device *pdev)
 		return PTR_ERR(eth->pctl);
 	}
 
+	eth->hwlro = of_property_read_bool(pdev->dev.of_node, "mediatek,hwlro");
+
 	for (i = 0; i < 3; i++) {
 		eth->irq[i] = platform_get_irq(pdev, i);
 		if (eth->irq[i] < 0) {
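
The commit message's points 2) to 4) describe a policy that lives in the PDMA hardware, not in driver code. As a rough software model of that policy (entirely illustrative; every name here is ours):

#include <stdbool.h>
#include <stdint.h>

#define LRO_IP_SLOTS	4	/* programmable destination-IP match slots */
#define LRO_FLOW_RINGS	3	/* rings 1-3 each carry one offloaded flow */

struct lro_flow {
	uint32_t dst_ip;
	uint64_t throughput;	/* the quantity compared in point 4) */
};

/* Point 2): a TCP/IP RX flow is an offload candidate only if its
 * destination IP matches one of the programmed slots. */
static bool lro_is_candidate(uint32_t dst_ip,
			     const uint32_t slots[LRO_IP_SLOTS])
{
	int i;

	for (i = 0; i < LRO_IP_SLOTS; i++)
		if (slots[i] && slots[i] == dst_ip)
			return true;
	return false;
}

/* Points 3) and 4): with more than LRO_FLOW_RINGS candidates, a new flow
 * displaces the weakest offloaded flow only if it outperforms it.
 * Returns the ring slot to reuse, or -1 to leave things as they are. */
static int lro_pick_slot(const struct lro_flow cur[LRO_FLOW_RINGS],
			 const struct lro_flow *cand)
{
	int weakest = 0;
	int i;

	for (i = 1; i < LRO_FLOW_RINGS; i++)
		if (cur[i].throughput < cur[weakest].throughput)
			weakest = i;

	return cand->throughput > cur[weakest].throughput ? weakest : -1;
}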
