Skip to content

Commit

Permalink
net/hns3: fix order in NEON Rx
Browse files Browse the repository at this point in the history
[ upstream commit 7dd439ed998c36c8d0204c436cc656af08cfa5fc ]

This patch reorders the order of the NEON Rx for better maintenance
and easier understanding.

Fixes: a3d4f4d ("net/hns3: support NEON Rx")

Signed-off-by: Huisong Li <lihuisong@huawei.com>
Signed-off-by: Dongdong Liu <liudongdong3@huawei.com>
  • Loading branch information
LiHuiSong1 authored and bluca committed Oct 18, 2023
1 parent dfc299e commit b88f88c
Showing 1 changed file with 31 additions and 47 deletions.
78 changes: 31 additions & 47 deletions drivers/net/hns3/hns3_rxtx_vec_neon.h
Original file line number Diff line number Diff line change
Expand Up @@ -168,97 +168,81 @@ hns3_recv_burst_vec(struct hns3_rx_queue *__restrict rxq,
bd_vld = vset_lane_u16(rxdp[2].rx.bdtype_vld_udp0, bd_vld, 2);
bd_vld = vset_lane_u16(rxdp[3].rx.bdtype_vld_udp0, bd_vld, 3);

/* load 2 mbuf pointer */
mbp1 = vld1q_u64((uint64_t *)&sw_ring[pos]);

bd_vld = vshl_n_u16(bd_vld,
HNS3_UINT16_BIT - 1 - HNS3_RXD_VLD_B);
bd_vld = vreinterpret_u16_s16(
vshr_n_s16(vreinterpret_s16_u16(bd_vld),
HNS3_UINT16_BIT - 1));
stat = ~vget_lane_u64(vreinterpret_u64_u16(bd_vld), 0);

/* load 2 mbuf pointer again */
mbp2 = vld1q_u64((uint64_t *)&sw_ring[pos + 2]);

if (likely(stat == 0))
bd_valid_num = HNS3_DEFAULT_DESCS_PER_LOOP;
else
bd_valid_num = __builtin_ctzl(stat) / HNS3_UINT16_BIT;
if (bd_valid_num == 0)
break;

/* use offset to control below data load oper ordering */
offset = rxq->offset_table[bd_valid_num];
/* load 4 mbuf pointer */
mbp1 = vld1q_u64((uint64_t *)&sw_ring[pos]);
mbp2 = vld1q_u64((uint64_t *)&sw_ring[pos + 2]);

/* store 2 mbuf pointer into rx_pkts */
/* store 4 mbuf pointer into rx_pkts */
vst1q_u64((uint64_t *)&rx_pkts[pos], mbp1);
vst1q_u64((uint64_t *)&rx_pkts[pos + 2], mbp2);

/* read first two descs */
/* use offset to control below data load oper ordering */
offset = rxq->offset_table[bd_valid_num];

/* read 4 descs */
descs[0] = vld2q_u64((uint64_t *)(rxdp + offset));
descs[1] = vld2q_u64((uint64_t *)(rxdp + offset + 1));

/* store 2 mbuf pointer into rx_pkts again */
vst1q_u64((uint64_t *)&rx_pkts[pos + 2], mbp2);

/* read remains two descs */
descs[2] = vld2q_u64((uint64_t *)(rxdp + offset + 2));
descs[3] = vld2q_u64((uint64_t *)(rxdp + offset + 3));

pkt_mbuf1.val[0] = vreinterpretq_u8_u64(descs[0].val[0]);
pkt_mbuf1.val[1] = vreinterpretq_u8_u64(descs[0].val[1]);
pkt_mbuf2.val[0] = vreinterpretq_u8_u64(descs[1].val[0]);
pkt_mbuf2.val[1] = vreinterpretq_u8_u64(descs[1].val[1]);
pkt_mbuf3.val[0] = vreinterpretq_u8_u64(descs[2].val[0]);
pkt_mbuf3.val[1] = vreinterpretq_u8_u64(descs[2].val[1]);
pkt_mbuf4.val[0] = vreinterpretq_u8_u64(descs[3].val[0]);
pkt_mbuf4.val[1] = vreinterpretq_u8_u64(descs[3].val[1]);

/* pkt 1,2 convert format from desc to pktmbuf */
/* 4 packets convert format from desc to pktmbuf */
pkt_mb1 = vqtbl2q_u8(pkt_mbuf1, shuf_desc_fields_msk);
pkt_mb2 = vqtbl2q_u8(pkt_mbuf2, shuf_desc_fields_msk);
pkt_mb3 = vqtbl2q_u8(pkt_mbuf3, shuf_desc_fields_msk);
pkt_mb4 = vqtbl2q_u8(pkt_mbuf4, shuf_desc_fields_msk);

/* store the first 8 bytes of pkt 1,2 mbuf's rearm_data */
*(uint64_t *)&sw_ring[pos + 0].mbuf->rearm_data =
rxq->mbuf_initializer;
*(uint64_t *)&sw_ring[pos + 1].mbuf->rearm_data =
rxq->mbuf_initializer;

/* pkt 1,2 remove crc */
/* 4 packets remove crc */
tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb1), crc_adjust);
pkt_mb1 = vreinterpretq_u8_u16(tmp);
tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb2), crc_adjust);
pkt_mb2 = vreinterpretq_u8_u16(tmp);
tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb3), crc_adjust);
pkt_mb3 = vreinterpretq_u8_u16(tmp);
tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb4), crc_adjust);
pkt_mb4 = vreinterpretq_u8_u16(tmp);

pkt_mbuf3.val[0] = vreinterpretq_u8_u64(descs[2].val[0]);
pkt_mbuf3.val[1] = vreinterpretq_u8_u64(descs[2].val[1]);
pkt_mbuf4.val[0] = vreinterpretq_u8_u64(descs[3].val[0]);
pkt_mbuf4.val[1] = vreinterpretq_u8_u64(descs[3].val[1]);

/* pkt 3,4 convert format from desc to pktmbuf */
pkt_mb3 = vqtbl2q_u8(pkt_mbuf3, shuf_desc_fields_msk);
pkt_mb4 = vqtbl2q_u8(pkt_mbuf4, shuf_desc_fields_msk);

/* pkt 1,2 save to rx_pkts mbuf */
/* save packet info to rx_pkts mbuf */
vst1q_u8((void *)&sw_ring[pos + 0].mbuf->rx_descriptor_fields1,
pkt_mb1);
vst1q_u8((void *)&sw_ring[pos + 1].mbuf->rx_descriptor_fields1,
pkt_mb2);
vst1q_u8((void *)&sw_ring[pos + 2].mbuf->rx_descriptor_fields1,
pkt_mb3);
vst1q_u8((void *)&sw_ring[pos + 3].mbuf->rx_descriptor_fields1,
pkt_mb4);

/* pkt 3,4 remove crc */
tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb3), crc_adjust);
pkt_mb3 = vreinterpretq_u8_u16(tmp);
tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb4), crc_adjust);
pkt_mb4 = vreinterpretq_u8_u16(tmp);

/* store the first 8 bytes of pkt 3,4 mbuf's rearm_data */
/* store the first 8 bytes of packets mbuf's rearm_data */
*(uint64_t *)&sw_ring[pos + 0].mbuf->rearm_data =
rxq->mbuf_initializer;
*(uint64_t *)&sw_ring[pos + 1].mbuf->rearm_data =
rxq->mbuf_initializer;
*(uint64_t *)&sw_ring[pos + 2].mbuf->rearm_data =
rxq->mbuf_initializer;
*(uint64_t *)&sw_ring[pos + 3].mbuf->rearm_data =
rxq->mbuf_initializer;

/* pkt 3,4 save to rx_pkts mbuf */
vst1q_u8((void *)&sw_ring[pos + 2].mbuf->rx_descriptor_fields1,
pkt_mb3);
vst1q_u8((void *)&sw_ring[pos + 3].mbuf->rx_descriptor_fields1,
pkt_mb4);

rte_prefetch_non_temporal(rxdp + HNS3_DEFAULT_DESCS_PER_LOOP);

parse_retcode = hns3_desc_parse_field(rxq, &sw_ring[pos],
Expand Down

0 comments on commit b88f88c

Please sign in to comment.