net/ice: fix outer checksum flags
[ upstream commit 75c6287 ]

When tunneled packets are received, the testpmd output log shows that the
'ol_flags' value is always 'PKT_RX_OUTER_L4_CKSUM_UNKNOWN', while the
expected value is 'PKT_RX_OUTER_L4_CKSUM_GOOD' or 'PKT_RX_OUTER_L4_CKSUM_BAD'.

Add 'PKT_RX_OUTER_L4_CKSUM_GOOD' and 'PKT_RX_OUTER_L4_CKSUM_BAD' to 'flags'
in the normal path, to 'l3_l4_flags_shuf' in the AVX2 and AVX512 vector
paths, and to 'cksum_flags' in the SSE vector path, so that 'ol_flags'
reports the correct outer checksum status.

Fixes: dbf3c0e ("net/ice: handle Rx flex descriptor")
Fixes: 4ab7dbb ("net/ice: switch to Rx flexible descriptor in AVX path")
Fixes: ece1f8a ("net/ice: switch to flexible descriptor in SSE path")

Signed-off-by: Murphy Yang <murphyx.yang@intel.com>
Acked-by: Qi Zhang <qi.z.zhang@intel.com>
Murphy Yang authored and bluca committed Feb 2, 2021
1 parent 2cbc618 commit 482a496
Showing 4 changed files with 233 additions and 85 deletions.
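For context, here is a minimal sketch (not part of the commit) of how an application reads the status that this fix makes visible, assuming the pre-21.11 DPDK mbuf flag names used throughout the patch; before the fix the switch below always hit the default branch for tunneled packets, afterwards it reports good or bad as appropriate.

#include <rte_mbuf.h>

/* Classify the outer L4 checksum status reported in 'ol_flags'. */
static const char *
outer_l4_cksum_str(const struct rte_mbuf *m)
{
	switch (m->ol_flags & PKT_RX_OUTER_L4_CKSUM_MASK) {
	case PKT_RX_OUTER_L4_CKSUM_GOOD:
		return "outer L4 checksum good";
	case PKT_RX_OUTER_L4_CKSUM_BAD:
		return "outer L4 checksum bad";
	case PKT_RX_OUTER_L4_CKSUM_INVALID:
		return "outer L4 checksum invalid";
	default:
		/* What testpmd always printed for tunneled packets before this fix. */
		return "outer L4 checksum unknown";
	}
}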
5 changes: 5 additions & 0 deletions drivers/net/ice/ice_rxtx.c
@@ -1451,6 +1451,11 @@ ice_rxd_error_to_pkt_flags(uint16_t stat_err0)
if (unlikely(stat_err0 & (1 << ICE_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S)))
flags |= PKT_RX_EIP_CKSUM_BAD;

if (unlikely(stat_err0 & (1 << ICE_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_S)))
flags |= PKT_RX_OUTER_L4_CKSUM_BAD;
else
flags |= PKT_RX_OUTER_L4_CKSUM_GOOD;

return flags;
}

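The explicit else branch above is needed because, under the assumed DPDK flag encoding, "outer checksum unknown" is simply the absence of both outer status bits, so a verified checksum must be reported by setting the GOOD bit rather than by leaving 'flags' untouched. A hedged restatement of that assumed encoding:

#include <assert.h>
#include <stdint.h>

int main(void)
{
	/* Assumed encoding (DPDK pre-21.11 names), not taken from the diff: */
	const uint64_t outer_bad  = 1ULL << 21;              /* PKT_RX_OUTER_L4_CKSUM_BAD  */
	const uint64_t outer_good = 1ULL << 22;              /* PKT_RX_OUTER_L4_CKSUM_GOOD */
	const uint64_t outer_mask = outer_bad | outer_good;  /* PKT_RX_OUTER_L4_CKSUM_MASK */

	uint64_t flags = 0;                        /* fresh mbuf: "unknown" is all-zero */
	assert((flags & outer_mask) == 0);         /* ...so GOOD must be set explicitly */
	flags |= outer_good;
	assert((flags & outer_mask) == outer_good);
	return 0;
}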
118 changes: 86 additions & 32 deletions drivers/net/ice/ice_rxtx_vec_avx2.c
@@ -251,43 +251,88 @@ _ice_recv_raw_pkts_vec_avx2(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
* bit13 is for VLAN indication.
*/
const __m256i flags_mask =
_mm256_set1_epi32((7 << 4) | (1 << 12) | (1 << 13));
_mm256_set1_epi32((0xF << 4) | (1 << 12) | (1 << 13));
/**
* data to be shuffled by the result of the flags mask shifted by 4
* bits. This gives use the l3_l4 flags.
*/
const __m256i l3_l4_flags_shuf = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0,
/* shift right 1 bit to make sure it not exceed 255 */
(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
PKT_RX_IP_CKSUM_BAD) >> 1,
(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
PKT_RX_IP_CKSUM_GOOD) >> 1,
(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
PKT_RX_IP_CKSUM_BAD) >> 1,
(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
PKT_RX_IP_CKSUM_GOOD) >> 1,
(PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
(PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
(PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
(PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1,
/* second 128-bits */
0, 0, 0, 0, 0, 0, 0, 0,
(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
PKT_RX_IP_CKSUM_BAD) >> 1,
(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
PKT_RX_IP_CKSUM_GOOD) >> 1,
(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
PKT_RX_IP_CKSUM_BAD) >> 1,
(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
PKT_RX_IP_CKSUM_GOOD) >> 1,
(PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
(PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
(PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
(PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1);
const __m256i l3_l4_flags_shuf =
_mm256_set_epi8((PKT_RX_OUTER_L4_CKSUM_BAD >> 20 |
PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
PKT_RX_IP_CKSUM_BAD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_EIP_CKSUM_BAD |
PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_EIP_CKSUM_BAD |
PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_EIP_CKSUM_BAD |
PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_BAD |
PKT_RX_IP_CKSUM_BAD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_BAD |
PKT_RX_IP_CKSUM_GOOD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_GOOD |
PKT_RX_IP_CKSUM_BAD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_GOOD |
PKT_RX_IP_CKSUM_GOOD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD |
PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD |
PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD |
PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD |
PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_BAD |
PKT_RX_IP_CKSUM_BAD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_BAD |
PKT_RX_IP_CKSUM_GOOD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_GOOD |
PKT_RX_IP_CKSUM_BAD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_GOOD |
PKT_RX_IP_CKSUM_GOOD) >> 1,
/**
* second 128-bits
* shift right 20 bits to use the low two bits to indicate
* outer checksum status
* shift right 1 bit to make sure it not exceed 255
*/
(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_EIP_CKSUM_BAD |
PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_EIP_CKSUM_BAD |
PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_EIP_CKSUM_BAD |
PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_EIP_CKSUM_BAD |
PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_BAD |
PKT_RX_IP_CKSUM_BAD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_BAD |
PKT_RX_IP_CKSUM_GOOD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_GOOD |
PKT_RX_IP_CKSUM_BAD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_GOOD |
PKT_RX_IP_CKSUM_GOOD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD |
PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD |
PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD |
PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD |
PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_BAD |
PKT_RX_IP_CKSUM_BAD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_BAD |
PKT_RX_IP_CKSUM_GOOD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_GOOD |
PKT_RX_IP_CKSUM_BAD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_GOOD |
PKT_RX_IP_CKSUM_GOOD) >> 1);
const __m256i cksum_mask =
_mm256_set1_epi32(PKT_RX_IP_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD |
PKT_RX_L4_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD |
PKT_RX_EIP_CKSUM_BAD);
_mm256_set1_epi32(PKT_RX_IP_CKSUM_MASK |
PKT_RX_L4_CKSUM_MASK |
PKT_RX_EIP_CKSUM_BAD |
PKT_RX_OUTER_L4_CKSUM_MASK);
/**
* data to be shuffled by result of flag mask, shifted down 12.
* If RSS(bit12)/VLAN(bit13) are set,
@@ -469,6 +514,15 @@ _ice_recv_raw_pkts_vec_avx2(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
__m256i l3_l4_flags = _mm256_shuffle_epi8(l3_l4_flags_shuf,
_mm256_srli_epi32(flag_bits, 4));
l3_l4_flags = _mm256_slli_epi32(l3_l4_flags, 1);

__m256i l4_outer_mask = _mm256_set1_epi32(0x6);
__m256i l4_outer_flags =
_mm256_and_si256(l3_l4_flags, l4_outer_mask);
l4_outer_flags = _mm256_slli_epi32(l4_outer_flags, 20);

__m256i l3_l4_mask = _mm256_set1_epi32(~0x6);
l3_l4_flags = _mm256_and_si256(l3_l4_flags, l3_l4_mask);
l3_l4_flags = _mm256_or_si256(l3_l4_flags, l4_outer_flags);
l3_l4_flags = _mm256_and_si256(l3_l4_flags, cksum_mask);
/* set rss and vlan flags */
const __m256i rss_vlan_flag_bits =
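The new table packs outer and inner checksum indications into 8-bit shuffle entries: the outer L4 status normally lives at bits 21-22 of 'ol_flags' (an assumption about the flag layout, consistent with the encoding sketched earlier), so each entry carries it shifted right by 20, and the whole entry is shifted right once more so it never exceeds 255. A hedged scalar sketch of that packing, using assumed bit positions for the inner flags as well:

#include <assert.h>
#include <stdint.h>

int main(void)
{
	/* Assumed flag positions (not taken from the diff itself). */
	const uint64_t outer_bad = 1ULL << 21;   /* PKT_RX_OUTER_L4_CKSUM_BAD */
	const uint64_t eip_bad   = 1ULL << 5;    /* PKT_RX_EIP_CKSUM_BAD      */
	const uint64_t ip_good   = 1ULL << 7;    /* PKT_RX_IP_CKSUM_GOOD      */
	const uint64_t l4_good   = 1ULL << 8;    /* PKT_RX_L4_CKSUM_GOOD      */

	/* One table entry, built the same way the new l3_l4_flags_shuf is. */
	uint64_t entry = ((outer_bad >> 20) | eip_bad | l4_good | ip_good) >> 1;
	assert(entry <= 0xFF);                   /* fits an 8-bit shuffle lane */

	/* The vector code shifts the shuffled bytes left by 1, which restores
	 * the inner flags and leaves the outer status in bits 1-2 (mask 0x6). */
	uint64_t restored = entry << 1;
	assert((restored & 0x6) == (outer_bad >> 20));
	assert(((restored & 0x6) << 20) == outer_bad);
	assert((restored & ~0x6ULL) == (eip_bad | l4_good | ip_good));
	return 0;
}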
117 changes: 85 additions & 32 deletions drivers/net/ice/ice_rxtx_vec_avx512.c
@@ -230,43 +230,88 @@ _ice_recv_raw_pkts_vec_avx512(struct ice_rx_queue *rxq,
* bit13 is for VLAN indication.
*/
const __m256i flags_mask =
_mm256_set1_epi32((7 << 4) | (1 << 12) | (1 << 13));
_mm256_set1_epi32((0xF << 4) | (1 << 12) | (1 << 13));
/**
* data to be shuffled by the result of the flags mask shifted by 4
* bits. This gives use the l3_l4 flags.
*/
const __m256i l3_l4_flags_shuf = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0,
/* shift right 1 bit to make sure it not exceed 255 */
(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
PKT_RX_IP_CKSUM_BAD) >> 1,
(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
PKT_RX_IP_CKSUM_GOOD) >> 1,
(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
PKT_RX_IP_CKSUM_BAD) >> 1,
(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
PKT_RX_IP_CKSUM_GOOD) >> 1,
(PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
(PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
(PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
(PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1,
/* 2nd 128-bits */
0, 0, 0, 0, 0, 0, 0, 0,
(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
PKT_RX_IP_CKSUM_BAD) >> 1,
(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
PKT_RX_IP_CKSUM_GOOD) >> 1,
(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
PKT_RX_IP_CKSUM_BAD) >> 1,
(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
PKT_RX_IP_CKSUM_GOOD) >> 1,
(PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
(PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
(PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
(PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1);
const __m256i l3_l4_flags_shuf =
_mm256_set_epi8((PKT_RX_OUTER_L4_CKSUM_BAD >> 20 |
PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
PKT_RX_IP_CKSUM_BAD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_EIP_CKSUM_BAD |
PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_EIP_CKSUM_BAD |
PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_EIP_CKSUM_BAD |
PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_BAD |
PKT_RX_IP_CKSUM_BAD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_BAD |
PKT_RX_IP_CKSUM_GOOD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_GOOD |
PKT_RX_IP_CKSUM_BAD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_GOOD |
PKT_RX_IP_CKSUM_GOOD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD |
PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD |
PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD |
PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD |
PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_BAD |
PKT_RX_IP_CKSUM_BAD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_BAD |
PKT_RX_IP_CKSUM_GOOD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_GOOD |
PKT_RX_IP_CKSUM_BAD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_GOOD |
PKT_RX_IP_CKSUM_GOOD) >> 1,
/**
* second 128-bits
* shift right 20 bits to use the low two bits to indicate
* outer checksum status
* shift right 1 bit to make sure it not exceed 255
*/
(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_EIP_CKSUM_BAD |
PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_EIP_CKSUM_BAD |
PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_EIP_CKSUM_BAD |
PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_EIP_CKSUM_BAD |
PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_BAD |
PKT_RX_IP_CKSUM_BAD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_BAD |
PKT_RX_IP_CKSUM_GOOD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_GOOD |
PKT_RX_IP_CKSUM_BAD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_GOOD |
PKT_RX_IP_CKSUM_GOOD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD |
PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD |
PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD |
PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD |
PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_BAD |
PKT_RX_IP_CKSUM_BAD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_BAD |
PKT_RX_IP_CKSUM_GOOD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_GOOD |
PKT_RX_IP_CKSUM_BAD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_GOOD |
PKT_RX_IP_CKSUM_GOOD) >> 1);
const __m256i cksum_mask =
_mm256_set1_epi32(PKT_RX_IP_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD |
PKT_RX_L4_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD |
PKT_RX_EIP_CKSUM_BAD);
_mm256_set1_epi32(PKT_RX_IP_CKSUM_MASK |
PKT_RX_L4_CKSUM_MASK |
PKT_RX_EIP_CKSUM_BAD |
PKT_RX_OUTER_L4_CKSUM_MASK);
/**
* data to be shuffled by result of flag mask, shifted down 12.
* If RSS(bit12)/VLAN(bit13) are set,
@@ -451,6 +496,14 @@ _ice_recv_raw_pkts_vec_avx512(struct ice_rx_queue *rxq,
__m256i l3_l4_flags = _mm256_shuffle_epi8(l3_l4_flags_shuf,
_mm256_srli_epi32(flag_bits, 4));
l3_l4_flags = _mm256_slli_epi32(l3_l4_flags, 1);
__m256i l4_outer_mask = _mm256_set1_epi32(0x6);
__m256i l4_outer_flags =
_mm256_and_si256(l3_l4_flags, l4_outer_mask);
l4_outer_flags = _mm256_slli_epi32(l4_outer_flags, 20);

__m256i l3_l4_mask = _mm256_set1_epi32(~0x6);
l3_l4_flags = _mm256_and_si256(l3_l4_flags, l3_l4_mask);
l3_l4_flags = _mm256_or_si256(l3_l4_flags, l4_outer_flags);
l3_l4_flags = _mm256_and_si256(l3_l4_flags, cksum_mask);
/* set rss and vlan flags */
const __m256i rss_vlan_flag_bits =
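Both AVX paths (and the SSE path below) then undo the packing right after the shuffle: the two low status bits are isolated with mask 0x6, moved up by 20 bits to their real positions, and OR-ed back over the inner flags before 'cksum_mask' is applied. A scalar restatement of what the 'l4_outer_mask' / 'l3_l4_mask' intrinsics compute per 32-bit lane (flag positions assumed as before):

#include <stdint.h>

/* Scalar equivalent of the per-lane fix-up performed after the shuffle:
 * 'packed' holds the inner checksum flags plus the outer L4 status
 * squeezed into bits 1-2 (see the packing sketch above). */
static inline uint32_t
restore_outer_l4_flags(uint32_t packed)
{
	uint32_t outer = packed & 0x6;     /* _mm256_and_si256(flags, l4_outer_mask) */
	outer <<= 20;                      /* _mm256_slli_epi32(outer, 20)           */
	uint32_t inner = packed & ~0x6u;   /* _mm256_and_si256(flags, l3_l4_mask)    */
	return inner | outer;              /* _mm256_or_si256(inner, outer)          */
}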
78 changes: 57 additions & 21 deletions drivers/net/ice/ice_rxtx_vec_sse.c
@@ -114,39 +114,67 @@ ice_rx_desc_to_olflags_v(struct ice_rx_queue *rxq, __m128i descs[4],
* bit12 for RSS indication.
* bit13 for VLAN indication.
*/
const __m128i desc_mask = _mm_set_epi32(0x3070, 0x3070,
0x3070, 0x3070);

const __m128i desc_mask = _mm_set_epi32(0x30f0, 0x30f0,
0x30f0, 0x30f0);
const __m128i cksum_mask = _mm_set_epi32(PKT_RX_IP_CKSUM_MASK |
PKT_RX_L4_CKSUM_MASK |
PKT_RX_OUTER_L4_CKSUM_MASK |
PKT_RX_EIP_CKSUM_BAD,
PKT_RX_IP_CKSUM_MASK |
PKT_RX_L4_CKSUM_MASK |
PKT_RX_OUTER_L4_CKSUM_MASK |
PKT_RX_EIP_CKSUM_BAD,
PKT_RX_IP_CKSUM_MASK |
PKT_RX_L4_CKSUM_MASK |
PKT_RX_OUTER_L4_CKSUM_MASK |
PKT_RX_EIP_CKSUM_BAD,
PKT_RX_IP_CKSUM_MASK |
PKT_RX_L4_CKSUM_MASK |
PKT_RX_OUTER_L4_CKSUM_MASK |
PKT_RX_EIP_CKSUM_BAD);

/* map the checksum, rss and vlan fields to the checksum, rss
* and vlan flag
*/
const __m128i cksum_flags = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0,
/* shift right 1 bit to make sure it not exceed 255 */
(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
PKT_RX_IP_CKSUM_BAD) >> 1,
(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
PKT_RX_IP_CKSUM_GOOD) >> 1,
(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
PKT_RX_IP_CKSUM_BAD) >> 1,
(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
PKT_RX_IP_CKSUM_GOOD) >> 1,
(PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
(PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
(PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
(PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1);
const __m128i cksum_flags =
_mm_set_epi8((PKT_RX_OUTER_L4_CKSUM_BAD >> 20 |
PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
PKT_RX_IP_CKSUM_BAD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_EIP_CKSUM_BAD |
PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_EIP_CKSUM_BAD |
PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_EIP_CKSUM_BAD |
PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_BAD |
PKT_RX_IP_CKSUM_BAD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_BAD |
PKT_RX_IP_CKSUM_GOOD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_GOOD |
PKT_RX_IP_CKSUM_BAD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_GOOD |
PKT_RX_IP_CKSUM_GOOD) >> 1,
/**
* shift right 20 bits to use the low two bits to indicate
* outer checksum status
* shift right 1 bit to make sure it not exceed 255
*/
(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD |
PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD |
PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD |
PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD |
PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_BAD |
PKT_RX_IP_CKSUM_BAD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_BAD |
PKT_RX_IP_CKSUM_GOOD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_GOOD |
PKT_RX_IP_CKSUM_BAD) >> 1,
(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_GOOD |
PKT_RX_IP_CKSUM_GOOD) >> 1);

const __m128i rss_vlan_flags = _mm_set_epi8(0, 0, 0, 0,
0, 0, 0, 0,
@@ -166,6 +194,14 @@ ice_rx_desc_to_olflags_v(struct ice_rx_queue *rxq, __m128i descs[4],
flags = _mm_shuffle_epi8(cksum_flags, tmp_desc);
/* then we shift left 1 bit */
flags = _mm_slli_epi32(flags, 1);

__m128i l4_outer_mask = _mm_set_epi32(0x6, 0x6, 0x6, 0x6);
__m128i l4_outer_flags = _mm_and_si128(flags, l4_outer_mask);
l4_outer_flags = _mm_slli_epi32(l4_outer_flags, 20);

__m128i l3_l4_mask = _mm_set_epi32(~0x6, ~0x6, ~0x6, ~0x6);
__m128i l3_l4_flags = _mm_and_si128(flags, l3_l4_mask);
flags = _mm_or_si128(l3_l4_flags, l4_outer_flags);
/* we need to mask out the reduntant bits introduced by RSS or
* VLAN fields.
*/
@@ -217,10 +253,10 @@ ice_rx_desc_to_olflags_v(struct ice_rx_queue *rxq, __m128i descs[4],
* appropriate flags means that we have to do a shift and blend for
* each mbuf before we do the write.
*/
rearm0 = _mm_blend_epi16(mbuf_init, _mm_slli_si128(flags, 8), 0x10);
rearm1 = _mm_blend_epi16(mbuf_init, _mm_slli_si128(flags, 4), 0x10);
rearm2 = _mm_blend_epi16(mbuf_init, flags, 0x10);
rearm3 = _mm_blend_epi16(mbuf_init, _mm_srli_si128(flags, 4), 0x10);
rearm0 = _mm_blend_epi16(mbuf_init, _mm_slli_si128(flags, 8), 0x30);
rearm1 = _mm_blend_epi16(mbuf_init, _mm_slli_si128(flags, 4), 0x30);
rearm2 = _mm_blend_epi16(mbuf_init, flags, 0x30);
rearm3 = _mm_blend_epi16(mbuf_init, _mm_srli_si128(flags, 4), 0x30);

/* write the rearm data and the olflags in one write */
RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, ol_flags) !=
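Finally, the blend immediates grow from 0x10 to 0x30 because 'ol_flags' now carries status above bit 15: the outer L4 bits (assumed at 21-22) fall into the second 16-bit lane of the flag word, so two lanes must be merged into the rearm data instead of one. A small hedged check of that lane arithmetic, assuming 'ol_flags' starts 8 bytes into the rearm image, as the truncated RTE_BUILD_BUG_ON above suggests:

#include <assert.h>

int main(void)
{
	/* Assumed layout: ol_flags occupies bytes 8..15 of the rearm image,
	 * i.e. 16-bit lanes 4..7 of the vector handed to _mm_blend_epi16(). */
	const unsigned ol_flags_first_lane = 4;
	/* Outer L4 status assumed at bits 21-22 of ol_flags, i.e. in the
	 * second 16-bit half of the flag word. */
	const unsigned lane_of_bit21 = ol_flags_first_lane + 21 / 16;

	assert(lane_of_bit21 == 5);
	assert(((0x10 >> lane_of_bit21) & 1) == 0);   /* old imm8 missed lane 5 */
	assert(((0x30 >> lane_of_bit21) & 1) == 1);   /* new imm8 covers it     */
	return 0;
}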
