net/iavf: remove incorrect 16B descriptor read block
[ upstream commit d4ade5d02d188fcbe51871c5a5d66ef075ca0f86 ]

By default, the driver works with 32B descriptors, but it has a separate
descriptor read block for reading two descriptors at a time when 16B
descriptors are in use. However, those 32B reads are not guaranteed to be
atomic, so on systems where they are not, the two descriptors in each pair
may be read in an undefined order. Remove the block to avoid this issue,
and just use the regular descriptor read path for 16B descriptors when
that support is enabled at build time.

Fixes: af0c246 ("net/iavf: enable AVX2 for iavf")

Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
Acked-by: Anatoly Burakov <anatoly.burakov@intel.com>
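
To see the pattern the patch keeps in isolation, consider the sketch below.
It is illustrative only, not the driver code: read_desc_pair, ring, idx and
compiler_barrier are names invented for this example, with compiler_barrier
standing in for DPDK's rte_compiler_barrier(). Each descriptor is fetched
with its own 16-byte _mm_load_si128(), and a compiler barrier between the
loads pins their program order (higher-numbered descriptor first, matching
the driver's walk from rxdp + 7 down to rxdp + 0), whereas a single 32-byte
load over the pair leaves the order in which the two halves are observed
undefined. The two halves are then packed into one __m256i, as the patched
code does with _mm256_inserti128_si256(). Compile with AVX2 enabled
(e.g. -mavx2).

#include <immintrin.h>

/* Compiler-only barrier: prevents the compiler from reordering or
 * merging the loads around it; it emits no CPU fence instruction. */
#define compiler_barrier() asm volatile("" : : : "memory")

/* Read two adjacent 16B descriptors and pack them into one 256-bit
 * value. The higher-numbered descriptor is loaded first, mirroring
 * the driver's reverse-order descriptor walk. Requires that the
 * descriptor ring entries are 16-byte aligned. */
static inline __m256i
read_desc_pair(const __m128i *ring, int idx)
{
	const __m128i hi = _mm_load_si128(&ring[idx + 1]);
	compiler_barrier();
	const __m128i lo = _mm_load_si128(&ring[idx]);

	/* lo becomes bits 0..127, hi becomes bits 128..255 */
	return _mm256_inserti128_si256(_mm256_castsi128_si256(lo), hi, 1);
}
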
bruce-richardson authored and bluca committed Mar 13, 2024
1 parent 614a3bf commit 0d2f5b4
Showing 1 changed file with 24 additions and 56 deletions.
drivers/net/iavf/iavf_rxtx_vec_avx2.c: 24 additions & 56 deletions
@@ -192,62 +192,30 @@ _iavf_recv_raw_pkts_vec_avx2(struct iavf_rx_queue *rxq,
 				_mm256_loadu_si256((void *)&sw_ring[i + 4]));
 #endif
 
-		__m256i raw_desc0_1, raw_desc2_3, raw_desc4_5, raw_desc6_7;
-#ifdef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
-		/* for AVX we need alignment otherwise loads are not atomic */
-		if (avx_aligned) {
-			/* load in descriptors, 2 at a time, in reverse order */
-			raw_desc6_7 = _mm256_load_si256((void *)(rxdp + 6));
-			rte_compiler_barrier();
-			raw_desc4_5 = _mm256_load_si256((void *)(rxdp + 4));
-			rte_compiler_barrier();
-			raw_desc2_3 = _mm256_load_si256((void *)(rxdp + 2));
-			rte_compiler_barrier();
-			raw_desc0_1 = _mm256_load_si256((void *)(rxdp + 0));
-		} else
-#endif
-		{
-			const __m128i raw_desc7 =
-				_mm_load_si128((void *)(rxdp + 7));
-			rte_compiler_barrier();
-			const __m128i raw_desc6 =
-				_mm_load_si128((void *)(rxdp + 6));
-			rte_compiler_barrier();
-			const __m128i raw_desc5 =
-				_mm_load_si128((void *)(rxdp + 5));
-			rte_compiler_barrier();
-			const __m128i raw_desc4 =
-				_mm_load_si128((void *)(rxdp + 4));
-			rte_compiler_barrier();
-			const __m128i raw_desc3 =
-				_mm_load_si128((void *)(rxdp + 3));
-			rte_compiler_barrier();
-			const __m128i raw_desc2 =
-				_mm_load_si128((void *)(rxdp + 2));
-			rte_compiler_barrier();
-			const __m128i raw_desc1 =
-				_mm_load_si128((void *)(rxdp + 1));
-			rte_compiler_barrier();
-			const __m128i raw_desc0 =
-				_mm_load_si128((void *)(rxdp + 0));
-
-			raw_desc6_7 =
-				_mm256_inserti128_si256
-					(_mm256_castsi128_si256(raw_desc6),
-					 raw_desc7, 1);
-			raw_desc4_5 =
-				_mm256_inserti128_si256
-					(_mm256_castsi128_si256(raw_desc4),
-					 raw_desc5, 1);
-			raw_desc2_3 =
-				_mm256_inserti128_si256
-					(_mm256_castsi128_si256(raw_desc2),
-					 raw_desc3, 1);
-			raw_desc0_1 =
-				_mm256_inserti128_si256
-					(_mm256_castsi128_si256(raw_desc0),
-					 raw_desc1, 1);
-		}
+		const __m128i raw_desc7 = _mm_load_si128((void *)(rxdp + 7));
+		rte_compiler_barrier();
+		const __m128i raw_desc6 = _mm_load_si128((void *)(rxdp + 6));
+		rte_compiler_barrier();
+		const __m128i raw_desc5 = _mm_load_si128((void *)(rxdp + 5));
+		rte_compiler_barrier();
+		const __m128i raw_desc4 = _mm_load_si128((void *)(rxdp + 4));
+		rte_compiler_barrier();
+		const __m128i raw_desc3 = _mm_load_si128((void *)(rxdp + 3));
+		rte_compiler_barrier();
+		const __m128i raw_desc2 = _mm_load_si128((void *)(rxdp + 2));
+		rte_compiler_barrier();
+		const __m128i raw_desc1 = _mm_load_si128((void *)(rxdp + 1));
+		rte_compiler_barrier();
+		const __m128i raw_desc0 = _mm_load_si128((void *)(rxdp + 0));
+
+		const __m256i raw_desc6_7 =
+			_mm256_inserti128_si256(_mm256_castsi128_si256(raw_desc6), raw_desc7, 1);
+		const __m256i raw_desc4_5 =
+			_mm256_inserti128_si256(_mm256_castsi128_si256(raw_desc4), raw_desc5, 1);
+		const __m256i raw_desc2_3 =
+			_mm256_inserti128_si256(_mm256_castsi128_si256(raw_desc2), raw_desc3, 1);
+		const __m256i raw_desc0_1 =
+			_mm256_inserti128_si256(_mm256_castsi128_si256(raw_desc0), raw_desc1, 1);
 
 		if (split_packet) {
 			int j;
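A note on why plain compiler barriers suffice in the retained path:
rte_compiler_barrier() only stops compiler reordering; in DPDK it is
essentially

#define rte_compiler_barrier() do {		\
	asm volatile ("" : : : "memory");	\
} while (0)

and on x86 the hardware does not reorder loads with other loads, so once
the compiler is prevented from fusing or reordering them, the eight
16-byte reads are observed in program order without any lfence-style
instruction. After this change, a build that defines
RTE_LIBRTE_IAVF_16BYTE_RX_DESC simply takes the same per-descriptor load
path as the default 32B-descriptor build, rather than the removed
32-byte paired-load shortcut.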
