From 4f9db530ed6e12dad66e21dd4d484719ec551963 Mon Sep 17 00:00:00 2001 From: Frank Du Date: Fri, 20 Mar 2020 12:14:04 -0500 Subject: [PATCH] ARROW-8166: [C++] fix AVX512 intrinsics fail with clang-8 __m512i_u is undeclared in clang while _mm512_storeu_epi32 is undefined in gcc, so use memcpy instead for the unaligned store. BM_PlainDecodingBoolean with gcc gets the same level of result as before. Signed-off-by: Frank Du Closes #6673 from jianxind/avx512-build-with-clang Authored-by: Frank Du Signed-off-by: Wes McKinney --- cpp/src/arrow/util/bpacking.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/cpp/src/arrow/util/bpacking.h b/cpp/src/arrow/util/bpacking.h index 862e61d23eff6..72c8de79a71fd 100644 --- a/cpp/src/arrow/util/bpacking.h +++ b/cpp/src/arrow/util/bpacking.h @@ -39,20 +39,22 @@ namespace internal { #if defined(__AVX512F__) inline const uint32_t* unpack1_32(const uint32_t* in, uint32_t* out) { uint32_t inl = util::SafeLoad(in); - __m512i shifts, inls, masks; + __m512i shifts, inls, masks, result; inls = _mm512_set1_epi32(inl); masks = _mm512_set1_epi32(1); // shift the first 16 outs shifts = _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - *(__m512i_u*)out = _mm512_and_epi32(_mm512_srlv_epi32(inls, shifts), masks); + result = _mm512_and_epi32(_mm512_srlv_epi32(inls, shifts), masks); + memcpy(out, &result, 16 * sizeof(*out)); out += 16; // shift the last 16 outs shifts = _mm512_set_epi32(31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16); - *(__m512i_u*)out = _mm512_and_epi32(_mm512_srlv_epi32(inls, shifts), masks); + result = _mm512_and_epi32(_mm512_srlv_epi32(inls, shifts), masks); + memcpy(out, &result, 16 * sizeof(*out)); out += 16; ++in;