From c315129925e84b7f7f538550409a23dac988e085 Mon Sep 17 00:00:00 2001 From: "Ahn, Dongkyun" Date: Tue, 9 Aug 2022 14:34:54 -0700 Subject: [PATCH 1/4] [SYCL][ESIMD][EMU] lsc_atomic support - __esimd_lsc_xatomic_slm_0/1/2 - __esimd_lsc_xatomic_bti_0/1/2/ - __esimd_lsc_xatomic_stateless_0/1/2 --- .../ext/intel/esimd/detail/atomic_intrin.hpp | 97 ++++ .../esimd/detail/memory_intrin.hpp | 454 +++++++++++++++++- 2 files changed, 530 insertions(+), 21 deletions(-) diff --git a/sycl/include/sycl/ext/intel/esimd/detail/atomic_intrin.hpp b/sycl/include/sycl/ext/intel/esimd/detail/atomic_intrin.hpp index 1d93b71f1e2c9..5eb2e0b08f6c1 100644 --- a/sycl/include/sycl/ext/intel/esimd/detail/atomic_intrin.hpp +++ b/sycl/include/sycl/ext/intel/esimd/detail/atomic_intrin.hpp @@ -13,6 +13,25 @@ // This function implements atomic update of pre-existing variable in the // absense of C++ 20's atomic_ref. + +template Ty atomic_load(Ty *ptr) { +#ifdef _WIN32 + // TODO: Windows will be supported soon + __ESIMD_UNSUPPORTED_ON_HOST; +#else + return __atomic_load(ptr, __ATOMIC_RELAXED); +#endif +} + +template Ty atomic_store(Ty *ptr, Ty val) { +#ifdef _WIN32 + // TODO: Windows will be supported soon + __ESIMD_UNSUPPORTED_ON_HOST; +#else + __atomic_store(ptr, val, __ATOMIC_RELAXED); +#endif +} + template Ty atomic_add_fetch(Ty *ptr, Ty val) { #ifdef _WIN32 // TODO: Windows will be supported soon @@ -22,4 +41,82 @@ template Ty atomic_add_fetch(Ty *ptr, Ty val) { #endif } +template Ty atomic_sub_fetch(Ty *ptr, Ty val) { +#ifdef _WIN32 + // TODO: Windows will be supported soon + __ESIMD_UNSUPPORTED_ON_HOST; +#else + return __atomic_sub_fetch(ptr, val, __ATOMIC_RELAXED); +#endif +} + +template Ty atomic_and_fetch(Ty *ptr, Ty val) { +#ifdef _WIN32 + // TODO: Windows will be supported soon + __ESIMD_UNSUPPORTED_ON_HOST; +#else + return __atomic_and_fetch(ptr, val, __ATOMIC_RELAXED); +#endif +} + +template Ty atomic_or_fetch(Ty *ptr, Ty val) { +#ifdef _WIN32 + // TODO: Windows will be supported soon + __ESIMD_UNSUPPORTED_ON_HOST; +#else + return __atomic_or_fetch(ptr, val, __ATOMIC_RELAXED); +#endif +} + +template Ty atomic_xor_fetch(Ty *ptr, Ty val) { +#ifdef _WIN32 + // TODO: Windows will be supported soon + __ESIMD_UNSUPPORTED_ON_HOST; +#else + return __atomic_xor_fetch(ptr, val, __ATOMIC_RELAXED); +#endif +} + +template Ty atomic_min(Ty *ptr, Ty val) { +#ifdef _WIN32 + // TODO: Windows will be supported soon + __ESIMD_UNSUPPORTED_ON_HOST; +#else + Ty _old, _new; + do { + _old = *ptr; + _new = std::min(_old, val); + } while (!__atomic_compare_exchange_n(ptr, &_old, _new, false, + __ATOMIC_RELAXED, __ATOMIC_RELAXED)); + return _new; +#endif +} + +template Ty atomic_max(Ty *ptr, Ty val) { +#ifdef _WIN32 + // TODO: Windows will be supported soon + __ESIMD_UNSUPPORTED_ON_HOST; +#else + Ty _old, _new; + do { + _old = *ptr; + _new = std::max(_old, val); + } while (!__atomic_compare_exchange_n(ptr, &_old, _new, false, + __ATOMIC_RELAXED, __ATOMIC_RELAXED)); + return _new; +#endif +} + +template Ty atomic_cmpxchg(Ty *ptr, Ty expected, Ty desired) { +#ifdef _WIN32 + // TODO: Windows will be supported soon + __ESIMD_UNSUPPORTED_ON_HOST; +#else + Ty _old = expected; + __atomic_compare_exchange_n(ptr, &_old, desired, false, __ATOMIC_RELAXED, + __ATOMIC_RELAXED); + return *ptr; +#endif +} + /// @endcond ESIMD_DETAIL diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp index 9abf8593ee81a..8b35815877c32 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp @@ -12,6 +12,7 @@ #pragma once +#include #include // generic work-group split barrier @@ -573,7 +574,209 @@ void __esimd_emu_write_2d(__ESIMD_DNS::simd_mask_storage_t Pred, } // yWrite loop } -#endif +/// Helper function for zero-source LSC-atomic operation accessing BTI +/// or SLM +template +auto __esimd_emu_lsc_xatomic_offset_access_0( + __ESIMD_DNS::simd_mask_storage_t Pred, + __ESIMD_DNS::vector_type_t Offsets, const char *BaseAddr, + const int BufByteWidth) { + + assert(BaseAddr != nullptr && + "Invalid BaseAddr for lsc_xatomic_operation under emulation!!"); + + __ESIMD_DNS::vector_type_t()> Output = 0; + + for (int OffsetIdx = 0; OffsetIdx < N; OffsetIdx += 1) { + if (Pred[OffsetIdx] == 0) { + // Skip Output vector elements correpsonding to + // predicates whose value is zero + continue; + } + + assert(((Offsets[OffsetIdx] & MASK)) == 0 && "Offset Alignment Error!!"); + + // ByteDistance : byte-distance from buffer-access base + int ByteDistance = Offsets[OffsetIdx]; + constexpr int ChanlCount = __ESIMD_EDNS::to_int(); + + for (int ChanelIdx = 0, VecIdx = OffsetIdx; ChanelIdx < ChanlCount; + ChanelIdx += 1, ByteDistance += rawAddressIncrement(), + VecIdx += vectorIndexIncrement()) { + + if ((ByteDistance >= 0) && (ByteDistance < BufByteWidth)) { + Output[VecIdx] = *((Ty *)(BaseAddr + ByteDistance)); + if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::iinc) { + atomic_add_fetch((Ty *)(BaseAddr + ByteDistance), + static_cast(1)); + } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::idec) { + atomic_sub_fetch((Ty *)(BaseAddr + ByteDistance), + static_cast(1)); + } + } + } + } + return Output; +} + +/// Helper function for one-source LSC-atomic operation accessing BTI +/// or SLM +template +auto __esimd_emu_lsc_xatomic_offset_access_1( + __ESIMD_DNS::simd_mask_storage_t Pred, + __ESIMD_DNS::vector_type_t Offsets, const char *BaseAddr, + const int BufByteWidth, + __ESIMD_DNS::vector_type_t()> src0) { + + assert(BaseAddr != nullptr && + "Invalid BaseAddr for lsc_xatomic_operation under emulation!!"); + + __ESIMD_DNS::vector_type_t()> Output = 0; + + static_assert(AddressScale == 1); + static_assert(ImmOffset == 0); + static_assert(DS != __ESIMD_ENS::lsc_data_size::u16u32h); + + for (int OffsetIdx = 0; OffsetIdx < N; OffsetIdx += 1) { + if (Pred[OffsetIdx] == 0) { + // Skip input vector elements correpsonding to + // predicates whose value is zero + continue; + } + + assert(((Offsets[OffsetIdx] & MASK)) == 0 && "Offset Alignment Error!!"); + + // ByteDistance : byte-distance from buffer-write base + int ByteDistance = Offsets[OffsetIdx]; + constexpr int ChanlCount = __ESIMD_EDNS::to_int(); + + for (int ChanelIdx = 0, VecIdx = OffsetIdx; ChanelIdx < ChanlCount; + ChanelIdx += 1, ByteDistance += rawAddressIncrement(), + VecIdx += vectorIndexIncrement()) { + + if ((ByteDistance >= 0) && (ByteDistance < BufByteWidth)) { + + // Keeping original values for return + Output[VecIdx] = *((Ty *)(BaseAddr + ByteDistance)); + + if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::store) { + atomic_store((Ty *)(BaseAddr + ByteDistance), src0[VecIdx]); + } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::iadd) { + atomic_add_fetch((Ty *)(BaseAddr + ByteDistance), src0[VecIdx]); + } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::isub) { + atomic_sub_fetch((Ty *)(BaseAddr + ByteDistance), src0[VecIdx]); + } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::smin) { + atomic_min((Ty *)(BaseAddr + ByteDistance), src0[VecIdx]); + } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::smax) { + atomic_max((Ty *)(BaseAddr + ByteDistance), src0[VecIdx]); + } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::umin) { + if constexpr (!__ESIMD_DNS::is_fp_type::value) { + atomic_min((Ty *)(BaseAddr + ByteDistance), src0[VecIdx]); + } + } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::umax) { + if constexpr (!__ESIMD_DNS::is_fp_type::value) { + atomic_max((Ty *)(BaseAddr + ByteDistance), src0[VecIdx]); + } + } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::fadd) { + if constexpr (__ESIMD_DNS::is_fp_type::value) { + atomic_add_fetch((Ty *)(BaseAddr + ByteDistance), src0[VecIdx]); + } + } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::fsub) { + if constexpr (__ESIMD_DNS::is_fp_type::value) { + atomic_sub_fetch((Ty *)(BaseAddr + ByteDistance), src0[VecIdx]); + } + } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::fmin) { + if constexpr (__ESIMD_DNS::is_fp_type::value) { + atomic_min((Ty *)(BaseAddr + ByteDistance), src0[VecIdx]); + } + } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::fmax) { + if constexpr (__ESIMD_DNS::is_fp_type::value) { + atomic_max((Ty *)(BaseAddr + ByteDistance), src0[VecIdx]); + } + } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::bit_and) { + // TODO : Type Check? Integral type only? + atomic_and_fetch((Ty *)(BaseAddr + ByteDistance), src0[VecIdx]); + } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::bit_or) { + // TODO : Type Check? Integral type only? + atomic_or_fetch((Ty *)(BaseAddr + ByteDistance), src0[VecIdx]); + } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::bit_xor) { + // TODO : Type Check? Integral type only? + atomic_xor_fetch((Ty *)(BaseAddr + ByteDistance), src0[VecIdx]); + } + } + } + } + return Output; +} + +/// Helper function for two-source LSC-atomic operation accessing BTI +/// or SLM +template +auto __esimd_emu_lsc_xatomic_offset_access_2( + __ESIMD_DNS::simd_mask_storage_t Pred, + __ESIMD_DNS::vector_type_t Offsets, const char *BaseAddr, + const int BufByteWidth, + __ESIMD_DNS::vector_type_t()> src0, + __ESIMD_DNS::vector_type_t()> src1) { + + assert(BaseAddr != nullptr && + "Invalid BaseAddr for lsc_xatomic_operation under emulation!!"); + + __ESIMD_DNS::vector_type_t()> Output; + + static_assert(AddressScale == 1); + static_assert(ImmOffset == 0); + static_assert(DS != __ESIMD_ENS::lsc_data_size::u16u32h); + + for (int OffsetIdx = 0; OffsetIdx < N; OffsetIdx += 1) { + if (Pred[OffsetIdx] == 0) { + // Skip input vector elements correpsonding to + // predicates whose value is zero + continue; + } + + assert(((Offsets[OffsetIdx] & MASK)) == 0 && "Offset Alignment Error!!"); + + // ByteDistance : byte-distance from buffer-write base + int ByteDistance = Offsets[OffsetIdx]; + constexpr int ChanlCount = __ESIMD_EDNS::to_int(); + + for (int ChanelIdx = 0, VecIdx = OffsetIdx; ChanelIdx < ChanlCount; + ChanelIdx += 1, ByteDistance += rawAddressIncrement(), + VecIdx += vectorIndexIncrement()) { + + if ((ByteDistance >= 0) && (ByteDistance < BufByteWidth)) { + + // Keeping original values for return + Output[VecIdx] = *((Ty *)(BaseAddr + ByteDistance)); + + if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::icas) { + atomic_cmpxchg((Ty *)(BaseAddr + ByteDistance), src0[VecIdx], + src1[VecIdx]); + } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::fcas) { + if constexpr (__ESIMD_DNS::is_fp_type::value) { + atomic_cmpxchg((Ty *)(BaseAddr + ByteDistance), src0[VecIdx], + src1[VecIdx]); + } + } + } + } + } + return Output; +} + +// End : Shared utility/helper functions for LSC support under +// emulation +#endif // __SYCL_DEVICE_ONLY__ /// SLM gather. /// Supported platforms: DG2, PVC @@ -1122,8 +1325,13 @@ __esimd_lsc_xatomic_slm_0(__ESIMD_DNS::simd_mask_storage_t pred, ; #else // __SYCL_DEVICE_ONLY__ { - __ESIMD_UNSUPPORTED_ON_HOST; - return 0; + sycl::detail::ESIMDDeviceInterface *I = + sycl::detail::getESIMDDeviceInterface(); + + return __esimd_emu_lsc_xatomic_offset_access_0< + Ty, Op, AddressScale, ImmOffset, DS, VS, _Transposed, N, + loadstoreAlignMask()>(pred, offsets, + I->__cm_emu_get_slm_ptr(), INT_MAX); } #endif // __SYCL_DEVICE_ONLY__ @@ -1157,8 +1365,13 @@ __esimd_lsc_xatomic_slm_1( ; #else // __SYCL_DEVICE_ONLY__ { - __ESIMD_UNSUPPORTED_ON_HOST; - return 0; + sycl::detail::ESIMDDeviceInterface *I = + sycl::detail::getESIMDDeviceInterface(); + + return __esimd_emu_lsc_xatomic_offset_access_1< + Ty, Op, AddressScale, ImmOffset, DS, VS, _Transposed, N, + loadstoreAlignMask()>( + pred, offsets, I->__cm_emu_get_slm_ptr(), INT_MAX, src0); } #endif // __SYCL_DEVICE_ONLY__ @@ -1194,8 +1407,13 @@ __esimd_lsc_xatomic_slm_2( ; #else // __SYCL_DEVICE_ONLY__ { - __ESIMD_UNSUPPORTED_ON_HOST; - return 0; + sycl::detail::ESIMDDeviceInterface *I = + sycl::detail::getESIMDDeviceInterface(); + + return __esimd_emu_lsc_xatomic_offset_access_2< + Ty, Op, AddressScale, ImmOffset, DS, VS, _Transposed, N, + loadstoreAlignMask()>( + pred, offsets, I->__cm_emu_get_slm_ptr(), INT_MAX, src0, src1); } #endif // __SYCL_DEVICE_ONLY__ @@ -1229,8 +1447,22 @@ __esimd_lsc_xatomic_bti_0(__ESIMD_DNS::simd_mask_storage_t pred, ; #else // __SYCL_DEVICE_ONLY__ { - __ESIMD_UNSUPPORTED_ON_HOST; - return 0; + char *accessBase; + uint32_t width; + std::mutex *mutexLock; + + sycl::detail::ESIMDDeviceInterface *I = + sycl::detail::getESIMDDeviceInterface(); + + I->sycl_get_cm_buffer_params_ptr(surf_ind, &accessBase, &width, &mutexLock); + + // Mutex is not needed as __atomic_* functions are used within + // helper function being called + // std::lock_guard lock(*mutexLock); + + return __esimd_emu_lsc_xatomic_offset_access_0< + Ty, Op, AddressScale, ImmOffset, DS, VS, _Transposed, N, + loadstoreAlignMask()>(pred, offsets, accessBase, width); } #endif // __SYCL_DEVICE_ONLY__ @@ -1267,8 +1499,23 @@ __esimd_lsc_xatomic_bti_1( ; #else // __SYCL_DEVICE_ONLY__ { - __ESIMD_UNSUPPORTED_ON_HOST; - return 0; + char *accessBase; + uint32_t width; + std::mutex *mutexLock; + + sycl::detail::ESIMDDeviceInterface *I = + sycl::detail::getESIMDDeviceInterface(); + + I->sycl_get_cm_buffer_params_ptr(surf_ind, &accessBase, &width, &mutexLock); + + // Mutex is not needed as __atomic_* functions are used within + // helper function being called + // std::lock_guard lock(*mutexLock); + + return __esimd_emu_lsc_xatomic_offset_access_1< + Ty, Op, AddressScale, ImmOffset, DS, VS, _Transposed, N, + loadstoreAlignMask()>(pred, offsets, accessBase, width, + src0); } #endif // __SYCL_DEVICE_ONLY__ @@ -1307,8 +1554,23 @@ __esimd_lsc_xatomic_bti_2( ; #else // __SYCL_DEVICE_ONLY__ { - __ESIMD_UNSUPPORTED_ON_HOST; - return 0; + char *accessBase; + uint32_t width; + std::mutex *mutexLock; + + sycl::detail::ESIMDDeviceInterface *I = + sycl::detail::getESIMDDeviceInterface(); + + I->sycl_get_cm_buffer_params_ptr(surf_ind, &accessBase, &width, &mutexLock); + + // Mutex is not needed as __atomic_* functions are used within + // helper function being called + // std::lock_guard lock(*mutexLock); + + return __esimd_emu_lsc_xatomic_offset_access_2< + Ty, Op, AddressScale, ImmOffset, DS, VS, _Transposed, N, + loadstoreAlignMask()>(pred, offsets, accessBase, width, + src0, src1); } #endif // __SYCL_DEVICE_ONLY__ @@ -1339,8 +1601,45 @@ __esimd_lsc_xatomic_stateless_0(__ESIMD_DNS::simd_mask_storage_t pred, ; #else // __SYCL_DEVICE_ONLY__ { - __ESIMD_UNSUPPORTED_ON_HOST; - return 0; + // TODO : Support AddressScale, ImmOffset + static_assert(AddressScale == 1); + static_assert(ImmOffset == 0); + static_assert(DS != __ESIMD_ENS::lsc_data_size::u16u32h); + + __ESIMD_DNS::vector_type_t()> Output = 0; + + for (int AddrIdx = 0; AddrIdx < N; AddrIdx += 1) { + if (pred[AddrIdx] == 0) { + // Skip Output vector elements correpsonding to + // predicates whose value is zero + continue; + } + + constexpr uint MASK = loadstoreAlignMask(); + constexpr int ChanlCount = __ESIMD_EDNS::to_int(); + + int ByteDistance = 0; + uintptr_t BaseAddr = addrs[AddrIdx]; + + assert(((BaseAddr & MASK)) == 0 && "Address Alignment Error!!"); + + for (int ChanelIdx = 0, VecIdx = AddrIdx; ChanelIdx < ChanlCount; + ChanelIdx += 1, ByteDistance += rawAddressIncrement(), + VecIdx += vectorIndexIncrement()) { + + // Keeping original values for return + 'load' + Output[VecIdx] = *((Ty *)(BaseAddr + ByteDistance)); + + if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::iinc) { + atomic_add_fetch((Ty *)(BaseAddr + ByteDistance), + static_cast(1)); + } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::idec) { + atomic_sub_fetch((Ty *)(BaseAddr + ByteDistance), + static_cast(1)); + } + } + } + return Output; } #endif // __SYCL_DEVICE_ONLY__ @@ -1375,8 +1674,82 @@ __esimd_lsc_xatomic_stateless_1( ; #else // __SYCL_DEVICE_ONLY__ { - __ESIMD_UNSUPPORTED_ON_HOST; - return 0; + // TODO : Support AddressScale, ImmOffset + static_assert(AddressScale == 1); + static_assert(ImmOffset == 0); + static_assert(DS != __ESIMD_ENS::lsc_data_size::u16u32h); + + __ESIMD_DNS::vector_type_t()> Output = 0; + + for (int AddrIdx = 0; AddrIdx < N; AddrIdx += 1) { + if (pred[AddrIdx] == 0) { + // Skip Output vector elements correpsonding to + // predicates whose value is zero + continue; + } + + constexpr uint MASK = loadstoreAlignMask(); + constexpr int ChanlCount = __ESIMD_EDNS::to_int(); + + int ByteDistance = 0; + uintptr_t BaseAddr = addrs[AddrIdx]; + + assert(((BaseAddr & MASK)) == 0 && "Address Alignment Error!!"); + + for (int ChanelIdx = 0, VecIdx = AddrIdx; ChanelIdx < ChanlCount; + ChanelIdx += 1, ByteDistance += rawAddressIncrement(), + VecIdx += vectorIndexIncrement()) { + + // Keeping original values for return + Output[VecIdx] = *((Ty *)(BaseAddr + ByteDistance)); + + if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::store) { + atomic_store((Ty *)(BaseAddr + ByteDistance), src0[VecIdx]); + } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::iadd) { + atomic_add_fetch((Ty *)(BaseAddr + ByteDistance), src0[VecIdx]); + } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::isub) { + atomic_sub_fetch((Ty *)(BaseAddr + ByteDistance), src0[VecIdx]); + } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::smin) { + atomic_min((Ty *)(BaseAddr + ByteDistance), src0[VecIdx]); + } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::smax) { + atomic_max((Ty *)(BaseAddr + ByteDistance), src0[VecIdx]); + } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::umin) { + if constexpr (!__ESIMD_DNS::is_fp_type::value) { + atomic_min((Ty *)(BaseAddr + ByteDistance), src0[VecIdx]); + } + } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::umax) { + if constexpr (!__ESIMD_DNS::is_fp_type::value) { + atomic_max((Ty *)(BaseAddr + ByteDistance), src0[VecIdx]); + } + } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::fadd) { + if constexpr (__ESIMD_DNS::is_fp_type::value) { + atomic_add_fetch((Ty *)(BaseAddr + ByteDistance), src0[VecIdx]); + } + } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::fsub) { + if constexpr (__ESIMD_DNS::is_fp_type::value) { + atomic_sub_fetch((Ty *)(BaseAddr + ByteDistance), src0[VecIdx]); + } + } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::fmin) { + if constexpr (__ESIMD_DNS::is_fp_type::value) { + atomic_min((Ty *)(BaseAddr + ByteDistance), src0[VecIdx]); + } + } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::fmax) { + if constexpr (__ESIMD_DNS::is_fp_type::value) { + atomic_max((Ty *)(BaseAddr + ByteDistance), src0[VecIdx]); + } + } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::bit_and) { + // TODO : Type Check? Integral type only? + atomic_and_fetch((Ty *)(BaseAddr + ByteDistance), src0[VecIdx]); + } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::bit_or) { + // TODO : Type Check? Integral type only? + atomic_or_fetch((Ty *)(BaseAddr + ByteDistance), src0[VecIdx]); + } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::bit_xor) { + // TODO : Type Check? Integral type only? + atomic_xor_fetch((Ty *)(BaseAddr + ByteDistance), src0[VecIdx]); + } + } + } + return Output; } #endif // __SYCL_DEVICE_ONLY__ @@ -1404,16 +1777,55 @@ template __ESIMD_INTRIN __ESIMD_DNS::vector_type_t()> __esimd_lsc_xatomic_stateless_2( - __ESIMD_DNS::simd_mask_storage_t pred, - __ESIMD_DNS::vector_type_t addrs, + __ESIMD_DNS::simd_mask_storage_t Pred, + __ESIMD_DNS::vector_type_t Addrs, __ESIMD_DNS::vector_type_t()> src0, __ESIMD_DNS::vector_type_t()> src1) #ifdef __SYCL_DEVICE_ONLY__ ; #else // __SYCL_DEVICE_ONLY__ { - __ESIMD_UNSUPPORTED_ON_HOST; - return 0; + // TODO : Support AddressScale, ImmOffset + static_assert(AddressScale == 1); + static_assert(ImmOffset == 0); + static_assert(DS != __ESIMD_ENS::lsc_data_size::u16u32h); + + __ESIMD_DNS::vector_type_t()> Output = 0; + + for (int AddrIdx = 0; AddrIdx < N; AddrIdx += 1) { + if (Pred[AddrIdx] == 0) { + // Skip Output vector elements correpsonding to + // predicates whose value is zero + continue; + } + + constexpr uint MASK = loadstoreAlignMask(); + constexpr int ChanlCount = __ESIMD_EDNS::to_int(); + + int ByteDistance = 0; + uintptr_t BaseAddr = Addrs[AddrIdx]; + + assert(((BaseAddr & MASK)) == 0 && "Address Alignment Error!!"); + + for (int ChanelIdx = 0, VecIdx = AddrIdx; ChanelIdx < ChanlCount; + ChanelIdx += 1, ByteDistance += rawAddressIncrement(), + VecIdx += vectorIndexIncrement()) { + + // Keeping original values for return + Output[VecIdx] = *((Ty *)(BaseAddr + ByteDistance)); + + if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::icas) { + atomic_cmpxchg((Ty *)(BaseAddr + ByteDistance), src0[VecIdx], + src1[VecIdx]); + } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::fcas) { + if constexpr (__ESIMD_DNS::is_fp_type::value) { + atomic_cmpxchg((Ty *)(BaseAddr + ByteDistance), src0[VecIdx], + src1[VecIdx]); + } + } + } + } + return Output; } #endif // __SYCL_DEVICE_ONLY__ From de70cffc1c10229b0ee4a023c42af21689eb2c1c Mon Sep 17 00:00:00 2001 From: "Ahn, Dongkyun" Date: Wed, 17 Aug 2022 19:11:45 -0700 Subject: [PATCH 2/4] __ATOMIC_RELAXED to __ATOMIC_SEQ_CST --- .../ext/intel/esimd/detail/atomic_intrin.hpp | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/sycl/include/sycl/ext/intel/esimd/detail/atomic_intrin.hpp b/sycl/include/sycl/ext/intel/esimd/detail/atomic_intrin.hpp index 5eb2e0b08f6c1..7bdab03bf035c 100644 --- a/sycl/include/sycl/ext/intel/esimd/detail/atomic_intrin.hpp +++ b/sycl/include/sycl/ext/intel/esimd/detail/atomic_intrin.hpp @@ -19,7 +19,7 @@ template Ty atomic_load(Ty *ptr) { // TODO: Windows will be supported soon __ESIMD_UNSUPPORTED_ON_HOST; #else - return __atomic_load(ptr, __ATOMIC_RELAXED); + return __atomic_load(ptr, __ATOMIC_SEQ_CST); #endif } @@ -28,7 +28,7 @@ template Ty atomic_store(Ty *ptr, Ty val) { // TODO: Windows will be supported soon __ESIMD_UNSUPPORTED_ON_HOST; #else - __atomic_store(ptr, val, __ATOMIC_RELAXED); + __atomic_store(ptr, val, __ATOMIC_SEQ_CST); #endif } @@ -37,7 +37,7 @@ template Ty atomic_add_fetch(Ty *ptr, Ty val) { // TODO: Windows will be supported soon __ESIMD_UNSUPPORTED_ON_HOST; #else - return __atomic_add_fetch(ptr, val, __ATOMIC_RELAXED); + return __atomic_add_fetch(ptr, val, __ATOMIC_SEQ_CST); #endif } @@ -46,7 +46,7 @@ template Ty atomic_sub_fetch(Ty *ptr, Ty val) { // TODO: Windows will be supported soon __ESIMD_UNSUPPORTED_ON_HOST; #else - return __atomic_sub_fetch(ptr, val, __ATOMIC_RELAXED); + return __atomic_sub_fetch(ptr, val, __ATOMIC_SEQ_CST); #endif } @@ -55,7 +55,7 @@ template Ty atomic_and_fetch(Ty *ptr, Ty val) { // TODO: Windows will be supported soon __ESIMD_UNSUPPORTED_ON_HOST; #else - return __atomic_and_fetch(ptr, val, __ATOMIC_RELAXED); + return __atomic_and_fetch(ptr, val, __ATOMIC_SEQ_CST); #endif } @@ -64,7 +64,7 @@ template Ty atomic_or_fetch(Ty *ptr, Ty val) { // TODO: Windows will be supported soon __ESIMD_UNSUPPORTED_ON_HOST; #else - return __atomic_or_fetch(ptr, val, __ATOMIC_RELAXED); + return __atomic_or_fetch(ptr, val, __ATOMIC_SEQ_CST); #endif } @@ -73,7 +73,7 @@ template Ty atomic_xor_fetch(Ty *ptr, Ty val) { // TODO: Windows will be supported soon __ESIMD_UNSUPPORTED_ON_HOST; #else - return __atomic_xor_fetch(ptr, val, __ATOMIC_RELAXED); + return __atomic_xor_fetch(ptr, val, __ATOMIC_SEQ_CST); #endif } @@ -87,7 +87,7 @@ template Ty atomic_min(Ty *ptr, Ty val) { _old = *ptr; _new = std::min(_old, val); } while (!__atomic_compare_exchange_n(ptr, &_old, _new, false, - __ATOMIC_RELAXED, __ATOMIC_RELAXED)); + __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)); return _new; #endif } @@ -102,7 +102,7 @@ template Ty atomic_max(Ty *ptr, Ty val) { _old = *ptr; _new = std::max(_old, val); } while (!__atomic_compare_exchange_n(ptr, &_old, _new, false, - __ATOMIC_RELAXED, __ATOMIC_RELAXED)); + __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)); return _new; #endif } @@ -113,8 +113,8 @@ template Ty atomic_cmpxchg(Ty *ptr, Ty expected, Ty desired) { __ESIMD_UNSUPPORTED_ON_HOST; #else Ty _old = expected; - __atomic_compare_exchange_n(ptr, &_old, desired, false, __ATOMIC_RELAXED, - __ATOMIC_RELAXED); + __atomic_compare_exchange_n(ptr, &_old, desired, false, __ATOMIC_SEQ_CST, + __ATOMIC_SEQ_CST); return *ptr; #endif } From 6cbb79d1fa336a21299dcd5696c5317cea33fdc8 Mon Sep 17 00:00:00 2001 From: "Ahn, Dongkyun" Date: Thu, 18 Aug 2022 15:22:39 -0700 Subject: [PATCH 3/4] __ESIMD_DNS namespace for atomic_* functions --- .../ext/intel/esimd/detail/atomic_intrin.hpp | 4 + .../esimd/detail/memory_intrin.hpp | 116 +++++++++++------- 2 files changed, 76 insertions(+), 44 deletions(-) diff --git a/sycl/include/sycl/ext/intel/esimd/detail/atomic_intrin.hpp b/sycl/include/sycl/ext/intel/esimd/detail/atomic_intrin.hpp index 7bdab03bf035c..6c4e9b2fd4b2d 100644 --- a/sycl/include/sycl/ext/intel/esimd/detail/atomic_intrin.hpp +++ b/sycl/include/sycl/ext/intel/esimd/detail/atomic_intrin.hpp @@ -11,6 +11,8 @@ #include +namespace __ESIMD_DNS { + // This function implements atomic update of pre-existing variable in the // absense of C++ 20's atomic_ref. @@ -119,4 +121,6 @@ template Ty atomic_cmpxchg(Ty *ptr, Ty expected, Ty desired) { #endif } +} // namespace __ESIMD_DNS + /// @endcond ESIMD_DETAIL diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp index 8b35815877c32..e6a18ac0fab05 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp @@ -610,11 +610,11 @@ auto __esimd_emu_lsc_xatomic_offset_access_0( if ((ByteDistance >= 0) && (ByteDistance < BufByteWidth)) { Output[VecIdx] = *((Ty *)(BaseAddr + ByteDistance)); if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::iinc) { - atomic_add_fetch((Ty *)(BaseAddr + ByteDistance), - static_cast(1)); + __ESIMD_DNS::atomic_add_fetch((Ty *)(BaseAddr + ByteDistance), + static_cast(1)); } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::idec) { - atomic_sub_fetch((Ty *)(BaseAddr + ByteDistance), - static_cast(1)); + __ESIMD_DNS::atomic_sub_fetch((Ty *)(BaseAddr + ByteDistance), + static_cast(1)); } } } @@ -666,48 +666,62 @@ auto __esimd_emu_lsc_xatomic_offset_access_1( Output[VecIdx] = *((Ty *)(BaseAddr + ByteDistance)); if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::store) { - atomic_store((Ty *)(BaseAddr + ByteDistance), src0[VecIdx]); + __ESIMD_DNS::atomic_store((Ty *)(BaseAddr + ByteDistance), + src0[VecIdx]); } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::iadd) { - atomic_add_fetch((Ty *)(BaseAddr + ByteDistance), src0[VecIdx]); + __ESIMD_DNS::atomic_add_fetch((Ty *)(BaseAddr + ByteDistance), + src0[VecIdx]); } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::isub) { - atomic_sub_fetch((Ty *)(BaseAddr + ByteDistance), src0[VecIdx]); + __ESIMD_DNS::atomic_sub_fetch((Ty *)(BaseAddr + ByteDistance), + src0[VecIdx]); } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::smin) { - atomic_min((Ty *)(BaseAddr + ByteDistance), src0[VecIdx]); + __ESIMD_DNS::atomic_min((Ty *)(BaseAddr + ByteDistance), + src0[VecIdx]); } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::smax) { - atomic_max((Ty *)(BaseAddr + ByteDistance), src0[VecIdx]); + __ESIMD_DNS::atomic_max((Ty *)(BaseAddr + ByteDistance), + src0[VecIdx]); } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::umin) { if constexpr (!__ESIMD_DNS::is_fp_type::value) { - atomic_min((Ty *)(BaseAddr + ByteDistance), src0[VecIdx]); + __ESIMD_DNS::atomic_min((Ty *)(BaseAddr + ByteDistance), + src0[VecIdx]); } } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::umax) { if constexpr (!__ESIMD_DNS::is_fp_type::value) { - atomic_max((Ty *)(BaseAddr + ByteDistance), src0[VecIdx]); + __ESIMD_DNS::atomic_max((Ty *)(BaseAddr + ByteDistance), + src0[VecIdx]); } } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::fadd) { if constexpr (__ESIMD_DNS::is_fp_type::value) { - atomic_add_fetch((Ty *)(BaseAddr + ByteDistance), src0[VecIdx]); + __ESIMD_DNS::atomic_add_fetch((Ty *)(BaseAddr + ByteDistance), + src0[VecIdx]); } } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::fsub) { if constexpr (__ESIMD_DNS::is_fp_type::value) { - atomic_sub_fetch((Ty *)(BaseAddr + ByteDistance), src0[VecIdx]); + __ESIMD_DNS::atomic_sub_fetch((Ty *)(BaseAddr + ByteDistance), + src0[VecIdx]); } } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::fmin) { if constexpr (__ESIMD_DNS::is_fp_type::value) { - atomic_min((Ty *)(BaseAddr + ByteDistance), src0[VecIdx]); + __ESIMD_DNS::atomic_min((Ty *)(BaseAddr + ByteDistance), + src0[VecIdx]); } } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::fmax) { if constexpr (__ESIMD_DNS::is_fp_type::value) { - atomic_max((Ty *)(BaseAddr + ByteDistance), src0[VecIdx]); + __ESIMD_DNS::atomic_max((Ty *)(BaseAddr + ByteDistance), + src0[VecIdx]); } } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::bit_and) { // TODO : Type Check? Integral type only? - atomic_and_fetch((Ty *)(BaseAddr + ByteDistance), src0[VecIdx]); + __ESIMD_DNS::atomic_and_fetch((Ty *)(BaseAddr + ByteDistance), + src0[VecIdx]); } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::bit_or) { // TODO : Type Check? Integral type only? - atomic_or_fetch((Ty *)(BaseAddr + ByteDistance), src0[VecIdx]); + __ESIMD_DNS::atomic_or_fetch((Ty *)(BaseAddr + ByteDistance), + src0[VecIdx]); } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::bit_xor) { // TODO : Type Check? Integral type only? - atomic_xor_fetch((Ty *)(BaseAddr + ByteDistance), src0[VecIdx]); + __ESIMD_DNS::atomic_xor_fetch((Ty *)(BaseAddr + ByteDistance), + src0[VecIdx]); } } } @@ -760,12 +774,12 @@ auto __esimd_emu_lsc_xatomic_offset_access_2( Output[VecIdx] = *((Ty *)(BaseAddr + ByteDistance)); if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::icas) { - atomic_cmpxchg((Ty *)(BaseAddr + ByteDistance), src0[VecIdx], - src1[VecIdx]); + __ESIMD_DNS::atomic_cmpxchg((Ty *)(BaseAddr + ByteDistance), + src0[VecIdx], src1[VecIdx]); } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::fcas) { if constexpr (__ESIMD_DNS::is_fp_type::value) { - atomic_cmpxchg((Ty *)(BaseAddr + ByteDistance), src0[VecIdx], - src1[VecIdx]); + __ESIMD_DNS::atomic_cmpxchg((Ty *)(BaseAddr + ByteDistance), + src0[VecIdx], src1[VecIdx]); } } } @@ -1631,11 +1645,11 @@ __esimd_lsc_xatomic_stateless_0(__ESIMD_DNS::simd_mask_storage_t pred, Output[VecIdx] = *((Ty *)(BaseAddr + ByteDistance)); if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::iinc) { - atomic_add_fetch((Ty *)(BaseAddr + ByteDistance), - static_cast(1)); + __ESIMD_DNS::atomic_add_fetch((Ty *)(BaseAddr + ByteDistance), + static_cast(1)); } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::idec) { - atomic_sub_fetch((Ty *)(BaseAddr + ByteDistance), - static_cast(1)); + __ESIMD_DNS::atomic_sub_fetch((Ty *)(BaseAddr + ByteDistance), + static_cast(1)); } } } @@ -1704,48 +1718,62 @@ __esimd_lsc_xatomic_stateless_1( Output[VecIdx] = *((Ty *)(BaseAddr + ByteDistance)); if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::store) { - atomic_store((Ty *)(BaseAddr + ByteDistance), src0[VecIdx]); + __ESIMD_DNS::atomic_store((Ty *)(BaseAddr + ByteDistance), + src0[VecIdx]); } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::iadd) { - atomic_add_fetch((Ty *)(BaseAddr + ByteDistance), src0[VecIdx]); + __ESIMD_DNS::atomic_add_fetch((Ty *)(BaseAddr + ByteDistance), + src0[VecIdx]); } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::isub) { - atomic_sub_fetch((Ty *)(BaseAddr + ByteDistance), src0[VecIdx]); + __ESIMD_DNS::atomic_sub_fetch((Ty *)(BaseAddr + ByteDistance), + src0[VecIdx]); } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::smin) { - atomic_min((Ty *)(BaseAddr + ByteDistance), src0[VecIdx]); + __ESIMD_DNS::atomic_min((Ty *)(BaseAddr + ByteDistance), + src0[VecIdx]); } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::smax) { - atomic_max((Ty *)(BaseAddr + ByteDistance), src0[VecIdx]); + __ESIMD_DNS::atomic_max((Ty *)(BaseAddr + ByteDistance), + src0[VecIdx]); } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::umin) { if constexpr (!__ESIMD_DNS::is_fp_type::value) { - atomic_min((Ty *)(BaseAddr + ByteDistance), src0[VecIdx]); + __ESIMD_DNS::atomic_min((Ty *)(BaseAddr + ByteDistance), + src0[VecIdx]); } } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::umax) { if constexpr (!__ESIMD_DNS::is_fp_type::value) { - atomic_max((Ty *)(BaseAddr + ByteDistance), src0[VecIdx]); + __ESIMD_DNS::atomic_max((Ty *)(BaseAddr + ByteDistance), + src0[VecIdx]); } } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::fadd) { if constexpr (__ESIMD_DNS::is_fp_type::value) { - atomic_add_fetch((Ty *)(BaseAddr + ByteDistance), src0[VecIdx]); + __ESIMD_DNS::atomic_add_fetch((Ty *)(BaseAddr + ByteDistance), + src0[VecIdx]); } } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::fsub) { if constexpr (__ESIMD_DNS::is_fp_type::value) { - atomic_sub_fetch((Ty *)(BaseAddr + ByteDistance), src0[VecIdx]); + __ESIMD_DNS::atomic_sub_fetch((Ty *)(BaseAddr + ByteDistance), + src0[VecIdx]); } } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::fmin) { if constexpr (__ESIMD_DNS::is_fp_type::value) { - atomic_min((Ty *)(BaseAddr + ByteDistance), src0[VecIdx]); + __ESIMD_DNS::atomic_min((Ty *)(BaseAddr + ByteDistance), + src0[VecIdx]); } } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::fmax) { if constexpr (__ESIMD_DNS::is_fp_type::value) { - atomic_max((Ty *)(BaseAddr + ByteDistance), src0[VecIdx]); + __ESIMD_DNS::atomic_max((Ty *)(BaseAddr + ByteDistance), + src0[VecIdx]); } } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::bit_and) { // TODO : Type Check? Integral type only? - atomic_and_fetch((Ty *)(BaseAddr + ByteDistance), src0[VecIdx]); + __ESIMD_DNS::atomic_and_fetch((Ty *)(BaseAddr + ByteDistance), + src0[VecIdx]); } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::bit_or) { // TODO : Type Check? Integral type only? - atomic_or_fetch((Ty *)(BaseAddr + ByteDistance), src0[VecIdx]); + __ESIMD_DNS::atomic_or_fetch((Ty *)(BaseAddr + ByteDistance), + src0[VecIdx]); } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::bit_xor) { // TODO : Type Check? Integral type only? - atomic_xor_fetch((Ty *)(BaseAddr + ByteDistance), src0[VecIdx]); + __ESIMD_DNS::atomic_xor_fetch((Ty *)(BaseAddr + ByteDistance), + src0[VecIdx]); } } } @@ -1815,12 +1843,12 @@ __esimd_lsc_xatomic_stateless_2( Output[VecIdx] = *((Ty *)(BaseAddr + ByteDistance)); if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::icas) { - atomic_cmpxchg((Ty *)(BaseAddr + ByteDistance), src0[VecIdx], - src1[VecIdx]); + __ESIMD_DNS::atomic_cmpxchg((Ty *)(BaseAddr + ByteDistance), + src0[VecIdx], src1[VecIdx]); } else if constexpr (Op == __ESIMD_EDNS::lsc_atomic_op::fcas) { if constexpr (__ESIMD_DNS::is_fp_type::value) { - atomic_cmpxchg((Ty *)(BaseAddr + ByteDistance), src0[VecIdx], - src1[VecIdx]); + __ESIMD_DNS::atomic_cmpxchg((Ty *)(BaseAddr + ByteDistance), + src0[VecIdx], src1[VecIdx]); } } } From 7b1f5fa9e139632c62063f99c488f602c509168b Mon Sep 17 00:00:00 2001 From: "Ahn, Dongkyun" Date: Thu, 18 Aug 2022 18:28:54 -0700 Subject: [PATCH 4/4] Test failure fixes Missing change for atomic_add_fetch() - SYCL :: esimd/flat_atomic.cpp - SYCL :: esimd/slm_atomic.cpp --- sycl/include/sycl/ext/intel/esimd/detail/memory_intrin.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/include/sycl/ext/intel/esimd/detail/memory_intrin.hpp b/sycl/include/sycl/ext/intel/esimd/detail/memory_intrin.hpp index 4dd4fefe5e954..9f906dc3cfdba 100644 --- a/sycl/include/sycl/ext/intel/esimd/detail/memory_intrin.hpp +++ b/sycl/include/sycl/ext/intel/esimd/detail/memory_intrin.hpp @@ -504,7 +504,7 @@ __esimd_svm_atomic1(__ESIMD_DNS::vector_type_t addrs, switch (Op) { case __ESIMD_NS::atomic_op::add: - retv[i] = atomic_add_fetch(p, src0[i]); + retv[i] = __ESIMD_DNS::atomic_add_fetch(p, src0[i]); break; default: __ESIMD_UNSUPPORTED_ON_HOST; @@ -847,7 +847,7 @@ __esimd_dword_atomic0(__ESIMD_DNS::simd_mask_storage_t pred, switch (Op) { case __ESIMD_NS::atomic_op::inc: - retv[i] = atomic_add_fetch(p, 1); + retv[i] = __ESIMD_DNS::atomic_add_fetch(p, 1); break; default: __ESIMD_UNSUPPORTED_ON_HOST;