diff --git a/sycl/include/sycl/ext/intel/esimd/memory.hpp b/sycl/include/sycl/ext/intel/esimd/memory.hpp index d7f9130df4007..6f3a1ebb11a2e 100644 --- a/sycl/include/sycl/ext/intel/esimd/memory.hpp +++ b/sycl/include/sycl/ext/intel/esimd/memory.hpp @@ -247,7 +247,7 @@ block_load_impl(const T *p, simd_mask<1> pred, FlagsT flags) { constexpr int SmallIntFactor64Bit = sizeof(uint64_t) / sizeof(T); constexpr int SmallIntFactor32Bit = - (std::max)(static_cast(1), sizeof(uint32_t) / sizeof(T)); + sizeof(uint32_t) / sizeof(T) > 1 ? sizeof(uint32_t) / sizeof(T) : 1; static_assert(NElts > 0 && NElts % SmallIntFactor32Bit == 0, "Number of elements is not supported by Transposed load"); @@ -333,7 +333,7 @@ block_load_impl(const T *p, simd_mask<1> pred, simd pass_thru, constexpr int SmallIntFactor64Bit = sizeof(uint64_t) / sizeof(T); constexpr int SmallIntFactor32Bit = - (std::max)(static_cast(1), sizeof(uint32_t) / sizeof(T)); + sizeof(uint32_t) / sizeof(T) > 1 ? sizeof(uint32_t) / sizeof(T) : 1; static_assert(NElts > 0 && NElts % SmallIntFactor32Bit == 0, "Number of elements is not supported by Transposed load"); @@ -430,7 +430,7 @@ __ESIMD_API constexpr int SmallIntFactor64Bit = sizeof(uint64_t) / sizeof(T); constexpr int SmallIntFactor32Bit = - (std::max)(static_cast(1), sizeof(uint32_t) / sizeof(T)); + sizeof(uint32_t) / sizeof(T) > 1 ? sizeof(uint32_t) / sizeof(T) : 1; static_assert(NElts > 0 && NElts % SmallIntFactor32Bit == 0, "Number of elements is not supported by Transposed load"); @@ -529,7 +529,7 @@ __ESIMD_API constexpr int SmallIntFactor64Bit = sizeof(uint64_t) / sizeof(T); constexpr int SmallIntFactor32Bit = - (std::max)(static_cast(1), sizeof(uint32_t) / sizeof(T)); + sizeof(uint32_t) / sizeof(T) > 1 ? sizeof(uint32_t) / sizeof(T) : 1; static_assert(NElts > 0 && NElts % SmallIntFactor32Bit == 0, "Number of elements is not supported by Transposed load"); @@ -586,9 +586,7 @@ block_store_impl(T *p, simd vals, simd_mask<1> pred, FlagsT flags) { constexpr int SmallIntFactor64Bit = sizeof(uint64_t) / sizeof(T); constexpr int SmallIntFactor32Bit = - sizeof(uint32_t) / sizeof(T) > static_cast(1) - ? sizeof(uint32_t) / sizeof(T) - : static_cast(1); + sizeof(uint32_t) / sizeof(T) > 1 ? sizeof(uint32_t) / sizeof(T) : 1; static_assert(NElts > 0 && NElts % SmallIntFactor32Bit == 0, "Number of elements is not supported by Transposed store"); @@ -2614,7 +2612,7 @@ slm_block_load(uint32_t byte_offset, simd_mask<1> pred, constexpr int SmallIntFactor64Bit = sizeof(uint64_t) / sizeof(T); constexpr int SmallIntFactor32Bit = - (std::max)(static_cast(1), sizeof(uint32_t) / sizeof(T)); + sizeof(uint32_t) / sizeof(T) > 1 ? sizeof(uint32_t) / sizeof(T) : 1; static_assert(N > 0 && N % SmallIntFactor32Bit == 0, "Number of elements is not supported by Transposed load"); @@ -2700,7 +2698,7 @@ slm_block_load(uint32_t offset, simd_mask<1> pred, simd pass_thru, constexpr int SmallIntFactor64Bit = sizeof(uint64_t) / sizeof(T); constexpr int SmallIntFactor32Bit = - (std::max)(static_cast(1), sizeof(uint32_t) / sizeof(T)); + sizeof(uint32_t) / sizeof(T) > 1 ? sizeof(uint32_t) / sizeof(T) : 1; static_assert(N > 0 && N % SmallIntFactor32Bit == 0, "Number of elements is not supported by Transposed load"); diff --git a/sycl/include/sycl/ext/intel/esimd/xmx/dpas.hpp b/sycl/include/sycl/ext/intel/esimd/xmx/dpas.hpp index 5c86b194c192b..0a23acf1b053c 100644 --- a/sycl/include/sycl/ext/intel/esimd/xmx/dpas.hpp +++ b/sycl/include/sycl/ext/intel/esimd/xmx/dpas.hpp @@ -89,8 +89,11 @@ constexpr int verify_parameters_and_deduce_exec_size() { "Cannot deduce element size of input arguments"); verify_repeat_count(); + constexpr int MaxElemBitSize = + AElemBitSize > BElemBitSize ? AElemBitSize : BElemBitSize; + constexpr int MaxElemsInDword = 32 / MaxElemBitSize; constexpr int OpsPerChannel = - (std::max)((std::min)(32 / (std::max)(AElemBitSize, BElemBitSize), 8), 1); + MaxElemsInDword > 8 ? 8 : (MaxElemsInDword < 1 ? 1 : MaxElemsInDword); // A(_Mx_K) * B(_Kx_N) + C(_Mx_N) // where: