diff --git a/include/common/core/common.hpp b/include/common/core/common.hpp index ee4587410..3a63ec898 100644 --- a/include/common/core/common.hpp +++ b/include/common/core/common.hpp @@ -112,9 +112,8 @@ enum class data_size : uint8_t { /// The specific LSC shared function to fence with xetla_fence enum class memory_kind : uint8_t { untyped_global = 0, /// untyped global memory - untyped_global_low_pri = 1, /// low-priority untyped global memory - typed_global = 2, /// typed global memory - shared_local = 3, /// shared local memory + typed_global = 1, /// typed global memory + shared_local = 2, /// shared local memory }; /// The xetla_fence operation to apply to caches @@ -122,10 +121,8 @@ enum class fence_op : uint8_t { none = 0, /// no operation evict = 1, /// dirty lines evicted and invalidated from L1 invalidate = 2, /// invalidate all clean lines - discard = 3, /// direct and clean lines are discarded w/o eviction + clean = 4, /// dirty lines are written to memory, but retained in cache - /// in clean state - flushl2 = 5, /// flush only L2 }; /// The scope that xetla_fence operation should apply to enum class fence_scope : uint8_t { diff --git a/include/common/core/memory.hpp b/include/common/core/memory.hpp index cfb0a1a7a..5012896a6 100644 --- a/include/common/core/memory.hpp +++ b/include/common/core/memory.hpp @@ -72,55 +72,49 @@ constexpr __ESIMD_ENS::lsc_data_size get_data_size(gpu::xetla::data_size ds) { /// @brief lookup table for memory kind. 
/// /// -constexpr __ESIMD_ENS::lsc_memory_kind get_memory_kind( +constexpr sycl::ext::intel::esimd::memory_kind get_memory_kind( gpu::xetla::memory_kind mk) { switch (mk) { case gpu::xetla::memory_kind::untyped_global: - return __ESIMD_ENS::lsc_memory_kind::untyped_global; - case gpu::xetla::memory_kind::untyped_global_low_pri: - return __ESIMD_ENS::lsc_memory_kind::untyped_global_low_pri; + return sycl::ext::intel::esimd::memory_kind::global; case gpu::xetla::memory_kind::typed_global: - return __ESIMD_ENS::lsc_memory_kind::typed_global; + return sycl::ext::intel::esimd::memory_kind::image; case gpu::xetla::memory_kind::shared_local: - return __ESIMD_ENS::lsc_memory_kind::shared_local; + return sycl::ext::intel::esimd::memory_kind::local; } } /// @brief lookup table for fence op. /// /// -constexpr __ESIMD_ENS::lsc_fence_op get_fence_op(gpu::xetla::fence_op fo) { +constexpr sycl::ext::intel::esimd::fence_flush_op get_fence_op(gpu::xetla::fence_op fo) { switch (fo) { - case gpu::xetla::fence_op::none: return __ESIMD_ENS::lsc_fence_op::none; + case gpu::xetla::fence_op::none: return sycl::ext::intel::esimd::fence_flush_op::none; case gpu::xetla::fence_op::evict: - return __ESIMD_ENS::lsc_fence_op::evict; + return sycl::ext::intel::esimd::fence_flush_op::evict; case gpu::xetla::fence_op::invalidate: - return __ESIMD_ENS::lsc_fence_op::invalidate; - case gpu::xetla::fence_op::discard: - return __ESIMD_ENS::lsc_fence_op::discard; + return sycl::ext::intel::esimd::fence_flush_op::invalidate; case gpu::xetla::fence_op::clean: - return __ESIMD_ENS::lsc_fence_op::clean; - case gpu::xetla::fence_op::flushl2: - return __ESIMD_ENS::lsc_fence_op::flushl3; + return sycl::ext::intel::esimd::fence_flush_op::clean; } } /// @brief lookup table for fence scope. 
/// /// -constexpr __ESIMD_ENS::lsc_scope get_fence_scope(gpu::xetla::fence_scope fs) { +constexpr sycl::ext::intel::esimd::fence_scope get_fence_scope(gpu::xetla::fence_scope fs) { switch (fs) { case gpu::xetla::fence_scope::group: - return __ESIMD_ENS::lsc_scope::group; + return sycl::ext::intel::esimd::fence_scope::group; case gpu::xetla::fence_scope::local: - return __ESIMD_ENS::lsc_scope::local; - case gpu::xetla::fence_scope::tile: return __ESIMD_ENS::lsc_scope::tile; - case gpu::xetla::fence_scope::gpu: return __ESIMD_ENS::lsc_scope::gpu; - case gpu::xetla::fence_scope::gpus: return __ESIMD_ENS::lsc_scope::gpus; + return sycl::ext::intel::esimd::fence_scope::local; + case gpu::xetla::fence_scope::tile: return sycl::ext::intel::esimd::fence_scope::tile; + case gpu::xetla::fence_scope::gpu: return sycl::ext::intel::esimd::fence_scope::gpu; + case gpu::xetla::fence_scope::gpus: return sycl::ext::intel::esimd::fence_scope::gpus; case gpu::xetla::fence_scope::system: - return __ESIMD_ENS::lsc_scope::system; + return sycl::ext::intel::esimd::fence_scope::system; case gpu::xetla::fence_scope::sysacq: - return __ESIMD_ENS::lsc_scope::sysacq; + return sycl::ext::intel::esimd::fence_scope::system_acquire; } } @@ -635,10 +629,10 @@ __XETLA_API xetla_vector xetla_atomic_local( template -__XETLA_API void xetla_fence(xetla_mask pred = 1) { - __ESIMD_ENS::lsc_fence(pred); + gpu::xetla::detail::get_fence_scope(Scope)>(); } /// @} xetla_core_memory diff --git a/include/subgroup/tile/impl/tile_op_functor.hpp b/include/subgroup/tile/impl/tile_op_functor.hpp index 0f56b2554..379bd062d 100644 --- a/include/subgroup/tile/impl/tile_op_functor.hpp +++ b/include/subgroup/tile/impl/tile_op_functor.hpp @@ -719,7 +719,7 @@ struct elemwise_reduce_op_t {}; /// @brief Is the element-wise reduce op functor, specialized for Xe architecture. 
template struct elemwise_reduce_op_t> { + std::enable_if_t<(arch_tag <= gpu_arch::Xe)>> { using dtype_in = dtype_in_; using mem_desc_in_t = mem_desc_t; @@ -1148,7 +1148,7 @@ struct linear_op_t {}; /// @brief Is the linear_op functor, specialized for Xe architecture. template struct linear_op_t> { + std::enable_if_t<(arch_tag <= gpu_arch::Xe)>> { using dtype_in = dtype_in_; using mem_desc_in_t = mem_desc_t; diff --git a/tests/utils/profiling.hpp b/tests/utils/profiling.hpp index d4c10b8ca..60ef682ab 100644 --- a/tests/utils/profiling.hpp +++ b/tests/utils/profiling.hpp @@ -103,7 +103,7 @@ class profiling_helper { //time mean square error for (int i = 1; i < iter; i++) { #if (__LIBSYCL_MAJOR_VERSION >= 7) && (__LIBSYCL_MINOR_VERSION >= 1) - stat.variance += sycl::pow(time[i] - stat.mean, 2); + stat.variance += sycl::pow(time[i] - stat.mean, (double)2); #else stat.variance += pow(time[i] - stat.mean, 2); #endif