From b8b5f5760afb4f1ed4078caaea189491159c1846 Mon Sep 17 00:00:00 2001 From: "Meng, Hengyu" Date: Mon, 5 Feb 2024 07:12:39 +0000 Subject: [PATCH 1/3] upgrade oneapi to 2024.1 1/2 - lsc_fence -> fence --- include/common/core/common.hpp | 9 +++---- include/common/core/memory.hpp | 46 +++++++++++++++------------------- 2 files changed, 23 insertions(+), 32 deletions(-) diff --git a/include/common/core/common.hpp b/include/common/core/common.hpp index ee4587410..3a63ec898 100644 --- a/include/common/core/common.hpp +++ b/include/common/core/common.hpp @@ -112,9 +112,8 @@ enum class data_size : uint8_t { /// The specific LSC shared function to fence with xetla_fence enum class memory_kind : uint8_t { untyped_global = 0, /// untyped global memory - untyped_global_low_pri = 1, /// low-priority untyped global memory - typed_global = 2, /// typed global memory - shared_local = 3, /// shared local memory + typed_global = 1, /// typed global memory + shared_local = 2, /// shared local memory }; /// The xetla_fence operation to apply to caches @@ -122,10 +121,8 @@ enum class fence_op : uint8_t { none = 0, /// no operation evict = 1, /// dirty lines evicted and invalidated from L1 invalidate = 2, /// invalidate all clean lines - discard = 3, /// direct and clean lines are discarded w/o eviction + clean = 4, /// dirty lines are written to memory, but retained in cache - /// in clean state - flushl2 = 5, /// flush only L2 }; /// The scope that xetla_fence operation should apply to enum class fence_scope : uint8_t { diff --git a/include/common/core/memory.hpp b/include/common/core/memory.hpp index cfb0a1a7a..5012896a6 100644 --- a/include/common/core/memory.hpp +++ b/include/common/core/memory.hpp @@ -72,55 +72,49 @@ constexpr __ESIMD_ENS::lsc_data_size get_data_size(gpu::xetla::data_size ds) { /// @brief lookup table for memory kind. /// /// -constexpr __ESIMD_ENS::lsc_memory_kind get_memory_kind( +constexpr sycl::ext::intel::esimd::memory_kind get_memory_kind( gpu::xetla::memory_kind mk) { switch (mk) { case gpu::xetla::memory_kind::untyped_global: - return __ESIMD_ENS::lsc_memory_kind::untyped_global; - case gpu::xetla::memory_kind::untyped_global_low_pri: - return __ESIMD_ENS::lsc_memory_kind::untyped_global_low_pri; + return sycl::ext::intel::esimd::memory_kind::image; case gpu::xetla::memory_kind::typed_global: - return __ESIMD_ENS::lsc_memory_kind::typed_global; + return sycl::ext::intel::esimd::memory_kind::global; case gpu::xetla::memory_kind::shared_local: - return __ESIMD_ENS::lsc_memory_kind::shared_local; + return sycl::ext::intel::esimd::memory_kind::local; } } /// @brief lookup table for fence op. /// /// -constexpr __ESIMD_ENS::lsc_fence_op get_fence_op(gpu::xetla::fence_op fo) { +constexpr sycl::ext::intel::esimd::fence_flush_op get_fence_op(gpu::xetla::fence_op fo) { switch (fo) { - case gpu::xetla::fence_op::none: return __ESIMD_ENS::lsc_fence_op::none; + case gpu::xetla::fence_op::none: return sycl::ext::intel::esimd::fence_flush_op::none; case gpu::xetla::fence_op::evict: - return __ESIMD_ENS::lsc_fence_op::evict; + return sycl::ext::intel::esimd::fence_flush_op::evict; case gpu::xetla::fence_op::invalidate: - return __ESIMD_ENS::lsc_fence_op::invalidate; - case gpu::xetla::fence_op::discard: - return __ESIMD_ENS::lsc_fence_op::discard; + return sycl::ext::intel::esimd::fence_flush_op::invalidate; case gpu::xetla::fence_op::clean: - return __ESIMD_ENS::lsc_fence_op::clean; - case gpu::xetla::fence_op::flushl2: - return __ESIMD_ENS::lsc_fence_op::flushl3; + return sycl::ext::intel::esimd::fence_flush_op::clean; } } /// @brief lookup table for fence scope. /// /// -constexpr __ESIMD_ENS::lsc_scope get_fence_scope(gpu::xetla::fence_scope fs) { +constexpr sycl::ext::intel::esimd::fence_scope get_fence_scope(gpu::xetla::fence_scope fs) { switch (fs) { case gpu::xetla::fence_scope::group: - return __ESIMD_ENS::lsc_scope::group; + return sycl::ext::intel::esimd::fence_scope::group; case gpu::xetla::fence_scope::local: - return __ESIMD_ENS::lsc_scope::local; - case gpu::xetla::fence_scope::tile: return __ESIMD_ENS::lsc_scope::tile; - case gpu::xetla::fence_scope::gpu: return __ESIMD_ENS::lsc_scope::gpu; - case gpu::xetla::fence_scope::gpus: return __ESIMD_ENS::lsc_scope::gpus; + return sycl::ext::intel::esimd::fence_scope::local; + case gpu::xetla::fence_scope::tile: return sycl::ext::intel::esimd::fence_scope::tile; + case gpu::xetla::fence_scope::gpu: return sycl::ext::intel::esimd::fence_scope::gpu; + case gpu::xetla::fence_scope::gpus: return sycl::ext::intel::esimd::fence_scope::gpus; case gpu::xetla::fence_scope::system: - return __ESIMD_ENS::lsc_scope::system; + return sycl::ext::intel::esimd::fence_scope::system; case gpu::xetla::fence_scope::sysacq: - return __ESIMD_ENS::lsc_scope::sysacq; + return sycl::ext::intel::esimd::fence_scope::system_acquire; } } @@ -635,10 +629,10 @@ __XETLA_API xetla_vector xetla_atomic_local( template -__XETLA_API void xetla_fence(xetla_mask pred = 1) { - __ESIMD_ENS::lsc_fence(pred); + gpu::xetla::detail::get_fence_scope(Scope)>(); } /// @} xetla_core_memory From c706d3d9f737459cd2120a027cb305517f890dd9 Mon Sep 17 00:00:00 2001 From: "Meng, Hengyu" Date: Mon, 5 Feb 2024 07:13:15 +0000 Subject: [PATCH 2/3] upgrade oneapi to 2024.1 2/2 - pow(T1, T2) -> pow(T, T) --- tests/utils/profiling.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/utils/profiling.hpp b/tests/utils/profiling.hpp index d4c10b8ca..60ef682ab 100644 --- a/tests/utils/profiling.hpp +++ b/tests/utils/profiling.hpp @@ -103,7 +103,7 @@ class profiling_helper { //time mean square error for (int i = 1; i < iter; i++) { #if (__LIBSYCL_MAJOR_VERSION >= 7) && (__LIBSYCL_MINOR_VERSION >= 1) - stat.variance += sycl::pow(time[i] - stat.mean, 2); + stat.variance += sycl::pow(time[i] - stat.mean, (double)2); #else stat.variance += pow(time[i] - stat.mean, 2); #endif From 7d23db72dd44bbe7c9d0ba103f62ae17e90cc5f2 Mon Sep 17 00:00:00 2001 From: "Meng, Hengyu" Date: Thu, 22 Feb 2024 10:12:12 +0000 Subject: [PATCH 3/3] enable elemwise_reduce and linear tile_op for arc --- include/subgroup/tile/impl/tile_op_functor.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/subgroup/tile/impl/tile_op_functor.hpp b/include/subgroup/tile/impl/tile_op_functor.hpp index 0f56b2554..379bd062d 100644 --- a/include/subgroup/tile/impl/tile_op_functor.hpp +++ b/include/subgroup/tile/impl/tile_op_functor.hpp @@ -719,7 +719,7 @@ struct elemwise_reduce_op_t {}; /// @brief Is the element-wise reduce op functor, specialized for Xe architecture. template struct elemwise_reduce_op_t> { + std::enable_if_t<(arch_tag <= gpu_arch::Xe)>> { using dtype_in = dtype_in_; using mem_desc_in_t = mem_desc_t; @@ -1148,7 +1148,7 @@ struct linear_op_t {}; /// @brief Is the linear_op functor, specialized for Xe architecture. template struct linear_op_t> { + std::enable_if_t<(arch_tag <= gpu_arch::Xe)>> { using dtype_in = dtype_in_; using mem_desc_in_t = mem_desc_t;