Skip to content
This repository was archived by the owner on Aug 30, 2024. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 3 additions & 6 deletions include/common/core/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,20 +112,17 @@ enum class data_size : uint8_t {
/// The specific LSC shared function to fence with xetla_fence
enum class memory_kind : uint8_t {
untyped_global = 0, /// untyped global memory
untyped_global_low_pri = 1, /// low-priority untyped global memory
typed_global = 2, /// typed global memory
shared_local = 3, /// shared local memory
typed_global = 1, /// typed global memory
shared_local = 2, /// shared local memory
};

/// The xetla_fence operation to apply to caches
enum class fence_op : uint8_t {
none = 0, /// no operation
evict = 1, /// dirty lines evicted and invalidated from L1
invalidate = 2, /// invalidate all clean lines
discard = 3, /// direct and clean lines are discarded w/o eviction

clean = 4, /// dirty lines are written to memory, but retained in cache
/// in clean state
flushl2 = 5, /// flush only L2
};
/// The scope that xetla_fence operation should apply to
enum class fence_scope : uint8_t {
Expand Down
46 changes: 20 additions & 26 deletions include/common/core/memory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,55 +72,49 @@ constexpr __ESIMD_ENS::lsc_data_size get_data_size(gpu::xetla::data_size ds) {
/// @brief lookup table for memory kind.
///
///
constexpr __ESIMD_ENS::lsc_memory_kind get_memory_kind(
constexpr sycl::ext::intel::esimd::memory_kind get_memory_kind(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems that we can use __ESIMD_NS (include/common/core/common.hpp)

gpu::xetla::memory_kind mk) {
switch (mk) {
case gpu::xetla::memory_kind::untyped_global:
return __ESIMD_ENS::lsc_memory_kind::untyped_global;
case gpu::xetla::memory_kind::untyped_global_low_pri:
return __ESIMD_ENS::lsc_memory_kind::untyped_global_low_pri;
return sycl::ext::intel::esimd::memory_kind::image;
case gpu::xetla::memory_kind::typed_global:
return __ESIMD_ENS::lsc_memory_kind::typed_global;
return sycl::ext::intel::esimd::memory_kind::global;
case gpu::xetla::memory_kind::shared_local:
return __ESIMD_ENS::lsc_memory_kind::shared_local;
return sycl::ext::intel::esimd::memory_kind::local;
}
}

/// @brief lookup table for fence op.
///
///
constexpr __ESIMD_ENS::lsc_fence_op get_fence_op(gpu::xetla::fence_op fo) {
constexpr sycl::ext::intel::esimd::fence_flush_op get_fence_op(gpu::xetla::fence_op fo) {
switch (fo) {
case gpu::xetla::fence_op::none: return __ESIMD_ENS::lsc_fence_op::none;
case gpu::xetla::fence_op::none: return sycl::ext::intel::esimd::fence_flush_op::none;
case gpu::xetla::fence_op::evict:
return __ESIMD_ENS::lsc_fence_op::evict;
return sycl::ext::intel::esimd::fence_flush_op::evict;
case gpu::xetla::fence_op::invalidate:
return __ESIMD_ENS::lsc_fence_op::invalidate;
case gpu::xetla::fence_op::discard:
return __ESIMD_ENS::lsc_fence_op::discard;
return sycl::ext::intel::esimd::fence_flush_op::invalidate;
case gpu::xetla::fence_op::clean:
return __ESIMD_ENS::lsc_fence_op::clean;
case gpu::xetla::fence_op::flushl2:
return __ESIMD_ENS::lsc_fence_op::flushl3;
return sycl::ext::intel::esimd::fence_flush_op::clean;
}
}

/// @brief lookup table for fence scope.
///
///
constexpr __ESIMD_ENS::lsc_scope get_fence_scope(gpu::xetla::fence_scope fs) {
constexpr sycl::ext::intel::esimd::fence_scope get_fence_scope(gpu::xetla::fence_scope fs) {
switch (fs) {
case gpu::xetla::fence_scope::group:
return __ESIMD_ENS::lsc_scope::group;
return sycl::ext::intel::esimd::fence_scope::group;
case gpu::xetla::fence_scope::local:
return __ESIMD_ENS::lsc_scope::local;
case gpu::xetla::fence_scope::tile: return __ESIMD_ENS::lsc_scope::tile;
case gpu::xetla::fence_scope::gpu: return __ESIMD_ENS::lsc_scope::gpu;
case gpu::xetla::fence_scope::gpus: return __ESIMD_ENS::lsc_scope::gpus;
return sycl::ext::intel::esimd::fence_scope::local;
case gpu::xetla::fence_scope::tile: return sycl::ext::intel::esimd::fence_scope::tile;
case gpu::xetla::fence_scope::gpu: return sycl::ext::intel::esimd::fence_scope::gpu;
case gpu::xetla::fence_scope::gpus: return sycl::ext::intel::esimd::fence_scope::gpus;
case gpu::xetla::fence_scope::system:
return __ESIMD_ENS::lsc_scope::system;
return sycl::ext::intel::esimd::fence_scope::system;
case gpu::xetla::fence_scope::sysacq:
return __ESIMD_ENS::lsc_scope::sysacq;
return sycl::ext::intel::esimd::fence_scope::system_acquire;
}
}

Expand Down Expand Up @@ -635,10 +629,10 @@ __XETLA_API xetla_vector<T, N> xetla_atomic_local(
template <memory_kind Kind = memory_kind::untyped_global,
fence_op FenceOp = fence_op::none,
fence_scope Scope = fence_scope::group, int N = 16>
__XETLA_API void xetla_fence(xetla_mask<N> pred = 1) {
__ESIMD_ENS::lsc_fence<gpu::xetla::detail::get_memory_kind(Kind),
__XETLA_API void xetla_fence() {
sycl::ext::intel::esimd::fence<gpu::xetla::detail::get_memory_kind(Kind),
gpu::xetla::detail::get_fence_op(FenceOp),
gpu::xetla::detail::get_fence_scope(Scope), N>(pred);
gpu::xetla::detail::get_fence_scope(Scope)>();
}

/// @} xetla_core_memory
Expand Down
4 changes: 2 additions & 2 deletions include/subgroup/tile/impl/tile_op_functor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -719,7 +719,7 @@ struct elemwise_reduce_op_t {};
/// @brief Is the element-wise reduce op functor, specialized for Xe architecture.
template <reduce_op reduce_kind_, typename dtype_in_, gpu_arch arch_tag>
struct elemwise_reduce_op_t<reduce_kind_, dtype_in_, arch_tag,
std::enable_if_t<(arch_tag == gpu_arch::Xe)>> {
std::enable_if_t<(arch_tag <= gpu_arch::Xe)>> {
using dtype_in = dtype_in_;
using mem_desc_in_t
= mem_desc_t<dtype_in, mem_layout::row_major, mem_space::global>;
Expand Down Expand Up @@ -1148,7 +1148,7 @@ struct linear_op_t {};
/// @brief Is the linear_op functor, specialized for Xe architecture.
template <typename dtype_in_, gpu_arch arch_tag>
struct linear_op_t<dtype_in_, arch_tag,
std::enable_if_t<(arch_tag == gpu_arch::Xe)>> {
std::enable_if_t<(arch_tag <= gpu_arch::Xe)>> {
using dtype_in = dtype_in_;
using mem_desc_in_t
= mem_desc_t<dtype_in, mem_layout::row_major, mem_space::global>;
Expand Down
2 changes: 1 addition & 1 deletion tests/utils/profiling.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ class profiling_helper {
//time mean square error
for (int i = 1; i < iter; i++) {
#if (__LIBSYCL_MAJOR_VERSION >= 7) && (__LIBSYCL_MINOR_VERSION >= 1)
stat.variance += sycl::pow(time[i] - stat.mean, 2);
stat.variance += sycl::pow(time[i] - stat.mean, (double)2);
#else
stat.variance += pow(time[i] - stat.mean, 2);
#endif
Expand Down