Skip to content

Commit

Permalink
[SYCL] Add support for sorting using sub-group (#7374)
Browse files Browse the repository at this point in the history
Use provided work-group or sub-group instead of creating
a separate object in sort algorithms.
  • Loading branch information
romanovvlad committed Nov 16, 2022
1 parent c6d1caf commit 168767c
Show file tree
Hide file tree
Showing 4 changed files with 62 additions and 40 deletions.
13 changes: 6 additions & 7 deletions sycl/include/sycl/detail/group_sort_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,9 @@

#pragma once

#if __cplusplus >= 201703L
#include <sycl/detail/helpers.hpp>
#include <sycl/group_barrier.hpp>
#include <sycl/multi_ptr.hpp>

#ifdef __SYCL_DEVICE_ONLY__

Expand Down Expand Up @@ -204,14 +205,13 @@ template <typename Group, typename Iter, typename Compare>
void merge_sort(Group group, Iter first, const std::size_t n, Compare comp,
std::byte *scratch) {
using T = typename GetValueType<Iter>::type;
auto id = sycl::detail::Builder::getNDItem<Group::dimensions>();
const std::size_t idx = id.get_local_linear_id();
const std::size_t idx = group.get_local_linear_id();
const std::size_t local = group.get_local_range().size();
const std::size_t chunk = (n - 1) / local + 1;

// we need to sort within work item first
bubble_sort(first, idx * chunk, sycl::min((idx + 1) * chunk, n), comp);
id.barrier();
sycl::group_barrier(group);

T *temp = reinterpret_cast<T *>(scratch);
bool data_in_temp = false;
Expand All @@ -231,7 +231,7 @@ void merge_sort(Group group, Iter first, const std::size_t n, Compare comp,
merge(offset, temp, first, start_1, end_1, end_2, start_1, comp, chunk,
/*is_first*/ false);
}
id.barrier();
sycl::group_barrier(group);

data_in_temp = !data_in_temp;
sorted_size *= 2;
Expand All @@ -246,12 +246,11 @@ void merge_sort(Group group, Iter first, const std::size_t n, Compare comp,
first[idx * chunk + i] = temp[idx * chunk + i];
}
}
id.barrier();
sycl::group_barrier(group);
}
}

} // namespace detail
} // __SYCL_INLINE_VER_NAMESPACE(_V1)
} // namespace sycl
#endif
#endif // __cplusplus >=201703L
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,7 @@ template <typename Compare = std::less<>> class default_sorter {
#ifdef __SYCL_DEVICE_ONLY__
auto range_size = g.get_local_range().size();
if (scratch_size >= memory_required<T>(Group::fence_scope, range_size)) {
auto id = sycl::detail::Builder::getNDItem<Group::dimensions>();
std::size_t local_id = id.get_local_linear_id();
std::size_t local_id = g.get_local_linear_id();
T *temp = reinterpret_cast<T *>(scratch);
::new (temp + local_id) T(val);
sycl::detail::merge_sort(g, temp, range_size, comp,
Expand Down
32 changes: 1 addition & 31 deletions sycl/include/sycl/group_algorithm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include <sycl/ext/oneapi/functional.hpp>
#include <sycl/functional.hpp>
#include <sycl/group.hpp>
#include <sycl/group_barrier.hpp>
#include <sycl/known_identity.hpp>
#include <sycl/nd_item.hpp>
#include <sycl/sub_group.hpp>
Expand Down Expand Up @@ -1006,36 +1007,5 @@ joint_inclusive_scan(Group g, InPtr first, InPtr last, OutPtr result,
return joint_inclusive_scan(g, first, last, result, binary_op, init);
}

namespace detail {
template <typename G> struct group_barrier_scope {};
template <> struct group_barrier_scope<sycl::sub_group> {
constexpr static auto Scope = __spv::Scope::Subgroup;
};
template <int D> struct group_barrier_scope<sycl::group<D>> {
constexpr static auto Scope = __spv::Scope::Workgroup;
};
} // namespace detail

template <typename Group>
typename std::enable_if<is_group_v<Group>>::type
group_barrier(Group, memory_scope FenceScope = Group::fence_scope) {
(void)FenceScope;
#ifdef __SYCL_DEVICE_ONLY__
// Per SYCL spec, group_barrier must perform both control barrier and memory
// fence operations. All work-items execute a release fence prior to
// barrier and acquire fence afterwards. The rest of semantics flags specify
// which type of memory this behavior is applied to.
__spirv_ControlBarrier(detail::group_barrier_scope<Group>::Scope,
sycl::detail::spirv::getScope(FenceScope),
__spv::MemorySemanticsMask::SequentiallyConsistent |
__spv::MemorySemanticsMask::SubgroupMemory |
__spv::MemorySemanticsMask::WorkgroupMemory |
__spv::MemorySemanticsMask::CrossWorkgroupMemory);
#else
throw sycl::runtime_error("Barriers are not supported on host device",
PI_ERROR_INVALID_DEVICE);
#endif
}

} // __SYCL_INLINE_VER_NAMESPACE(_V1)
} // namespace sycl
54 changes: 54 additions & 0 deletions sycl/include/sycl/group_barrier.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@

//==------------------------- group_barrier.hpp ----------------------------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#pragma once

#include <CL/__spirv/spirv_ops.hpp>
#include <CL/__spirv/spirv_types.hpp>
#include <CL/__spirv/spirv_vars.hpp>
#include <sycl/detail/spirv.hpp>
#include <sycl/detail/type_traits.hpp>
#include <sycl/group.hpp>

namespace sycl {
__SYCL_INLINE_VER_NAMESPACE(_V1) {

namespace detail {
template <typename G> struct group_barrier_scope {};
template <> struct group_barrier_scope<sycl::sub_group> {
constexpr static auto Scope = __spv::Scope::Subgroup;
};
template <int D> struct group_barrier_scope<sycl::group<D>> {
constexpr static auto Scope = __spv::Scope::Workgroup;
};
} // namespace detail

template <typename Group>
typename std::enable_if<is_group_v<Group>>::type
group_barrier(Group, memory_scope FenceScope = Group::fence_scope) {
(void)FenceScope;
#ifdef __SYCL_DEVICE_ONLY__
// Per SYCL spec, group_barrier must perform both control barrier and memory
// fence operations. All work-items execute a release fence prior to
// barrier and acquire fence afterwards. The rest of semantics flags specify
// which type of memory this behavior is applied to.
__spirv_ControlBarrier(detail::group_barrier_scope<Group>::Scope,
sycl::detail::spirv::getScope(FenceScope),
__spv::MemorySemanticsMask::SequentiallyConsistent |
__spv::MemorySemanticsMask::SubgroupMemory |
__spv::MemorySemanticsMask::WorkgroupMemory |
__spv::MemorySemanticsMask::CrossWorkgroupMemory);
#else
throw sycl::runtime_error("Barriers are not supported on host device",
PI_ERROR_INVALID_DEVICE);
#endif
}

} // __SYCL_INLINE_VER_NAMESPACE(_V1)
} // namespace sycl

0 comments on commit 168767c

Please sign in to comment.