Skip to content

Commit

Permalink
[SYCL] Support kernels accepting item in range reduction parallel_for (
Browse files Browse the repository at this point in the history
…#7478)

Previously, only kernels taking a sycl::id argument were supported.
  • Loading branch information
aelovikov-intel committed Nov 22, 2022
1 parent 39b6672 commit 5d5e9f4
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 3 deletions.
11 changes: 9 additions & 2 deletions sycl/include/sycl/item.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,12 @@ template <typename TransformedArgType, int Dims, typename KernelType>
class RoundedRangeKernel;
template <typename TransformedArgType, int Dims, typename KernelType>
class RoundedRangeKernelWithKH;

namespace reduction {
// Forward declaration only: it lets item name this function template in a
// friend declaration. Takes a range and an id of the same dimensionality and
// returns an item without offset (with_offset == false).
template <int Dims>
item<Dims, false> getDelinearizedItem(range<Dims> Range, id<Dims> Id);
} // namespace reduction
} // namespace detail
template <int dimensions> class id;
template <int dimensions> class range;

/// Identifies an instance of the function object executing at each point
/// in a range.
Expand Down Expand Up @@ -130,6 +133,10 @@ template <int dimensions = 1, bool with_offset = true> class item {
friend class detail::RoundedRangeKernelWithKH;
void set_allowed_range(const range<dimensions> rnwi) { MImpl.MExtent = rnwi; }

template <int Dims>
friend item<Dims, false>
detail::reduction::getDelinearizedItem(range<Dims> Range, id<Dims> Id);

detail::ItemBase<dimensions, with_offset> MImpl;
};

Expand Down
12 changes: 11 additions & 1 deletion sycl/include/sycl/reduction.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2368,8 +2368,18 @@ void reduction_parallel_for(handler &CGH, range<Dims> Range,
size_t Start = GroupStart + NDId.get_local_id(0);
size_t End = GroupEnd;
size_t Stride = NDId.get_local_range(0);
// Converts the linear index I into the argument passed to KernelFunc.
// The index is first delinearized against Range. If KernelFunc is invocable
// with (id<Dims>, Reducers...), that delinearized value is returned as-is;
// otherwise it is wrapped into an item<Dims, false> together with Range.
// The branch is chosen at compile time, so the lambda's return type differs
// between the two cases.
auto GetDelinearized = [&](size_t I) {
auto Id = getDelinearizedId(Range, I);
if constexpr (std::is_invocable_v<decltype(KernelFunc), id<Dims>,
decltype(Reducers)...>)
return Id;
else
// SYCL doesn't provide parallel_for accepting offset in presence of
// reductions, so use with_offset==false.
return reduction::getDelinearizedItem(Range, Id);
};
for (size_t I = Start; I < End; I += Stride)
KernelFunc(getDelinearizedId(Range, I), Reducers...);
KernelFunc(GetDelinearized(I), Reducers...);
};
if constexpr (NumArgs == 2) {
using Reduction = std::tuple_element_t<0, decltype(ReduTuple)>;
Expand Down
5 changes: 5 additions & 0 deletions sycl/include/sycl/reduction_forward.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,11 @@ enum class strategy : int {
// are limited to those below.
inline void finalizeHandler(handler &CGH);
template <class FunctorTy> void withAuxHandler(handler &CGH, FunctorTy Func);

/// Builds an item without offset (with_offset == false) from a range and an
/// id of the same dimensionality.
///
/// \param Range the range handed to the item's constructor.
/// \param Id    the id handed to the item's constructor.
/// \return the item<Dims, false> constructed from Range and Id.
template <int Dims>
item<Dims, false> getDelinearizedItem(range<Dims> Range, id<Dims> Id) {
  return item<Dims, false>{Range, Id};
}
} // namespace reduction

template <typename KernelName,
Expand Down

0 comments on commit 5d5e9f4

Please sign in to comment.