diff --git a/SYCL/Reduction/reduction_internal.cpp b/SYCL/Reduction/reduction_internal.cpp new file mode 100644 index 0000000000..d87a6e8b62 --- /dev/null +++ b/SYCL/Reduction/reduction_internal.cpp @@ -0,0 +1,147 @@ +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out + +#include +using namespace sycl; + +template auto get_global_range(range Range) { return Range; } +template auto get_global_range(nd_range NDRange) { + return NDRange.get_global_range(); +} + +template +auto get_global_id(item Item) { + return Item.get_id(); +} +template auto get_global_id(nd_item NDItem) { + return NDItem.get_global_id(); +} + +template auto get_global_id(id Id) { return Id; } + +// We can select strategy explicitly so no need to test all combinations of +// types/operations. +using T = int; +using BinOpTy = std::plus; + +// On Windows, allocating new memory and then initializing it is slow for some +// reason (not related to reductions). Try to re-use the same memory between +// test cases. +struct RedStorage { + RedStorage(queue &q) : q(q), Ptr(malloc_device(1, q)), Buf(1) {} + ~RedStorage() { free(Ptr, q); } + + template auto get() { + if constexpr (UseUSM) + return Ptr; + else + return Buf; + } + queue &q; + T *Ptr; + buffer Buf; +}; + +template +static void test(RedStorage &Storage, RangeTy Range) { + queue &q = Storage.q; + + T Init{19}; + + auto Red = Storage.get(); + auto GetRedAcc = [&](handler &cgh) { + if constexpr (UseUSM) + return Red; + else + return accessor{Red, cgh}; + }; + + q.submit([&](handler &cgh) { + auto RedAcc = GetRedAcc(cgh); + cgh.single_task([=]() { RedAcc[0] = Init; }); + }).wait(); + + q.submit([&](handler &cgh) { + auto RedSycl = [&]() { + if constexpr (UseUSM) + if constexpr (InitToIdentity) + return reduction(Red, BinOpTy{}, + property::reduction::initialize_to_identity{}); + else + return reduction(Red, BinOpTy{}); + else if constexpr (InitToIdentity) + return reduction(Red, cgh, BinOpTy{}, + property::reduction::initialize_to_identity{}); + else + return reduction(Red, cgh, BinOpTy{}); + }(); + detail::reduction_parallel_for( + cgh, Range, ext::oneapi::experimental::detail::empty_properties_t{}, + RedSycl, [=](auto Item, auto &Red) { Red.combine(T{1}); }); + }).wait(); + + auto *Result = malloc_shared(1, q); + q.submit([&](handler &cgh) { + auto RedAcc = GetRedAcc(cgh); + cgh.single_task([=]() { *Result = RedAcc[0]; }); + }).wait(); + + auto N = get_global_range(Range).size(); + int Expected = InitToIdentity ? N : Init + N; +#if defined(__PRETTY_FUNCTION__) + std::cout << __PRETTY_FUNCTION__; +#elif defined(__FUNCSIG__) + std::cout << __FUNCSIG__; +#endif + std::cout << ": " << *Result << ", expected " << Expected << std::endl; + assert(*Result == Expected); + + free(Result, q); +} + +template +void loop_impl(std::integer_sequence, F &&f) { + (f(std::integral_constant{}), ...); +} + +template void loop(F &&f) { + loop_impl(std::make_integer_sequence{}, std::forward(f)); +} + +template +void testAllStrategies(RedStorage &Storage, RangeTy Range) { + loop<(int)detail::reduction::strategy::multi>([&](auto Id) { + constexpr auto Strategy = + // Skip auto_select == 0. + detail::reduction::strategy{decltype(Id)::value + 1}; + test(Storage, Range); + }); +} + +int main() { + queue q; + RedStorage Storage(q); + + auto TestRange = [&](auto Range) { + testAllStrategies(Storage, Range); + testAllStrategies(Storage, Range); + testAllStrategies(Storage, Range); + testAllStrategies(Storage, Range); + }; + + TestRange(range<1>{42}); + TestRange(range<2>{8, 8}); + TestRange(range<3>{7, 7, 5}); + TestRange(nd_range<1>{range<1>{7}, range<1>{7}}); + TestRange(nd_range<1>{range<1>{3 * 3}, range<1>{3}}); + + // TODO: Strategies historically adopted from sycl::range implementation only + // support 1-Dim case. + // + // TestRange(nd_range<2>{range<2>{7, 3}, range<2> {7, 3}}); + // TestRange(nd_range<2>{range<2>{14, 9}, range<2> {7, 3}}); + return 0; +}