-
Notifications
You must be signed in to change notification settings - Fork 807
Handler-less kernel submit path (parallel_for with nd_range) #19294
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
3223842
fde19ca
13424de
fbc789d
591b3ec
d235b7c
6641601
0f41d5a
a6e711e
9c8040e
31cbdb9
c5cd091
998d592
4000c07
f8e9cd6
01af8bb
4469e59
ac1a5cf
5865f3a
072803c
27b3110
9041e94
ac2c5bb
8e155fb
502f637
d708c93
e9f6e4e
057a7a5
77d92ca
85aaa5c
a54422a
967d35e
ec1ef89
1f95b9b
12ef6da
2980531
01e0f9f
63d1345
4001fea
6c9525b
f871b10
18df56b
3375e77
72dc199
9715916
177277b
eb9a5d6
1f8ea92
74438ae
0e48eb6
0d47ac7
ab6812a
a35286d
27a5cf5
9144f84
ca0b632
42e2b30
943f1f7
76bcaf2
6588fe8
3c0e33c
8a20b8a
2be3d3d
f139c93
8023ec1
de94db6
066b421
eed0591
552f448
f5c0d77
b442d37
5fa8ccc
fa6d2f8
f04ed3f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -62,6 +62,20 @@ template <backend BackendName, class SyclObjectT> | |
auto get_native(const SyclObjectT &Obj) | ||
-> backend_return_t<BackendName, SyclObjectT>; | ||
|
||
/// Event-returning entry point of the handler-less nd_range kernel submit path. | ||
/// Only the exported declaration is visible here; submit_kernel_direct calls | ||
/// it when its EventNeeded template argument is true and returns the result. | ||
/// | ||
/// \param Queue queue the kernel is submitted to. | ||
/// \param Range nd_range describing the launch. | ||
/// \param HostKernel type-erased host kernel object built by the caller. | ||
/// \param DeviceKernelInfo per-kernel info object supplied by the caller. | ||
/// \param CodeLoc code location forwarded by the caller. | ||
/// \param IsTopCodeLoc the caller passes tls_code_loc_t::isToplevel() here. | ||
/// \return the event produced for this submission. | ||
template <int Dims> | ||
event __SYCL_EXPORT submit_kernel_direct_with_event_impl( | ||
const queue &Queue, const nd_range<Dims> &Range, | ||
std::shared_ptr<detail::HostKernelBase> &HostKernel, | ||
detail::DeviceKernelInfo *DeviceKernelInfo, | ||
const detail::code_location &CodeLoc, bool IsTopCodeLoc); | ||
|
||
/// Counterpart of the handler-less nd_range kernel submit entry point that | ||
/// returns nothing. Only the exported declaration is visible here; | ||
/// submit_kernel_direct calls it when its EventNeeded template argument is | ||
/// false. | ||
/// | ||
/// \param Queue queue the kernel is submitted to. | ||
/// \param Range nd_range describing the launch. | ||
/// \param HostKernel type-erased host kernel object built by the caller. | ||
/// \param DeviceKernelInfo per-kernel info object supplied by the caller. | ||
/// \param CodeLoc code location forwarded by the caller. | ||
/// \param IsTopCodeLoc the caller passes tls_code_loc_t::isToplevel() here. | ||
template <int Dims> | ||
void __SYCL_EXPORT submit_kernel_direct_without_event_impl( | ||
const queue &Queue, const nd_range<Dims> &Range, | ||
std::shared_ptr<detail::HostKernelBase> &HostKernel, | ||
detail::DeviceKernelInfo *DeviceKernelInfo, | ||
const detail::code_location &CodeLoc, bool IsTopCodeLoc); | ||
|
||
namespace detail { | ||
class queue_impl; | ||
|
||
|
@@ -141,6 +155,51 @@ class __SYCL_EXPORT SubmissionInfo { | |
}; | ||
|
||
} // namespace v1 | ||
|
||
/// Submits an nd_range kernel to \p Queue without going through a | ||
/// command-group handler (the "no-CGH" path). | ||
/// | ||
/// \tparam KernelName kernel name type; detail::auto_name by default. | ||
/// \tparam EventNeeded when true the event from the underlying submission is | ||
/// returned; when false nothing is returned. | ||
/// \tparam PropertiesT must currently be empty_properties_t (enforced by a | ||
/// static_assert below). | ||
/// \param Queue queue to submit to. | ||
/// \param Props accepted for interface completeness but ignored for now. | ||
/// \param Range nd_range of the launch. | ||
/// \param KernelFunc kernel callable; its argument type must be convertible | ||
/// from sycl::nd_item<Dims>. | ||
/// \param CodeLoc code location of the call site; defaults to | ||
/// detail::code_location::current(). | ||
template <typename KernelName = detail::auto_name, bool EventNeeded = false, | ||
typename PropertiesT, typename KernelType, int Dims> | ||
auto submit_kernel_direct( | ||
const queue &Queue, PropertiesT Props, const nd_range<Dims> &Range, | ||
const KernelType &KernelFunc, | ||
const detail::code_location &CodeLoc = detail::code_location::current()) { | ||
// TODO Properties not supported yet | ||
(void)Props; | ||
static_assert( | ||
std::is_same_v<PropertiesT, | ||
ext::oneapi::experimental::empty_properties_t>, | ||
"Setting properties not supported yet for no-CGH kernel submit."); | ||
// The code location handed to the impl functions below is read back from | ||
// this capture object, not taken from CodeLoc directly. | ||
detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); | ||
|
||
using NameT = | ||
typename detail::get_kernel_name_t<KernelName, KernelType>::name; | ||
using LambdaArgType = | ||
sycl::detail::lambda_arg_type<KernelType, nd_item<Dims>>; | ||
static_assert( | ||
std::is_convertible_v<sycl::nd_item<Dims>, LambdaArgType>, | ||
"Kernel argument of a sycl::parallel_for with sycl::nd_range " | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could the text be altered in the subsequent patches, as this code can be called not only from parallel_for? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, the plan is to extend this to other functions once parallel_for(nd_range) is complete. |
||
"must be either sycl::nd_item or be convertible from sycl::nd_item"); | ||
using TransformedArgType = sycl::nd_item<Dims>; | ||
|
||
// Type-erase the kernel callable behind HostKernelBase so it can be passed | ||
// to the non-template-on-kernel impl functions. | ||
std::shared_ptr<detail::HostKernelBase> HostKernel = std::make_shared< | ||
detail::HostKernel<KernelType, TransformedArgType, Dims>>(KernelFunc); | ||
|
||
detail::DeviceKernelInfo *DeviceKernelInfoPtr = | ||
&detail::getDeviceKernelInfo<NameT>(); | ||
|
||
// NOTE(review): presumably this is what instantiates the device-side | ||
// kernel for NameT - confirm against detail::KernelWrapper. | ||
detail::KernelWrapper<detail::WrapAs::parallel_for, NameT, KernelType, | ||
TransformedArgType, PropertiesT>::wrap(KernelFunc); | ||
|
||
// Compile-time choice between the event-returning and the void impl; the | ||
// deduced return type of this function follows the taken branch. | ||
if constexpr (EventNeeded) { | ||
return submit_kernel_direct_with_event_impl( | ||
Queue, Range, HostKernel, DeviceKernelInfoPtr, | ||
TlsCodeLocCapture.query(), TlsCodeLocCapture.isToplevel()); | ||
} else { | ||
submit_kernel_direct_without_event_impl( | ||
Queue, Range, HostKernel, DeviceKernelInfoPtr, | ||
TlsCodeLocCapture.query(), TlsCodeLocCapture.isToplevel()); | ||
} | ||
} | ||
|
||
} // namespace detail | ||
|
||
namespace ext ::oneapi ::experimental { | ||
|
@@ -3203,11 +3262,21 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> { | |
parallel_for(nd_range<Dims> Range, RestT &&...Rest) { | ||
constexpr detail::code_location CodeLoc = getCodeLocation<KernelName>(); | ||
detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); | ||
return submit( | ||
[&](handler &CGH) { | ||
CGH.template parallel_for<KernelName>(Range, Rest...); | ||
}, | ||
TlsCodeLocCapture.query()); | ||
#ifdef __DPCPP_ENABLE_UNFINISHED_NO_CGH_SUBMIT | ||
// TODO The handler-less path does not support reductions yet. | ||
if constexpr (sizeof...(RestT) == 1) { | ||
return detail::submit_kernel_direct<KernelName, true>( | ||
*this, ext::oneapi::experimental::empty_properties_t{}, Range, | ||
Rest...); | ||
} else | ||
#endif | ||
{ | ||
return submit( | ||
[&](handler &CGH) { | ||
CGH.template parallel_for<KernelName>(Range, Rest...); | ||
}, | ||
TlsCodeLocCapture.query()); | ||
} | ||
} | ||
|
||
/// parallel_for version with a kernel represented as a lambda + nd_range that | ||
|
Uh oh!
There was an error while loading. Please reload this page.