Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
b5ac52a
Implement free function kernel enqueue functions
lbushi25 Nov 20, 2025
63d860c
Remove unused code
lbushi25 Nov 20, 2025
00e0f0d
Improve comments
lbushi25 Nov 20, 2025
cd92d0c
Fix LIT command typo
lbushi25 Nov 20, 2025
4621ff6
Fix compilation error
lbushi25 Nov 20, 2025
76e0f8b
Fix unused argument error
lbushi25 Nov 20, 2025
ce2a16b
Fix unit-tests failures
lbushi25 Nov 20, 2025
e88b0f9
Fix formatting
lbushi25 Nov 20, 2025
b1b3ce9
Add XFAIL for native CPU
lbushi25 Nov 20, 2025
8b685ea
Add more tests
lbushi25 Nov 21, 2025
4cc1d15
Add a templated kernel test
lbushi25 Nov 21, 2025
4e7847d
Add a test to check definition of kernel_function_s
lbushi25 Nov 21, 2025
51e7aff
Merge branch 'intel:sycl' into enqueue_free_functions
lbushi25 Nov 24, 2025
4dc3224
Update free_function_kernels_enqueue.cpp
lbushi25 Nov 24, 2025
0b6a0ac
Apply requested changes
lbushi25 Dec 2, 2025
a7c592e
Merge branch 'enqueue_free_functions' of https://github.com/lbushi25/…
lbushi25 Dec 2, 2025
c65ffbc
Some more refactoring
lbushi25 Dec 2, 2025
5b7c7de
Apply feedback
lbushi25 Dec 10, 2025
356b55a
Remove dead code
lbushi25 Dec 10, 2025
9830ba4
Remove more dead code
lbushi25 Dec 10, 2025
34b0b17
Add more tests
lbushi25 Dec 11, 2025
d42cbdc
Merge branch 'sycl' into enqueue_free_functions
lbushi25 Dec 15, 2025
1acde56
Address feedback
lbushi25 Dec 15, 2025
07d5043
Merge branch 'enqueue_free_functions' of https://github.com/lbushi25/…
lbushi25 Dec 15, 2025
efd5cbf
Fix formatting
lbushi25 Dec 15, 2025
92671f1
Update free_function_kernels_enqueue.cpp
lbushi25 Dec 15, 2025
426a0b4
Fix a bug in enqueue free functions implementation
lbushi25 Mar 6, 2026
8121863
Merge branch 'sycl' into enqueue_free_functions
lbushi25 Mar 6, 2026
5cebcb7
Add test cases
lbushi25 Mar 6, 2026
fa157b7
Merge branch 'enqueue_free_functions' of https://github.com/lbushi25/…
lbushi25 Mar 6, 2026
f8899f3
Update free_function_kernels_enqueue.cpp
lbushi25 Mar 6, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -416,14 +416,16 @@ void nd_launch(queue Q, nd_range<Dimensions> Range,
[[maybe_unused]] kernel_function_s<Func> KernelFunc,
ArgsT &&...Args) {
detail::submit_kernel_direct_parallel_for(
std::move(Q), Range, [Args...](sycl::nd_item<>) { Func(Args...); });
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The bug is in this line. I had forgotten to pass the Dimensions template parameter to sycl::nd_item<> and so it always defaults to sycl::nd_item<1> which makes it impossible to work with 2 or 3 dimensional kernels.

std::move(Q), Range,
[Args...](sycl::nd_item<Dimensions>) { Func(Args...); });
}

template <auto *Func, int Dimensions, typename... ArgsT>
void nd_launch(handler &CGH, nd_range<Dimensions> Range,
[[maybe_unused]] kernel_function_s<Func> KernelFunc,
ArgsT &&...Args) {
CGH.parallel_for(Range, [Args...](sycl::nd_item<>) { Func(Args...); });
CGH.parallel_for(Range,
[Args...](sycl::nd_item<Dimensions>) { Func(Args...); });
}

template <auto *Func, int Dimensions, typename Properties, typename... ArgsT>
Expand All @@ -436,7 +438,7 @@ void nd_launch(queue Q, launch_config<nd_range<Dimensions>, Properties> Config,
ConfigAccess(Config);
detail::submit_kernel_direct_parallel_for(
std::move(Q), ConfigAccess.getRange(),
[Args...](sycl::nd_item<>) { Func(Args...); }, {},
[Args...](sycl::nd_item<Dimensions>) { Func(Args...); }, {},
ConfigAccess.getProperties());
}

Expand All @@ -449,7 +451,7 @@ void nd_launch(handler &CGH,
Properties>
ConfigAccess(Config);
CGH.parallel_for(ConfigAccess.getRange(), ConfigAccess.getProperties(),
[Args...](sycl::nd_item<>) { Func(Args...); });
[Args...](sycl::nd_item<Dimensions>) { Func(Args...); });
}

inline void memcpy(handler &CGH, void *Dest, const void *Src, size_t NumBytes) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,18 @@ void square(int *src, int *dst) {
dst[Lid] = src[Lid] * src[Lid];
}

SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((syclexp::nd_range_kernel<2>))
void square2D(int *src, int *dst) {
size_t Gid = syclext::this_work_item::get_nd_item<2>().get_global_linear_id();
dst[Gid] = src[Gid] * src[Gid];
}

SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((syclexp::nd_range_kernel<3>))
void square3D(int *src, int *dst) {
size_t Gid = syclext::this_work_item::get_nd_item<3>().get_global_linear_id();
dst[Gid] = src[Gid] * src[Gid];
}

template <typename T>
SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((syclexp::nd_range_kernel<1>))
void squareWithScratchMemoryTemplated(T *src, T *dst) {
Expand All @@ -60,7 +72,7 @@ void squareWithAccessor(accType src, accType dst) {
dst[Lid] = src[Lid] * src[Lid];
}

constexpr int SIZE = 16;
constexpr int SIZE = 8;

int main() {
sycl::queue Q;
Expand Down Expand Up @@ -175,6 +187,34 @@ int main() {
assert(Dst[I] == Src[I] * Src[I]);
}

int *Src2D = sycl::malloc_shared<int>(SIZE * SIZE, Q);
int *Dst2D = sycl::malloc_shared<int>(SIZE * SIZE, Q);

Q.submit([&](sycl::handler &CGH) {
syclexp::nd_launch(CGH,
::sycl::nd_range<2>(::sycl::range<2>(SIZE, SIZE),
::sycl::range<2>(SIZE, SIZE)),
syclexp::kernel_function<square2D>, Src2D, Dst2D);
}).wait();

for (int I = 0; I < SIZE * SIZE; I++) {
assert(Dst2D[I] == Src2D[I] * Src2D[I]);
}

int *Src3D = sycl::malloc_shared<int>(SIZE * SIZE * SIZE, Q);
int *Dst3D = sycl::malloc_shared<int>(SIZE * SIZE * SIZE, Q);

Q.submit([&](sycl::handler &CGH) {
syclexp::nd_launch(CGH,
::sycl::nd_range<3>(::sycl::range<3>(SIZE, SIZE, SIZE),
::sycl::range<3>(SIZE, SIZE, SIZE)),
syclexp::kernel_function<square3D>, Src3D, Dst3D);
}).wait();

for (int I = 0; I < SIZE * SIZE * SIZE; I++) {
assert(Dst3D[I] == Src3D[I] * Src3D[I]);
}

Q.submit([&](sycl::handler &CGH) {
static_assert(std::is_same_v<decltype(syclexp::single_task(
CGH, syclexp::kernel_function<successor>,
Expand Down
Loading