Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/extension_ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ jobs:
-S ${{ github.workspace }}
-DCMAKE_INSTALL_PREFIX=${{ steps.strings.outputs.install-dir }}
-DSIMSYCL_ENABLE_SYCL_KHR_QUEUE_FLUSH=OFF
-DSIMSYCL_ENABLE_SYCL_KHR_WORK_ITEM_QUERIES=OFF

- name: Build SimSYCL (no extensions)
run: >
Expand Down Expand Up @@ -71,6 +72,7 @@ jobs:
-S ${{ github.workspace }}
-DCMAKE_INSTALL_PREFIX=${{ steps.strings.outputs.install-dir }}
-DSIMSYCL_ENABLE_SYCL_KHR_QUEUE_FLUSH=ON
-DSIMSYCL_ENABLE_SYCL_KHR_WORK_ITEM_QUERIES=ON

- name: Build SimSYCL (with extensions)
run: >
Expand Down
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ set(SIMSYCL_CHECK_MODE "ABORT" CACHE STRING "Runtime assertion handling NONE|LOG

# Extension options
option(SIMSYCL_ENABLE_SYCL_KHR_QUEUE_FLUSH "Enable the SYCL_KHR_QUEUE_FLUSH extension" ON)
option(SIMSYCL_ENABLE_SYCL_KHR_WORK_ITEM_QUERIES "Enable the SYCL_KHR_WORK_ITEM_QUERIES extension" ON)

set(CONFIG_PATH "${CMAKE_CURRENT_BINARY_DIR}/include/simsycl/config.hh")
configure_file(
Expand Down
1 change: 1 addition & 0 deletions cmake/simsycl-config.cmake.in
Original file line number Diff line number Diff line change
Expand Up @@ -28,5 +28,6 @@ set(SIMSYCL_CHECK_MODE "@SIMSYCL_CHECK_MODE@")
set(SIMSYCL_ENABLE_ASAN "@SIMSYCL_ENABLE_ASAN@")

set(SIMSYCL_ENABLE_SYCL_KHR_QUEUE_FLUSH "@SIMSYCL_ENABLE_SYCL_KHR_QUEUE_FLUSH@")
set(SIMSYCL_ENABLE_SYCL_KHR_WORK_ITEM_QUERIES "@SIMSYCL_ENABLE_SYCL_KHR_WORK_ITEM_QUERIES@")

include("${CMAKE_CURRENT_LIST_DIR}/AddToTarget.cmake")
1 change: 1 addition & 0 deletions include/simsycl/config.hh.in
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#cmakedefine01 SIMSYCL_FEATURE_HALF_TYPE

#cmakedefine01 SIMSYCL_ENABLE_SYCL_KHR_QUEUE_FLUSH
#cmakedefine01 SIMSYCL_ENABLE_SYCL_KHR_WORK_ITEM_QUERIES

#ifndef SIMSYCL_CHECK_MODE
#define SIMSYCL_CHECK_MODE SIMSYCL_CHECK_@SIMSYCL_CHECK_MODE@
Expand Down
2 changes: 1 addition & 1 deletion include/simsycl/detail/check.hh
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ struct sink {

#if SIMSYCL_CHECK_MODE == SIMSYCL_CHECK_NONE
#define SIMSYCL_CHECK_MSG(CONDITION, ...) \
do { (void)(CONDITION); } while(0)
do { simsycl::detail::sink{CONDITION, __VA_ARGS__}; } while(0)
#elif SIMSYCL_CHECK_MODE == SIMSYCL_CHECK_LOG || SIMSYCL_CHECK_MODE == SIMSYCL_CHECK_THROW \
|| SIMSYCL_CHECK_MODE == SIMSYCL_CHECK_ABORT
#define SIMSYCL_CHECK_MSG(CONDITION, ...) \
Expand Down
2 changes: 2 additions & 0 deletions include/simsycl/sycl.hh
Original file line number Diff line number Diff line change
Expand Up @@ -44,4 +44,6 @@
#include "sycl/type_traits.hh"
#include "sycl/usm.hh"
#include "sycl/vec.hh"

#include "sycl/khr/work_item_queries.hh"
// IWYU pragma: end_keep
50 changes: 50 additions & 0 deletions include/simsycl/sycl/khr/work_item_queries.hh
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#include "simsycl/sycl/group.hh"
#include "simsycl/sycl/nd_item.hh"
#include "simsycl/sycl/sub_group.hh"

#if SIMSYCL_ENABLE_SYCL_KHR_WORK_ITEM_QUERIES
#define SYCL_KHR_WORK_ITEM_QUERIES 1
#endif // SIMSYCL_ENABLE_SYCL_KHR_WORK_ITEM_QUERIES

namespace simsycl::sycl::khr {

#if SIMSYCL_ENABLE_SYCL_KHR_WORK_ITEM_QUERIES

namespace detail {
template<int Dimensions>
thread_local std::optional<simsycl::sycl::nd_item<Dimensions>> g_khr_wi_query_this_nd_item;

template<int Dimensions>
thread_local std::optional<simsycl::sycl::group<Dimensions>> g_khr_wi_query_this_group;

inline thread_local std::optional<simsycl::sycl::sub_group> g_khr_wi_query_this_sub_group;

inline void khr_wi_query_check(bool val, [[maybe_unused]] const char *query_name) {
SIMSYCL_CHECK_MSG(val,
"Work item query state '%s' is not available.\n"
"Make sure that the query originated from a kernel launched with a sycl::nd_range argument",
query_name);
}

} // namespace detail

template<int Dimensions>
simsycl::sycl::nd_item<Dimensions> this_nd_item() {
detail::khr_wi_query_check(detail::g_khr_wi_query_this_nd_item<Dimensions>.has_value(), "this_nd_item");
return detail::g_khr_wi_query_this_nd_item<Dimensions>.value();
}

template<int Dimensions>
simsycl::sycl::group<Dimensions> this_group() {
detail::khr_wi_query_check(detail::g_khr_wi_query_this_group<Dimensions>.has_value(), "this_group");
return detail::g_khr_wi_query_this_group<Dimensions>.value();
}

inline simsycl::sycl::sub_group this_sub_group() {
detail::khr_wi_query_check(detail::g_khr_wi_query_this_sub_group.has_value(), "this_sub_group");
return detail::g_khr_wi_query_this_sub_group.value();
}

#endif // SIMSYCL_ENABLE_SYCL_KHR_WORK_ITEM_QUERIES

} // namespace simsycl::sycl::khr
63 changes: 53 additions & 10 deletions src/simsycl/schedule.cc
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
#include <simsycl/detail/utils.hh>
#include <simsycl/schedule.hh>
#include <simsycl/sycl/device.hh>
#include <simsycl/sycl/exception.hh>
#include <simsycl/sycl/group_functions.hh>
#include <simsycl/sycl/handler.hh>
#include <simsycl/system.hh>

#include "simsycl/schedule.hh"
#include "simsycl/detail/utils.hh"
#include "simsycl/sycl/device.hh"
#include "simsycl/sycl/exception.hh"
#include "simsycl/sycl/group.hh"
#include "simsycl/sycl/group_functions.hh" // IWYU pragma: keep
#include "simsycl/sycl/handler.hh" // IWYU pragma: keep
#include "simsycl/sycl/khr/work_item_queries.hh"
#include "simsycl/sycl/nd_item.hh"
#include "simsycl/system.hh"

#include <numeric>
#include <random>
Expand Down Expand Up @@ -181,6 +185,23 @@ void cooperative_for_nd_range(const sycl::device &device, const sycl::nd_range<D
std::vector<detail::concurrent_sub_group> concurrent_sub_groups(num_concurrent_sub_groups);
std::vector<detail::concurrent_nd_item> num_concurrent_nd_items(num_concurrent_items);

#if SIMSYCL_ENABLE_SYCL_KHR_WORK_ITEM_QUERIES
std::vector<const sycl::nd_item<Dimensions> *> concurrent_khr_wi_query_nd_item_ptrs(num_concurrent_items, nullptr);

auto update_global_khr_wi_query_data = [&](int cc_g_idx = -1) {
if(cc_g_idx != -1 && concurrent_khr_wi_query_nd_item_ptrs[cc_g_idx] != nullptr) {
const auto nd_item = *concurrent_khr_wi_query_nd_item_ptrs[cc_g_idx];
sycl::khr::detail::g_khr_wi_query_this_nd_item<Dimensions> = nd_item;
sycl::khr::detail::g_khr_wi_query_this_group<Dimensions> = nd_item.get_group();
sycl::khr::detail::g_khr_wi_query_this_sub_group = nd_item.get_sub_group();
} else {
sycl::khr::detail::g_khr_wi_query_this_nd_item<Dimensions> = std::nullopt;
sycl::khr::detail::g_khr_wi_query_this_group<Dimensions> = std::nullopt;
sycl::khr::detail::g_khr_wi_query_this_sub_group = std::nullopt;
}
};
#endif // SIMSYCL_ENABLE_SYCL_KHR_WORK_ITEM_QUERIES

for(auto &cgroup : concurrent_groups) {
cgroup.local_memory_allocations.resize(local_memory.size());
for(size_t i = 0; i < local_memory.size(); ++i) {
Expand Down Expand Up @@ -220,8 +241,13 @@ void cooperative_for_nd_range(const sycl::device &device, const sycl::nd_range<D
group_linear_range, sub_group_linear_id_in_group, sub_group_linear_range_in_group,
sub_group_max_local_linear_range, sub_group_max_local_range, thread_id_in_sub_group,
sub_group_id_in_group, sub_group_range_in_group, &concurrent_nd_item, &concurrent_group,
&concurrent_sub_group, &kernel, &concurrent_items_exited, &caught_exceptions,
&range](boost::context::continuation &&scheduler) //
&concurrent_sub_group, &kernel, &concurrent_items_exited, &caught_exceptions, &range
#if SIMSYCL_ENABLE_SYCL_KHR_WORK_ITEM_QUERIES
,
concurrent_global_idx, &concurrent_khr_wi_query_nd_item_ptrs,
&update_global_khr_wi_query_data
#endif
](boost::context::continuation &&scheduler) //
{
// yield immediately to allow the scheduling loop to set up local memory pointers
enter_kernel_fiber(std::move(scheduler));
Expand All @@ -245,7 +271,8 @@ void cooperative_for_nd_range(const sycl::device &device, const sycl::nd_range<D

SIMSYCL_START_IGNORING_DEPRECATIONS;
const auto group_id = linear_index_to_id(group_range, group_linear_id);
const auto global_id = range.get_offset() + (group_id * sycl::id<Dimensions>(local_range)) + local_id;
const auto global_id
= range.get_offset() + (group_id * sycl::id<Dimensions>(local_range)) + local_id;
Comment on lines +274 to +275
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this a clang-format change?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, that's how it is formatted for me right now. (I wouldn't write that on purpose ;))


// if sub-group range is not divisible by local range, the last sub-group will be smaller
const auto sub_group_local_linear_range = std::min(sub_group_max_local_linear_range,
Expand All @@ -265,6 +292,12 @@ void cooperative_for_nd_range(const sycl::device &device, const sycl::nd_range<D
const auto nd_item
= detail::make_nd_item(global_item, local_item, group, sub_group, &concurrent_nd_item);

#if SIMSYCL_ENABLE_SYCL_KHR_WORK_ITEM_QUERIES
concurrent_khr_wi_query_nd_item_ptrs[concurrent_global_idx] = &nd_item;
// adjust the globals now that the data is available, before starting the kernel
update_global_khr_wi_query_data(concurrent_global_idx);
#endif // SIMSYCL_ENABLE_SYCL_KHR_WORK_ITEM_QUERIES

try {
kernel(nd_item);
// Add an implicit "exit" operations to groups and sub-groups to catch potential divergence on
Expand Down Expand Up @@ -311,11 +344,21 @@ void cooperative_for_nd_range(const sycl::device &device, const sycl::nd_range<D
*local_memory[i].ptr = concurrent_groups[concurrent_group_idx].local_memory_allocations[i].get();
}

#if SIMSYCL_ENABLE_SYCL_KHR_WORK_ITEM_QUERIES
// adjust globals before switching fibers
update_global_khr_wi_query_data(concurrent_global_idx);
#endif // SIMSYCL_ENABLE_SYCL_KHR_WORK_ITEM_QUERIES

fibers[concurrent_global_idx] = fibers[concurrent_global_idx].resume();
}
schedule_state = schedule.update(schedule_state, order);
}

#if SIMSYCL_ENABLE_SYCL_KHR_WORK_ITEM_QUERIES
// reset globals
update_global_khr_wi_query_data();
#endif // SIMSYCL_ENABLE_SYCL_KHR_WORK_ITEM_QUERIES

// rethrow any encountered exceptions
for(auto &exception : caught_exceptions) { std::rethrow_exception(exception); }
}
Expand Down
4 changes: 4 additions & 0 deletions test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,12 @@ add_executable(tests
simulation_tests.cc
alloc_tests.cc
vec_tests.cc
extensions/work_item_queries_test.cc
)

# use throw check mode in the main test executable so we can have unit tests for error reporting
target_compile_definitions(tests PRIVATE SIMSYCL_CHECK_MODE=SIMSYCL_CHECK_THROW)

add_sycl_to_target(TARGET tests SIMSYCL_ALL_WARNINGS)
target_link_libraries(tests PRIVATE Catch2::Catch2WithMain)

Expand Down
9 changes: 9 additions & 0 deletions test/extensions/extensions_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,13 @@ int main() {

// SIMSYCL_ENABLE_SYCL_KHR_QUEUE_FLUSH
queue.khr_flush();

// SIMSYCL_ENABLE_SYCL_KHR_WORK_ITEM_QUERIES
queue.submit([&](sycl::handler &cgh) {
cgh.parallel_for(sycl::nd_range<1>(1024, 64), [=](sycl::nd_item<1>) {
[[maybe_unused]] const auto item = sycl::khr::this_nd_item<1>();
[[maybe_unused]] const auto group = sycl::khr::this_group<1>();
[[maybe_unused]] const auto sub_group = sycl::khr::this_sub_group();
});
});
}
85 changes: 85 additions & 0 deletions test/extensions/work_item_queries_test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
#include <simsycl/sycl.hh>

#include <catch2/catch_template_test_macros.hpp>
#include <catch2/catch_test_macros.hpp>
#include <catch2/generators/catch_generators.hpp>
#include <catch2/matchers/catch_matchers_string.hpp>

using Catch::Matchers::ContainsSubstring;

using namespace simsycl;

TEST_CASE("work item queries set feature test macro", "[khr][work_item_queries]") {
#if SIMSYCL_ENABLE_SYCL_KHR_WORK_ITEM_QUERIES
CHECK(SYCL_KHR_WORK_ITEM_QUERIES == 1);
#endif // SIMSYCL_ENABLE_SYCL_KHR_WORK_ITEM_QUERIES
}

TEMPLATE_TEST_CASE_SIG(
"work item queries are correct if supported", "[khr][work_item_queries]", ((int Dims), Dims), 1, 2, 3) {
#if SIMSYCL_ENABLE_SYCL_KHR_WORK_ITEM_QUERIES

sycl::range<Dims> global_range;
sycl::range<Dims> local_range;
for(int d = 0; d < Dims; ++d) {
const int s = d + 1;
global_range[d] = s * (2 + s);
local_range[d] = 2 + s;
}

std::vector<bool> visited(global_range.size(), false);
sycl::queue()
.submit([&](sycl::handler &cgh) {
cgh.parallel_for(sycl::nd_range(global_range, local_range), [=, &visited](sycl::nd_item<Dims> it) {
const auto global_linear_id = it.get_global_linear_id();
CHECK(global_linear_id < global_range.size());
CHECK(!visited[global_linear_id]);
visited[global_linear_id] = true;

CHECK(sycl::khr::this_nd_item<Dims>() == it);
CHECK(sycl::khr::this_group<Dims>() == it.get_group());
CHECK(sycl::khr::this_sub_group() == it.get_sub_group());

group_barrier(it.get_group());

// check again after scheduling through group_barrier
CHECK(sycl::khr::this_nd_item<Dims>() == it);
CHECK(sycl::khr::this_group<Dims>() == it.get_group());
CHECK(sycl::khr::this_sub_group() == it.get_sub_group());
});
})
.wait();

for(size_t i = 0; i < global_range.size(); ++i) {
CAPTURE(i);
CHECK(visited[i]);
}

#else // SIMSYCL_ENABLE_SYCL_KHR_WORK_ITEM_QUERIES
SKIP("SYCL_KHR_WORK_ITEM_QUERIES not enabled");
#endif // SIMSYCL_ENABLE_SYCL_KHR_WORK_ITEM_QUERIES
}

TEST_CASE("work item queries provide useful errors", "[khr][work_item_queries]") {
#if SIMSYCL_ENABLE_SYCL_KHR_WORK_ITEM_QUERIES

// outside of everything
REQUIRE_THROWS_WITH(sycl::khr::this_nd_item<1>(), ContainsSubstring("state 'this_nd_item' is not available"));
REQUIRE_THROWS_WITH(sycl::khr::this_group<1>(), ContainsSubstring("state 'this_group' is not available"));
REQUIRE_THROWS_WITH(sycl::khr::this_sub_group(), ContainsSubstring("state 'this_sub_group' is not available"));

// in a non-nd parallel for
sycl::queue{}.submit([&](sycl::handler &cgh) {
cgh.parallel_for(sycl::range{1}, [=](sycl::item<1>) {
const char *test_str
= "Make sure that the query originated from a kernel launched with a sycl::nd_range argument";
CHECK_THROWS_WITH(sycl::khr::this_nd_item<1>(), ContainsSubstring(test_str));
CHECK_THROWS_WITH(sycl::khr::this_group<1>(), ContainsSubstring(test_str));
CHECK_THROWS_WITH(sycl::khr::this_sub_group(), ContainsSubstring(test_str));
});
});

#else // SIMSYCL_ENABLE_SYCL_KHR_WORK_ITEM_QUERIES
SKIP("SYCL_KHR_WORK_ITEM_QUERIES not enabled");
#endif // SIMSYCL_ENABLE_SYCL_KHR_WORK_ITEM_QUERIES
}
Loading