Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,8 @@ public:

// device_global initializes underlying T with the args argument
#if __cpp_consteval
// Available only if sizeof...(Args) > 1 or the one argument in args is not a
// device_global.
template <typename... Args>
consteval explicit device_global(Args&&... args);
#else
Expand All @@ -244,7 +246,14 @@ public:
device_global() = default;
#endif // __cpp_consteval

device_global(const device_global &) = delete;
// Available if PropertyListT::has_property<device_image_scope_key>() is false.
constexpr device_global(const device_global &other);

// Available if PropertyListT::has_property<device_image_scope_key>() is false
// and OtherT is convertible to T.
template <typename OtherT, typename OtherProps>
constexpr device_global(const device_global<OtherT, OtherProps> &other);

device_global(const device_global &&) = delete;
device_global &operator=(const device_global &) = delete;
device_global &operator=(const device_global &&) = delete;
Expand Down Expand Up @@ -318,12 +327,42 @@ template <typename... Args>
consteval explicit device_global(Args&&... args);
----
|
Available only if sizeof...(Args) != 1 or the one argument in args is not a device_global.

Constructs a `device_global` object, and implicit storage for `T` in the global address space on each device that may access it.

The object of type `T` is initialized from the `args` parameter pack using list initialization as defined in the {cpp} specification.

`T` must be trivially destructible.

// --- ROW BREAK ---
a|
[source,c++]
----
constexpr device_global(const device_global &other);
----
|
Available if `PropertyListT::has_property<device_image_scope_key>() == false`.

Constructs a `device_global` object, and implicit storage for `T` in the global address space on each device that may access it.

The storage on each device for `T` is initialized with a copy of the initial value of `other`. The behavior is undefined if `other` has been written to prior to a call to this constructor.

// --- ROW BREAK ---
a|
[source,c++]
----
template <typename OtherT, typename OtherProps>
constexpr device_global(const device_global<OtherT, OtherProps> &other);
----
|
Available if `PropertyListT::has_property<device_image_scope_key>() == false` and
`std::is_convertible_v<OtherT, T> == true`;

Constructs a `device_global` object, and implicit storage for `T` in the global address space on each device that may access it.

The storage on each device for `T` is initialized with a copy of the initial value of `other`. The behavior is undefined if `other` has been written to prior to a call to this constructor.

// --- ROW BREAK ---
a|
[source,c++]
Expand Down
78 changes: 73 additions & 5 deletions sycl/include/sycl/ext/oneapi/device_global/device_global.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ namespace sycl {
inline namespace _V1 {
namespace ext::oneapi::experimental {

template <typename T, typename PropertyListT> class device_global;

namespace detail {
// Type-trait for checking if a type defines `operator->`.
template <typename T, typename = void>
Expand All @@ -49,6 +51,20 @@ struct HasArrowOperator<T,
std::void_t<decltype(std::declval<T>().operator->())>>
: std::true_type {};

template <typename T, typename PropertyListT, typename>
class device_global_base;

// Checks that T is a reference to either device_global or
// device_global_base. This is used by the variadic ctor to allow copy ctors to
// take preference.
template <typename T> struct IsDeviceGlobalOrBaseRef : std::false_type {};
template <typename T, typename PropertyListT>
struct IsDeviceGlobalOrBaseRef<device_global_base<T, PropertyListT, void> &>
: std::true_type {};
template <typename T, typename PropertyListT>
struct IsDeviceGlobalOrBaseRef<device_global<T, PropertyListT> &>
: std::true_type {};

// Base class for device_global.
template <typename T, typename PropertyListT, typename = void>
class device_global_base {
Expand All @@ -63,14 +79,49 @@ class device_global_base {
pointer_t get_ptr() noexcept { return usmptr; }
pointer_t get_ptr() const noexcept { return usmptr; }

template <typename, typename, typename> friend class device_global_base;

#ifndef __SYCL_DEVICE_ONLY__
template <typename OtherT, typename OtherProps>
static constexpr const T &
ExtractInitialVal(const device_global_base<OtherT, OtherProps> &Other) {
if constexpr (OtherProps::template has_property<device_image_scope_key>())
return Other.val;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just curious ... is this code used? It seems like the constructor that uses this function is constrained such that the property list never has device_image_scope.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The object being constructed cannot have the device_image_scope property, but other can. Otherwise, we could have just lined the Other.init_val in the ctor.

else
return Other.init_val;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was surprised to see that we have an init_val field. I realize this existed before your PR, but this seems undesirable, and I wonder if we should rely on it for this feature. Imagine a case where the device_global wraps a large array. In such a case, we will allocate memory for that array twice: once as USM and another time for init_val.

Do you understand why init_val was added, and what other features rely on it?

I'm somewhat torn here because I'm planning another feature which would also benefit from init_val. However, it seems really bad to burn the extra memory, especially when T is large.

Thinking out loud ... would it make sense to declare init_val only when T is not an array?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that's a valid concern. I don't remember if there was a reason not to store it elsewhere. I will investigate if the device global entry list could be used to store the init values instead. I think it might be possible, but I think it would be better as a follow-up patch.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, addressing it in a follow-up PR is OK.

}
#endif // __SYCL_DEVICE_ONLY__

public:
#if __cpp_consteval
template <typename... Args>
// The SFINAE is to allow the copy constructors to take priority.
template <
typename... Args,
std::enable_if_t<
sizeof...(Args) != 1 ||
(!IsDeviceGlobalOrBaseRef<std::remove_cv_t<Args>>::value && ...),
int> = 0>
consteval explicit device_global_base(Args &&...args) : init_val{args...} {}
#else
device_global_base() = default;
#endif // __cpp_consteval

#ifndef __SYCL_DEVICE_ONLY__
template <typename OtherT, typename OtherProps,
typename = std::enable_if_t<std::is_convertible_v<OtherT, T>>>
constexpr device_global_base(
const device_global_base<OtherT, OtherProps> &DGB)
: init_val{ExtractInitialVal(DGB)} {}
constexpr device_global_base(const device_global_base &DGB)
: init_val{DGB.init_val} {}
#else
template <typename OtherT, typename OtherProps,
typename = std::enable_if_t<std::is_convertible_v<OtherT, T>>>
constexpr device_global_base(const device_global_base<OtherT, OtherProps> &) {
}
constexpr device_global_base(const device_global_base &) {}
#endif // __SYCL_DEVICE_ONLY__

template <access::decorated IsDecorated>
multi_ptr<T, access::address_space::global_space, IsDecorated>
get_multi_ptr() noexcept {
Expand Down Expand Up @@ -100,14 +151,28 @@ class device_global_base<
T *get_ptr() noexcept { return &val; }
const T *get_ptr() const noexcept { return &val; }

template <typename, typename, typename> friend class device_global_base;

public:
#if __cpp_consteval
template <typename... Args>
// The SFINAE is to allow the copy constructors to take priority.
template <
typename... Args,
std::enable_if_t<
sizeof...(Args) != 1 ||
(!IsDeviceGlobalOrBaseRef<std::remove_cv_t<Args>>::value && ...),
int> = 0>
consteval explicit device_global_base(Args &&...args) : val{args...} {}
#else
device_global_base() = default;
#endif // __cpp_consteval

template <typename OtherT, typename OtherProps,
typename = std::enable_if_t<std::is_convertible_v<OtherT, T>>>
constexpr device_global_base(const device_global_base<OtherT, OtherProps> &) =
delete;
constexpr device_global_base(const device_global_base &) = delete;

template <access::decorated IsDecorated>
multi_ptr<T, access::address_space::global_space, IsDecorated>
get_multi_ptr() noexcept {
Expand All @@ -124,6 +189,7 @@ class device_global_base<
const T>(this->get_ptr());
}
};

} // namespace detail

template <typename T, typename PropertyListT = empty_properties_t>
Expand Down Expand Up @@ -151,6 +217,7 @@ class
: public detail::device_global_base<T, detail::properties_t<Props...>> {

using property_list_t = detail::properties_t<Props...>;
using base_t = detail::device_global_base<T, property_list_t>;

public:
using element_type = std::remove_extent_t<T>;
Expand All @@ -167,10 +234,11 @@ class
"Property list is invalid.");

// Inherit the base class' constructors
using detail::device_global_base<
T, detail::properties_t<Props...>>::device_global_base;
using detail::device_global_base<T, property_list_t>::device_global_base;

constexpr device_global(const device_global &DG)
: base_t(static_cast<const base_t &>(DG)) {}

device_global(const device_global &) = delete;
device_global(const device_global &&) = delete;
device_global &operator=(const device_global &) = delete;
device_global &operator=(const device_global &&) = delete;
Expand Down
68 changes: 68 additions & 0 deletions sycl/test-e2e/DeviceGlobal/device_global_copy.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
// RUN: %{build} -std=c++23 -o %t.out
// RUN: %{run} %t.out
//
// UNSUPPORTED: opencl && gpu
// UNSUPPORTED-TRACKER: GSD-4287
//
// Tests the copy ctor on device_global without device_image_scope.

#include <sycl/detail/core.hpp>

namespace oneapiext = sycl::ext::oneapi::experimental;

oneapiext::device_global<const int> DGInit1{3};
oneapiext::device_global<const int> DGCopy1{DGInit1};

oneapiext::device_global<int> DGInit2{4};
oneapiext::device_global<int> DGCopy2{DGInit2};

oneapiext::device_global<float> DGInit3{5.0f};
oneapiext::device_global<int> DGCopy3{DGInit3};

oneapiext::device_global<const int, decltype(oneapiext::properties{
oneapiext::device_image_scope})>
DGInit4{6};
oneapiext::device_global<const int> DGCopy4{DGInit4};

oneapiext::device_global<const int> DGInit5{7};
oneapiext::device_global<const int, decltype(oneapiext::properties{
oneapiext::host_access_read})>
DGCopy5{DGInit5};

int main() {
sycl::queue Q;

int ReadVals[10] = {0, 0};
{
sycl::buffer<int, 1> ReadValsBuff{ReadVals, 10};

Q.submit([&](sycl::handler &CGH) {
sycl::accessor ReadValsAcc{ReadValsBuff, CGH, sycl::write_only};
CGH.single_task([=]() {
ReadValsAcc[0] = DGInit1.get();
ReadValsAcc[1] = DGCopy1.get();
ReadValsAcc[2] = DGInit2.get();
ReadValsAcc[3] = DGCopy2.get();
ReadValsAcc[4] = DGInit3.get();
ReadValsAcc[5] = DGCopy3.get();
ReadValsAcc[6] = DGInit4.get();
ReadValsAcc[7] = DGCopy4.get();
ReadValsAcc[8] = DGInit5.get();
ReadValsAcc[9] = DGCopy5.get();
});
}).wait_and_throw();
}

assert(ReadVals[0] == 3);
assert(ReadVals[1] == 3);
assert(ReadVals[2] == 4);
assert(ReadVals[3] == 4);
assert(ReadVals[4] == 5);
assert(ReadVals[5] == 5);
assert(ReadVals[6] == 6);
assert(ReadVals[7] == 6);
assert(ReadVals[8] == 7);
assert(ReadVals[9] == 7);

return 0;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
// RUN: %clangxx -std=c++23 -fsycl -fsycl-targets=%sycl_triple -fsyntax-only -Xclang -verify -Xclang -verify-ignore-unexpected=note %s
//
// Tests that the copy ctor on device_global with device_image_scope is
// unavailable.

#include <sycl/sycl.hpp>

namespace oneapiext = sycl::ext::oneapi::experimental;

using device_image_properties =
decltype(oneapiext::properties{oneapiext::device_image_scope});

// expected-error@sycl/ext/oneapi/device_global/device_global.hpp:* {{call to deleted constructor}}
oneapiext::device_global<const int, device_image_properties> DGInit1{3};
oneapiext::device_global<const int, device_image_properties> DGCopy1{DGInit1};

// expected-error@sycl/ext/oneapi/device_global/device_global.hpp:* {{call to deleted constructor}}
oneapiext::device_global<int, device_image_properties> DGInit2{3};
oneapiext::device_global<int, device_image_properties> DGCopy2{DGInit2};

// expected-error@+2 {{call to deleted constructor}}
oneapiext::device_global<int, device_image_properties> DGInit3{3};
oneapiext::device_global<float, device_image_properties> DGCopy3{DGInit3};

// expected-error@+2 {{call to deleted constructor}}
oneapiext::device_global<const int> DGInit4{3};
oneapiext::device_global<const int, device_image_properties> DGCopy4{DGInit4};
Comment on lines +25 to +27
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What about the other way around, from one with device_image_properties to the one without? Even if it should succeed, I think it would be useful to include that in this test to highlight the difference/specifics.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That case is covered in sycl/test-e2e/DeviceGlobal/device_global_copy.cpp. I personally don't think it should be in a negative test.


int main() { return 0; }
Loading