Skip to content
Original file line number Diff line number Diff line change
Expand Up @@ -134,12 +134,18 @@ The load and store sub-group functions enable developers to assert that all work
|===
|Function|Description

|+template <typename T> T load(sub_group sg, const T *src)+
|Load contiguous data from _src_. Returns one element per work-item, corresponding to the memory location at _src_ + +get_local_id()+. The value of _src_ must be the same for all work-items in the sub-group. The address space information is deduced automatically. Only pointers to global and local address spaces are valid. Passing a pointer to other address spaces will cause the run time assertion.

|+template <typename T, access::address_space Space> T load(sub_group sg, const multi_ptr<T,Space> src)+
|Load contiguous data from _src_. Returns one element per work-item, corresponding to the memory location at _src_ + +get_local_id()+. The value of _src_ must be the same for all work-items in the sub-group. _Space_ must be +access::address_space::global_space+ or +access::address_space::local_space+.

|+template <int N, typename T, access::address_space Space> vec<T,N> load(sub_group sg, const multi_ptr<T,Space> src)+
|Load contiguous data from _src_. Returns _N_ elements per work-item, corresponding to the _N_ memory locations at _src_ + +i+ * +get_max_local_range()+ + +get_local_id()+ for +i+ between 0 and _N_. The value of _src_ must be the same for all work-items in the sub-group. _Space_ must be +access::address_space::global_space+ or +access::address_space::local_space+.

|+template <typename T> void store(sub_group sg, T *dst, const T& x)+
|Store contiguous data to _dst_. The value of _x_ from each work-item is written to the memory location at _dst_ + +get_local_id()+. The value of _dst_ must be the same for all work-items in the sub-group. The address space information is deduced automatically. Only pointers to global and local address spaces are valid. Passing a pointer to other address spaces will cause the run time assertion.

|+template <typename T, access::address_space Space> void store(sub_group sg, multi_ptr<T,Space> dst, const T& x)+
|Store contiguous data to _dst_. The value of _x_ from each work-item is written to the memory location at _dst_ + +get_local_id()+. The value of _dst_ must be the same for all work-items in the sub-group. _Space_ must be +access::address_space::global_space+ or +access::address_space::local_space+.

Expand All @@ -165,6 +171,7 @@ None.
|========================================
|Rev|Date|Author|Changes
|1|2020-03-16|John Pennycook|*Initial public working draft*
|2|2021-02-26|Vladimir Lazarev|*Add load/store method for raw pointers*
|========================================

//************************************************************************
Expand Down
24 changes: 24 additions & 0 deletions sycl/include/CL/__spirv/spirv_ops.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,30 @@ __SPIRV_ATOMICS(__SPIRV_ATOMIC_UNSIGNED, unsigned long long)
__SPIRV_ATOMICS(__SPIRV_ATOMIC_MINMAX, Min)
__SPIRV_ATOMICS(__SPIRV_ATOMIC_MINMAX, Max)

extern SYCL_EXTERNAL __attribute__((opencl_global)) void *
__spirv_GenericCastToPtrExplicit_ToGlobal(const void *Ptr,
__spv::StorageClass::Flag S) noexcept;

extern SYCL_EXTERNAL __attribute__((opencl_local)) void *
__spirv_GenericCastToPtrExplicit_ToLocal(const void *Ptr,
__spv::StorageClass::Flag S) noexcept;

template <typename dataT>
extern __attribute__((opencl_global)) dataT *
__spirv_GenericCastToPtrExplicit_ToGlobal(
const void *Ptr, __spv::StorageClass::Flag S) noexcept {
return (__attribute__((opencl_global))
dataT *)__spirv_GenericCastToPtrExplicit_ToGlobal(Ptr, S);
}

template <typename dataT>
extern __attribute__((opencl_local)) dataT *
__spirv_GenericCastToPtrExplicit_ToLocal(const void *Ptr,
__spv::StorageClass::Flag S) noexcept {
return (__attribute__((opencl_local))
dataT *)__spirv_GenericCastToPtrExplicit_ToLocal(Ptr, S);
}

template <typename dataT>
__SYCL_CONVERGENT__ extern SYCL_EXTERNAL dataT
__spirv_SubgroupShuffleINTEL(dataT Data, uint32_t InvocationId) noexcept;
Expand Down
40 changes: 40 additions & 0 deletions sycl/include/CL/__spirv/spirv_types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,46 @@ struct Scope {
Flag flag_value;
};

struct StorageClass {
enum Flag : uint32_t {
UniformConstant = 0,
Input = 1,
Uniform = 2,
Output = 3,
Workgroup = 4,
CrossWorkgroup = 5,
Private = 6,
Function = 7,
Generic = 8,
PushConstant = 9,
AtomicCounter = 10,
Image = 11,
StorageBuffer = 12,
CallableDataKHR = 5328,
CallableDataNV = 5328,
IncomingCallableDataKHR = 5329,
IncomingCallableDataNV = 5329,
RayPayloadKHR = 5338,
RayPayloadNV = 5338,
HitAttributeKHR = 5339,
HitAttributeNV = 5339,
IncomingRayPayloadKHR = 5342,
IncomingRayPayloadNV = 5342,
ShaderRecordBufferKHR = 5343,
ShaderRecordBufferNV = 5343,
PhysicalStorageBuffer = 5349,
PhysicalStorageBufferEXT = 5349,
CodeSectionINTEL = 5605,
CapabilityUSMStorageClassesINTEL = 5935,
DeviceOnlyINTEL = 5936,
HostOnlyINTEL = 5937,
Max = 0x7fffffff,
};
constexpr StorageClass(Flag flag) : flag_value(flag) {}
constexpr operator uint32_t() const { return flag_value; }
Flag flag_value;
};

struct MemorySemanticsMask {

enum Flag : uint32_t {
Expand Down
90 changes: 90 additions & 0 deletions sycl/include/CL/sycl/ONEAPI/sub_group.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,47 @@ struct sub_group {

/* --- sub_group load/stores --- */
/* these can map to SIMD or block read/write hardware where available */
#ifdef __SYCL_DEVICE_ONLY__
// Method for decorated pointer
template <typename T>
detail::enable_if_t<
!std::is_same<typename detail::remove_AS<T>::type, T>::value, T>
load(T *src) const {
return load(sycl::multi_ptr<typename detail::remove_AS<T>::type,
sycl::detail::deduce_AS<T>::value>(
(typename detail::remove_AS<T>::type *)src));
}

// Method for raw pointer
template <typename T>
detail::enable_if_t<
std::is_same<typename detail::remove_AS<T>::type, T>::value, T>
load(T *src) const {

#ifdef __NVPTX__
return src[get_local_id()[0]];
#else // __NVPTX__
auto l = __spirv_GenericCastToPtrExplicit_ToLocal<T>(
src, __spv::StorageClass::Workgroup);
if (l)
return load(l);

auto g = __spirv_GenericCastToPtrExplicit_ToGlobal<T>(
src, __spv::StorageClass::CrossWorkgroup);
if (g)
return load(g);

assert(!"Sub-group load() is supported for local or global pointers only.");
return {};
#endif // __NVPTX__
}
#else //__SYCL_DEVICE_ONLY__
template <typename T> T load(T *src) const {
(void)src;
throw runtime_error("Sub-groups are not supported on host device.",
PI_INVALID_DEVICE);
}
#endif //__SYCL_DEVICE_ONLY__

template <typename T, access::address_space Space>
sycl::detail::enable_if_t<
Expand Down Expand Up @@ -315,6 +356,55 @@ struct sub_group {
#endif
}

#ifdef __SYCL_DEVICE_ONLY__
// Method for decorated pointer
template <typename T>
detail::enable_if_t<
!std::is_same<typename detail::remove_AS<T>::type, T>::value>
store(T *dst, const typename detail::remove_AS<T>::type &x) const {
store(sycl::multi_ptr<typename detail::remove_AS<T>::type,
sycl::detail::deduce_AS<T>::value>(
(typename detail::remove_AS<T>::type *)dst),
x);
}

// Method for raw pointer
template <typename T>
detail::enable_if_t<
std::is_same<typename detail::remove_AS<T>::type, T>::value>
store(T *dst, const typename detail::remove_AS<T>::type &x) const {

#ifdef __NVPTX__
dst[get_local_id()[0]] = x;
#else // __NVPTX__
auto l = __spirv_GenericCastToPtrExplicit_ToLocal<T>(
dst, __spv::StorageClass::Workgroup);
if (l) {
store(l, x);
return;
}

auto g = __spirv_GenericCastToPtrExplicit_ToGlobal<T>(
dst, __spv::StorageClass::CrossWorkgroup);
if (g) {
store(g, x);
return;
}

assert(
!"Sub-group store() is supported for local or global pointers only.");
return;
#endif // __NVPTX__
}
#else //__SYCL_DEVICE_ONLY__
template <typename T> void store(T *dst, const T &x) const {
(void)dst;
(void)x;
throw runtime_error("Sub-groups are not supported on host device.",
PI_INVALID_DEVICE);
}
#endif //__SYCL_DEVICE_ONLY__

template <typename T, access::address_space Space>
sycl::detail::enable_if_t<
sycl::detail::sub_group::AcceptableForGlobalLoadStore<T, Space>::value>
Expand Down
57 changes: 39 additions & 18 deletions sycl/include/CL/sycl/access/access.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -187,18 +187,16 @@ template <typename ElementType>
struct DecoratedType<ElementType, access::address_space::local_space> {
using type = __OPENCL_LOCAL_AS__ ElementType;
};

template <class T>
struct remove_AS {
typedef T type;
};
template <class T> struct remove_AS { typedef T type; };

#ifdef __SYCL_DEVICE_ONLY__
template <class T>
struct remove_AS<__OPENCL_GLOBAL_AS__ T> {
typedef T type;
template <class T> struct deduce_AS {
static_assert(!std::is_same<typename detail::remove_AS<T>::type, T>::value,
"Only types with address space attributes are supported");
};

template <class T> struct remove_AS<__OPENCL_GLOBAL_AS__ T> { typedef T type; };

#ifdef __ENABLE_USM_ADDR_SPACE__
template <class T> struct remove_AS<__OPENCL_GLOBAL_DEVICE_AS__ T> {
typedef T type;
Expand All @@ -207,21 +205,45 @@ template <class T> struct remove_AS<__OPENCL_GLOBAL_DEVICE_AS__ T> {
template <class T> struct remove_AS<__OPENCL_GLOBAL_HOST_AS__ T> {
typedef T type;
};

template <class T> struct deduce_AS<__OPENCL_GLOBAL_DEVICE_AS__ T> {
static const access::address_space value =
access::address_space::global_device_space;
};

template <class T> struct deduce_AS<__OPENCL_GLOBAL_HOST_AS__ T> {
static const access::address_space value =
access::address_space::global_host_space;
};
#endif // __ENABLE_USM_ADDR_SPACE__

template <class T>
struct remove_AS<__OPENCL_PRIVATE_AS__ T> {
template <class T> struct remove_AS<__OPENCL_PRIVATE_AS__ T> {
typedef T type;
};

template <class T>
struct remove_AS<__OPENCL_LOCAL_AS__ T> {
template <class T> struct remove_AS<__OPENCL_LOCAL_AS__ T> { typedef T type; };

template <class T> struct remove_AS<__OPENCL_CONSTANT_AS__ T> {
typedef T type;
};

template <class T>
struct remove_AS<__OPENCL_CONSTANT_AS__ T> {
typedef T type;
template <class T> struct deduce_AS<__OPENCL_GLOBAL_AS__ T> {
static const access::address_space value =
access::address_space::global_space;
};

template <class T> struct deduce_AS<__OPENCL_PRIVATE_AS__ T> {
static const access::address_space value =
access::address_space::private_space;
};

template <class T> struct deduce_AS<__OPENCL_LOCAL_AS__ T> {
static const access::address_space value = access::address_space::local_space;
};

template <class T> struct deduce_AS<__OPENCL_CONSTANT_AS__ T> {
static const access::address_space value =
access::address_space::constant_space;
};
#endif

Expand All @@ -231,8 +253,7 @@ struct remove_AS<__OPENCL_CONSTANT_AS__ T> {
#undef __OPENCL_LOCAL_AS__
#undef __OPENCL_CONSTANT_AS__
#undef __OPENCL_PRIVATE_AS__

} // namespace detail

} // namespace sycl
} // __SYCL_INLINE_NAMESPACE(cl)
} // namespace sycl
} // __SYCL_INLINE_NAMESPACE(cl)
2 changes: 1 addition & 1 deletion sycl/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ set_target_properties(check-sycl-deploy PROPERTIES FOLDER "SYCL tests")
add_lit_testsuite(check-sycl-spirv "Running device-agnostic SYCL regression tests for SPIR-V"
${CMAKE_CURRENT_BINARY_DIR}
ARGS ${RT_TEST_ARGS}
PARAMS "SYCL_TRIPLE=spir64-unknown-linux-sycldevice"
PARAMS "SYCL_TRIPLE=spir64-unknown-unknown-sycldevice"
DEPENDS ${SYCL_TEST_DEPS}
EXCLUDE_FROM_CHECK_ALL
)
Expand Down
38 changes: 38 additions & 0 deletions sycl/test/basic_tests/address_space_traits.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
// RUN: %clangxx -fsycl -fsycl-device-only -D__ENABLE_USM_ADDR_SPACE__ -fsycl-targets=%sycl_triple %s -c

#include <CL/sycl.hpp>
#include <cassert>

using namespace cl::sycl;
int main() {

queue myQueue;
myQueue.submit([&](handler &cgh) {
cgh.single_task<class dummy>([=]() {
static_assert(
detail::deduce_AS<__attribute__((opencl_global)) int>::value ==
access::address_space::global_space,
"Unexpected address space");
static_assert(
detail::deduce_AS<__attribute__((opencl_local)) int>::value ==
access::address_space::local_space,
"Unexpected address space");
static_assert(
detail::deduce_AS<__attribute__((opencl_private)) int>::value ==
access::address_space::private_space,
"Unexpected address space");
static_assert(
detail::deduce_AS<__attribute__((opencl_constant)) int>::value ==
access::address_space::constant_space,
"Unexpected address space");
static_assert(
detail::deduce_AS<__attribute__((opencl_global_device)) int>::value ==
access::address_space::global_device_space,
"Unexpected address space");
static_assert(
detail::deduce_AS<__attribute__((opencl_global_host)) int>::value ==
access::address_space::global_host_space,
"Unexpected address space");
});
});
}
Loading