Skip to content

Commit

Permalink
[SYCL][PI][CUDA] Update queries for atomic order and scope for CUDA (#…
Browse files Browse the repository at this point in the history
…4853)

Updates returns for atomics memory order and scope capabilities queries to make them in line with changes in #4820. 

This includes adding the previously not existing option to query for atomic scope capabilities.
  • Loading branch information
t4c1 committed Jan 26, 2022
1 parent ec29322 commit 43a4192
Show file tree
Hide file tree
Showing 13 changed files with 136 additions and 7 deletions.
12 changes: 10 additions & 2 deletions sycl/include/CL/sycl/detail/pi.h
Original file line number Diff line number Diff line change
Expand Up @@ -304,12 +304,12 @@ typedef enum {
PI_DEVICE_INFO_IMAGE_SRGB = 0x10027,
PI_DEVICE_INFO_ATOMIC_64 = 0x10110,
PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES = 0x10111,
PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES = 0x11000,
PI_DEVICE_INFO_GPU_HW_THREADS_PER_EU = 0x10112,
PI_EXT_ONEAPI_DEVICE_INFO_MAX_GLOBAL_WORK_GROUPS = 0x20000,
PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_1D = 0x20001,
PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_2D = 0x20002,
PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_3D = 0x20003

} _pi_device_info;

typedef enum {
Expand All @@ -330,7 +330,8 @@ typedef enum {
PI_CONTEXT_INFO_PROPERTIES = CL_CONTEXT_PROPERTIES,
PI_CONTEXT_INFO_REFERENCE_COUNT = CL_CONTEXT_REFERENCE_COUNT,
// Atomics capabilities extensions
PI_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES = 0x10010
PI_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES = 0x10010,
PI_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES = 0x10011
} _pi_context_info;

typedef enum {
Expand Down Expand Up @@ -537,6 +538,13 @@ constexpr pi_memory_order_capabilities PI_MEMORY_ORDER_RELEASE = 0x04;
constexpr pi_memory_order_capabilities PI_MEMORY_ORDER_ACQ_REL = 0x08;
constexpr pi_memory_order_capabilities PI_MEMORY_ORDER_SEQ_CST = 0x10;

using pi_memory_scope_capabilities = pi_bitfield;
constexpr pi_memory_scope_capabilities PI_MEMORY_SCOPE_WORK_ITEM = 0x01;
constexpr pi_memory_scope_capabilities PI_MEMORY_SCOPE_SUB_GROUP = 0x02;
constexpr pi_memory_scope_capabilities PI_MEMORY_SCOPE_WORK_GROUP = 0x04;
constexpr pi_memory_scope_capabilities PI_MEMORY_SCOPE_DEVICE = 0x08;
constexpr pi_memory_scope_capabilities PI_MEMORY_SCOPE_SYSTEM = 0x10;

typedef enum {
PI_PROFILING_INFO_COMMAND_QUEUED = CL_PROFILING_COMMAND_QUEUED,
PI_PROFILING_INFO_COMMAND_SUBMIT = CL_PROFILING_COMMAND_SUBMIT,
Expand Down
1 change: 1 addition & 0 deletions sycl/include/CL/sycl/info/context_traits.def
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@ __SYCL_PARAM_TRAITS_SPEC(context, reference_count, cl_uint)
__SYCL_PARAM_TRAITS_SPEC(context, platform, cl::sycl::platform)
__SYCL_PARAM_TRAITS_SPEC(context, devices, std::vector<cl::sycl::device>)
__SYCL_PARAM_TRAITS_SPEC(context, atomic_memory_order_capabilities, std::vector<cl::sycl::memory_order>)
__SYCL_PARAM_TRAITS_SPEC(context, atomic_memory_scope_capabilities, std::vector<cl::sycl::memory_scope>)
2 changes: 2 additions & 0 deletions sycl/include/CL/sycl/info/device_traits.def
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ __SYCL_PARAM_TRAITS_SPEC(device, image_support, bool)
__SYCL_PARAM_TRAITS_SPEC(device, atomic64, bool)
__SYCL_PARAM_TRAITS_SPEC(device, atomic_memory_order_capabilities,
std::vector<cl::sycl::memory_order>)
__SYCL_PARAM_TRAITS_SPEC(device, atomic_memory_scope_capabilities,
std::vector<cl::sycl::memory_scope>)
__SYCL_PARAM_TRAITS_SPEC(device, max_read_image_args, pi_uint32)
__SYCL_PARAM_TRAITS_SPEC(device, max_write_image_args, pi_uint32)
__SYCL_PARAM_TRAITS_SPEC(device, image2d_max_width, size_t)
Expand Down
7 changes: 6 additions & 1 deletion sycl/include/CL/sycl/info/info_desc.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ class program;
class device;
class platform;
class kernel_id;
enum class memory_scope;

// TODO: stop using OpenCL directly, use PI.
namespace info {
Expand All @@ -44,6 +45,8 @@ enum class context : cl_context_info {
devices = CL_CONTEXT_DEVICES,
atomic_memory_order_capabilities =
PI_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES,
atomic_memory_scope_capabilities =
PI_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES,
};

// A.3 Device information descriptors
Expand Down Expand Up @@ -168,7 +171,9 @@ enum class device : cl_device_info {
PI_EXT_ONEAPI_DEVICE_INFO_MAX_GLOBAL_WORK_GROUPS,
ext_oneapi_max_work_groups_1d = PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_1D,
ext_oneapi_max_work_groups_2d = PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_2D,
ext_oneapi_max_work_groups_3d = PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_3D
ext_oneapi_max_work_groups_3d = PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_3D,
atomic_memory_scope_capabilities =
PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES
};

enum class device_type : pi_uint64 {
Expand Down
16 changes: 16 additions & 0 deletions sycl/include/CL/sycl/memory_enums.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,22 @@ readMemoryOrderBitfield(pi_memory_order_capabilities bits) {
return result;
}

inline std::vector<memory_scope>
readMemoryScopeBitfield(pi_memory_scope_capabilities bits) {
std::vector<memory_scope> result;
if (bits & PI_MEMORY_SCOPE_WORK_ITEM)
result.push_back(memory_scope::work_item);
if (bits & PI_MEMORY_SCOPE_SUB_GROUP)
result.push_back(memory_scope::sub_group);
if (bits & PI_MEMORY_SCOPE_WORK_GROUP)
result.push_back(memory_scope::work_group);
if (bits & PI_MEMORY_SCOPE_DEVICE)
result.push_back(memory_scope::device);
if (bits & PI_MEMORY_SCOPE_SYSTEM)
result.push_back(memory_scope::system);
return result;
}

#ifndef __SYCL_DEVICE_ONLY__
static constexpr std::memory_order getStdMemoryOrder(sycl::memory_order order) {
switch (order) {
Expand Down
55 changes: 51 additions & 4 deletions sycl/plugins/cuda/pi_cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -851,6 +851,33 @@ pi_result cuda_piContextGetInfo(pi_context context, pi_context_info param_name,
case PI_CONTEXT_INFO_REFERENCE_COUNT:
return getInfo(param_value_size, param_value, param_value_size_ret,
context->get_reference_count());
case PI_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: {
int major = 0;
cl::sycl::detail::pi::assertion(
cuDeviceGetAttribute(&major,
CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR,
context->get_device()->get()) == CUDA_SUCCESS);
pi_memory_order_capabilities capabilities =
(major >= 6) ? PI_MEMORY_ORDER_RELAXED | PI_MEMORY_ORDER_ACQUIRE |
PI_MEMORY_ORDER_RELEASE | PI_MEMORY_ORDER_ACQ_REL
: PI_MEMORY_ORDER_RELAXED;
return getInfo(param_value_size, param_value, param_value_size_ret,
capabilities);
}
case PI_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: {
int major = 0;
cl::sycl::detail::pi::assertion(
cuDeviceGetAttribute(&major,
CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR,
context->get_device()->get()) == CUDA_SUCCESS);
pi_memory_order_capabilities capabilities =
(major >= 5) ? PI_MEMORY_SCOPE_WORK_ITEM | PI_MEMORY_SCOPE_SUB_GROUP |
PI_MEMORY_SCOPE_WORK_GROUP | PI_MEMORY_SCOPE_DEVICE |
PI_MEMORY_SCOPE_SYSTEM
: PI_MEMORY_SCOPE_DEVICE;
return getInfo(param_value_size, param_value, param_value_size_ret,
capabilities);
}
default:
__SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(param_name);
}
Expand Down Expand Up @@ -1112,11 +1139,31 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name,
atomic64);
}
case PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: {
// NVPTX currently only support at most monotonic atomic load/store.
// Acquire and release is present in newer PTX, but is not yet supported
// in LLVM NVPTX.
int major = 0;
cl::sycl::detail::pi::assertion(
cuDeviceGetAttribute(&major,
CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR,
device->get()) == CUDA_SUCCESS);
pi_memory_order_capabilities capabilities =
(major >= 6) ? PI_MEMORY_ORDER_RELAXED | PI_MEMORY_ORDER_ACQUIRE |
PI_MEMORY_ORDER_RELEASE | PI_MEMORY_ORDER_ACQ_REL
: PI_MEMORY_ORDER_RELAXED;
return getInfo(param_value_size, param_value, param_value_size_ret,
capabilities);
}
case PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: {
int major = 0;
cl::sycl::detail::pi::assertion(
cuDeviceGetAttribute(&major,
CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR,
device->get()) == CUDA_SUCCESS);
pi_memory_order_capabilities capabilities =
(major >= 5) ? PI_MEMORY_SCOPE_WORK_ITEM | PI_MEMORY_SCOPE_SUB_GROUP |
PI_MEMORY_SCOPE_WORK_GROUP | PI_MEMORY_SCOPE_DEVICE |
PI_MEMORY_SCOPE_SYSTEM
: PI_MEMORY_SCOPE_DEVICE;
return getInfo(param_value_size, param_value, param_value_size_ret,
PI_MEMORY_ORDER_RELAXED);
capabilities);
}
case PI_DEVICE_INFO_SUB_GROUP_SIZES_INTEL: {
// NVIDIA devices only support one sub-group size (the warp size)
Expand Down
2 changes: 2 additions & 0 deletions sycl/plugins/hip/pi_hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -845,6 +845,7 @@ pi_result hip_piContextGetInfo(pi_context context, pi_context_info param_name,
case PI_CONTEXT_INFO_REFERENCE_COUNT:
return getInfo(param_value_size, param_value, param_value_size_ret,
context->get_reference_count());
case PI_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES:
default:
__SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(param_name);
}
Expand Down Expand Up @@ -1625,6 +1626,7 @@ pi_result hip_piDeviceGetInfo(pi_device device, pi_device_info param_name,
case PI_DEVICE_INFO_ATOMIC_64:
case PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES:
// TODO: Investigate if this information is available on HIP.
case PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES:
case PI_DEVICE_INFO_PCI_ADDRESS:
case PI_DEVICE_INFO_GPU_EU_COUNT:
case PI_DEVICE_INFO_GPU_EU_SIMD_WIDTH:
Expand Down
3 changes: 3 additions & 0 deletions sycl/plugins/level_zero/pi_level_zero.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2648,6 +2648,8 @@ pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName,
// currently not supported in level zero runtime
return PI_INVALID_VALUE;

// TODO: Implement.
case PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES:
default:
zePrint("Unsupported ParamName in piGetDeviceInfo\n");
zePrint("ParamName=%d(0x%x)\n", ParamName, ParamName);
Expand Down Expand Up @@ -2842,6 +2844,7 @@ pi_result piContextGetInfo(pi_context Context, pi_context_info ParamName,
return ReturnValue(pi_uint32(Context->Devices.size()));
case PI_CONTEXT_INFO_REFERENCE_COUNT:
return ReturnValue(pi_uint32{Context->RefCount});
case PI_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES:
default:
// TODO: implement other parameters
die("piGetContextInfo: unsuppported ParamName.");
Expand Down
1 change: 1 addition & 0 deletions sycl/plugins/opencl/pi_opencl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,7 @@ pi_result piDeviceGetInfo(pi_device device, pi_device_info paramName,
// TODO: Implement.
case PI_DEVICE_INFO_ATOMIC_64:
case PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES:
case PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES:
return PI_INVALID_VALUE;
case PI_DEVICE_INFO_IMAGE_SRGB: {
cl_bool result = true;
Expand Down
18 changes: 18 additions & 0 deletions sycl/source/detail/context_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,24 @@ context_impl::get_info<info::context::atomic_memory_order_capabilities>()
sizeof(Result), &Result, nullptr);
return readMemoryOrderBitfield(Result);
}
template <>
std::vector<cl::sycl::memory_scope>
context_impl::get_info<info::context::atomic_memory_scope_capabilities>()
const {
if (is_host())
return {cl::sycl::memory_scope::work_item,
cl::sycl::memory_scope::sub_group,
cl::sycl::memory_scope::work_group, cl::sycl::memory_scope::device,
cl::sycl::memory_scope::system};

pi_memory_scope_capabilities Result;
getPlugin().call<PiApiKind::piContextGetInfo>(
MContext,
pi::cast<pi_context_info>(
info::context::atomic_memory_scope_capabilities),
sizeof(Result), &Result, nullptr);
return readMemoryScopeBitfield(Result);
}

RT::PiContext &context_impl::getHandleRef() { return MContext; }
const RT::PiContext &context_impl::getHandleRef() const { return MContext; }
Expand Down
22 changes: 22 additions & 0 deletions sycl/source/detail/device_info.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,21 @@ struct get_device_info<std::vector<memory_order>,
}
};

// Specialization for atomic_memory_scope_capabilities, PI returns a bitfield
template <>
struct get_device_info<std::vector<memory_scope>,
info::device::atomic_memory_scope_capabilities> {
static std::vector<memory_scope> get(RT::PiDevice dev, const plugin &Plugin) {
pi_memory_scope_capabilities result;
Plugin.call_nocheck<PiApiKind::piDeviceGetInfo>(
dev,
pi::cast<RT::PiDeviceInfo>(
info::device::atomic_memory_scope_capabilities),
sizeof(pi_memory_scope_capabilities), &result, nullptr);
return readMemoryScopeBitfield(result);
}
};

// Specialization for exec_capabilities, OpenCL returns a bitfield
template <>
struct get_device_info<std::vector<info::execution_capability>,
Expand Down Expand Up @@ -764,6 +779,13 @@ get_device_info_host<info::device::atomic_memory_order_capabilities>() {
memory_order::acq_rel, memory_order::seq_cst};
}

template <>
inline std::vector<memory_scope>
get_device_info_host<info::device::atomic_memory_scope_capabilities>() {
return {memory_scope::work_item, memory_scope::sub_group,
memory_scope::work_group, memory_scope::device, memory_scope::system};
}

template <>
inline cl_uint get_device_info_host<info::device::max_read_image_args>() {
// current value is the required minimum
Expand Down
2 changes: 2 additions & 0 deletions sycl/test/abi/sycl_symbols_linux.dump
Original file line number Diff line number Diff line change
Expand Up @@ -4249,6 +4249,7 @@ _ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE65575EEENS3_12param_traitsIS4_XT_
_ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE65808EEENS3_12param_traitsIS4_XT_EE11return_typeEv
_ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE65809EEENS3_12param_traitsIS4_XT_EE11return_typeEv
_ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE65810EEENS3_12param_traitsIS4_XT_EE11return_typeEv
_ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE69632EEENS3_12param_traitsIS4_XT_EE11return_typeEv
_ZNK2cl4sycl6device9getNativeEv
_ZNK2cl4sycl6kernel11get_backendEv
_ZNK2cl4sycl6kernel11get_contextEv
Expand Down Expand Up @@ -4348,6 +4349,7 @@ _ZNK2cl4sycl7context8get_infoILNS0_4info7contextE4224EEENS3_12param_traitsIS4_XT
_ZNK2cl4sycl7context8get_infoILNS0_4info7contextE4225EEENS3_12param_traitsIS4_XT_EE11return_typeEv
_ZNK2cl4sycl7context8get_infoILNS0_4info7contextE4228EEENS3_12param_traitsIS4_XT_EE11return_typeEv
_ZNK2cl4sycl7context8get_infoILNS0_4info7contextE65552EEENS3_12param_traitsIS4_XT_EE11return_typeEv
_ZNK2cl4sycl7context8get_infoILNS0_4info7contextE65553EEENS3_12param_traitsIS4_XT_EE11return_typeEv
_ZNK2cl4sycl7context9getNativeEv
_ZNK2cl4sycl7handler14getHandlerImplEv
_ZNK2cl4sycl7handler27isStateExplicitKernelBundleEv
Expand Down
2 changes: 2 additions & 0 deletions sycl/test/abi/sycl_symbols_windows.dump
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
??$get_info@$0BAAA@@device@sycl@cl@@QEBA?AW4device_type@info@12@XZ
??$get_info@$0BAAB@@device@sycl@cl@@QEBAIXZ
??$get_info@$0BAABA@@context@sycl@cl@@QEBA?AV?$vector@W4memory_order@sycl@cl@@V?$allocator@W4memory_order@sycl@cl@@@std@@@std@@XZ
??$get_info@$0BAABB@@context@sycl@cl@@QEBA?AV?$vector@W4memory_scope@sycl@cl@@V?$allocator@W4memory_scope@sycl@cl@@@std@@@std@@XZ
??$get_info@$0BAAC@@device@sycl@cl@@QEBAIXZ
??$get_info@$0BAACA@@device@sycl@cl@@QEBA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@XZ
??$get_info@$0BAACB@@device@sycl@cl@@QEBAIXZ
Expand Down Expand Up @@ -115,6 +116,7 @@
??$get_info@$0BAJA@@queue@sycl@cl@@QEBA?AVcontext@12@XZ
??$get_info@$0BAJB@@queue@sycl@cl@@QEBA?AVdevice@12@XZ
??$get_info@$0BAJC@@queue@sycl@cl@@QEBAIXZ
??$get_info@$0BBAAA@@device@sycl@cl@@QEBA?AV?$vector@W4memory_scope@sycl@cl@@V?$allocator@W4memory_scope@sycl@cl@@@std@@@std@@XZ
??$get_info@$0BBGA@@program@sycl@cl@@QEBAIXZ
??$get_info@$0BBGB@@program@sycl@cl@@QEBA?AVcontext@12@XZ
??$get_info@$0BBGD@@program@sycl@cl@@QEBA?AV?$vector@Vdevice@sycl@cl@@V?$allocator@Vdevice@sycl@cl@@@std@@@std@@XZ
Expand Down

0 comments on commit 43a4192

Please sign in to comment.