From f867e71a9e83a58ddcc846999a071fccd4270111 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Mestre?= Date: Wed, 4 Sep 2024 18:54:24 +0100 Subject: [PATCH 1/4] [UR][Cuda] Add support for command-buffer kernel updates --- sycl/cmake/modules/FetchUnifiedRuntime.cmake | 4 ++-- sycl/source/detail/device_impl.cpp | 17 +++++++++++++---- sycl/source/detail/graph_impl.cpp | 1 + sycl/source/detail/scheduler/commands.cpp | 2 +- sycl/unittests/helpers/UrMock.hpp | 17 +++++++++++++++-- 5 files changed, 32 insertions(+), 9 deletions(-) diff --git a/sycl/cmake/modules/FetchUnifiedRuntime.cmake b/sycl/cmake/modules/FetchUnifiedRuntime.cmake index e4f3d3c9f01e4..b139694c72050 100644 --- a/sycl/cmake/modules/FetchUnifiedRuntime.cmake +++ b/sycl/cmake/modules/FetchUnifiedRuntime.cmake @@ -116,7 +116,7 @@ if(SYCL_UR_USE_FETCH_CONTENT) CACHE PATH "Path to external '${name}' adapter source dir" FORCE) endfunction() - set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git") + set(UNIFIED_RUNTIME_REPO "https://github.com/Bensuo/unified-runtime.git") # commit 2ad32681efd2c977f2c1f7f3a30d572d4c15499c # Author: Hugh Delaney # Date: Wed Sep 25 15:55:05 2024 +0100 @@ -124,7 +124,7 @@ if(SYCL_UR_USE_FETCH_CONTENT) # Making a native queue doesn't require hDevice to be non null, but this # associates the queue with a null device, even if hContext contains valid # devices. - set(UNIFIED_RUNTIME_TAG 2ad32681efd2c977f2c1f7f3a30d572d4c15499c) + set(UNIFIED_RUNTIME_TAG d944ff3391dfbe69db453406bd0bbcb78716dee0) set(UMF_BUILD_EXAMPLES OFF CACHE INTERNAL "EXAMPLES") # Due to the use of dependentloadflag and no installer for UMF and hwloc we need diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index 084cb5a6ec2a7..b184c30cad6fd 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -704,17 +704,26 @@ bool device_impl::has(aspect Aspect) const { return CallSuccessful && Result != nullptr; } case aspect::ext_oneapi_graph: { - bool SupportsCommandBufferUpdate = false; + ur_device_command_buffer_update_capability_flags_t UpdateCapabilities; bool CallSuccessful = getAdapter()->call_nocheck( - MDevice, UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP, - sizeof(SupportsCommandBufferUpdate), &SupportsCommandBufferUpdate, + MDevice, UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP, + sizeof(UpdateCapabilities), &UpdateCapabilities, nullptr) == UR_RESULT_SUCCESS; if (!CallSuccessful) { return false; } - return has(aspect::ext_oneapi_limited_graph) && SupportsCommandBufferUpdate; + /* The kernel handle update capability is not yet required for the + * ext_oneapi_graph aspect */ + ur_device_command_buffer_update_capability_flags_t RequiredCapabilities = + UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_ARGUMENTS | + UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_LOCAL_WORK_SIZE | + UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_SIZE | + UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_OFFSET; + + return has(aspect::ext_oneapi_limited_graph) && + (UpdateCapabilities & RequiredCapabilities) == RequiredCapabilities; } case aspect::ext_oneapi_limited_graph: { bool SupportsCommandBuffers = false; diff --git a/sycl/source/detail/graph_impl.cpp b/sycl/source/detail/graph_impl.cpp index acdf6baf50475..3fbda06f69b62 100644 --- a/sycl/source/detail/graph_impl.cpp +++ b/sycl/source/detail/graph_impl.cpp @@ -1474,6 +1474,7 @@ void exec_graph_impl::updateImpl(std::shared_ptr Node) { } } + UpdateDesc.hNewKernel = UrKernel; UpdateDesc.numNewMemObjArgs = MemobjDescs.size(); UpdateDesc.pNewMemObjArgList = MemobjDescs.data(); UpdateDesc.numNewPointerArgs = PtrDescs.size(); diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index acc61ab39d77d..41570dd11a3c1 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -2538,7 +2538,7 @@ ur_result_t enqueueImpCommandBufferKernel( ur_result_t Res = Adapter->call_nocheck( CommandBuffer, UrKernel, NDRDesc.Dims, &NDRDesc.GlobalOffset[0], - &NDRDesc.GlobalSize[0], LocalSize, SyncPoints.size(), + &NDRDesc.GlobalSize[0], LocalSize, 0, nullptr, SyncPoints.size(), SyncPoints.size() ? SyncPoints.data() : nullptr, OutSyncPoint, OutCommand); diff --git a/sycl/unittests/helpers/UrMock.hpp b/sycl/unittests/helpers/UrMock.hpp index 71c9962155b2a..8cf9b69ebc550 100644 --- a/sycl/unittests/helpers/UrMock.hpp +++ b/sycl/unittests/helpers/UrMock.hpp @@ -200,14 +200,27 @@ inline ur_result_t mock_urDeviceGetInfo(void *pParams) { case UR_DEVICE_INFO_AVAILABLE: case UR_DEVICE_INFO_LINKER_AVAILABLE: case UR_DEVICE_INFO_COMPILER_AVAILABLE: - case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: - case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: { + case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: { if (*params->ppPropValue) *static_cast(*params->ppPropValue) = true; if (*params->ppPropSizeRet) **params->ppPropSizeRet = sizeof(true); return UR_RESULT_SUCCESS; } + case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP: { + if (*params->ppPropValue) + *static_cast( + *params->ppPropValue) = + UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_ARGUMENTS | + UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_SIZE | + UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_LOCAL_WORK_SIZE | + UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_OFFSET | + UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_HANDLE; + if (*params->ppPropSizeRet) + **params->ppPropSizeRet = + sizeof(ur_device_command_buffer_update_capability_flags_t); + return UR_RESULT_SUCCESS; + } // This mock GPU device has no sub-devices case UR_DEVICE_INFO_SUPPORTED_PARTITIONS: { if (*params->ppPropSizeRet) { From 76fe29b21d70889c4af77ecfe47cdb0c65b3421a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio?= Date: Fri, 27 Sep 2024 11:12:26 +0100 Subject: [PATCH 2/4] Make hNewKernel nullptr --- sycl/source/detail/graph_impl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/source/detail/graph_impl.cpp b/sycl/source/detail/graph_impl.cpp index 3fbda06f69b62..57ddb2351a3f4 100644 --- a/sycl/source/detail/graph_impl.cpp +++ b/sycl/source/detail/graph_impl.cpp @@ -1474,7 +1474,7 @@ void exec_graph_impl::updateImpl(std::shared_ptr Node) { } } - UpdateDesc.hNewKernel = UrKernel; + UpdateDesc.hNewKernel = nullptr; UpdateDesc.numNewMemObjArgs = MemobjDescs.size(); UpdateDesc.pNewMemObjArgList = MemobjDescs.data(); UpdateDesc.numNewPointerArgs = PtrDescs.size(); From 2e3cbd395c7312aa82a134aa83583bfe491ccffc Mon Sep 17 00:00:00 2001 From: Aaron Greig Date: Mon, 30 Sep 2024 10:46:46 +0100 Subject: [PATCH 3/4] Update UR tag --- sycl/cmake/modules/FetchUnifiedRuntime.cmake | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sycl/cmake/modules/FetchUnifiedRuntime.cmake b/sycl/cmake/modules/FetchUnifiedRuntime.cmake index aa7bd830e4bc7..1baa2d43675f7 100644 --- a/sycl/cmake/modules/FetchUnifiedRuntime.cmake +++ b/sycl/cmake/modules/FetchUnifiedRuntime.cmake @@ -117,13 +117,13 @@ if(SYCL_UR_USE_FETCH_CONTENT) endfunction() set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git") - # commit 22962057df1b9d538e08088a7b75d9d8e7c29f90 (HEAD, origin/main, origin/HEAD) - # Merge: e824ddc2 f0a1c433 + # commit 532a4ecb72da4876cef61a4ae4d638e27ad609d5 + # Merge: 22962057 d944ff33 # Author: aarongreig - # Date: Fri Sep 27 16:54:04 2024 +0100 - # Merge pull request #2017 from nrspruit/new_sysman_init - # [L0] Use zesInit for SysMan API usage - set(UNIFIED_RUNTIME_TAG 22962057df1b9d538e08088a7b75d9d8e7c29f90) + # Date: Mon Sep 30 10:43:10 2024 +0100 + # Merge pull request #1924 from Bensuo/fabio/cmd_buffer_kernel_update + # Add support for command-buffer kernel updates + set(UNIFIED_RUNTIME_TAG 532a4ecb72da4876cef61a4ae4d638e27ad609d5) set(UMF_BUILD_EXAMPLES OFF CACHE INTERNAL "EXAMPLES") # Due to the use of dependentloadflag and no installer for UMF and hwloc we need From 4d441d71348b2c84c3ad0b824c83845c3549012b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio?= Date: Mon, 30 Sep 2024 15:55:52 +0100 Subject: [PATCH 4/4] Revert back to passing the kernel explicitly when updating --- sycl/source/detail/graph_impl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/source/detail/graph_impl.cpp b/sycl/source/detail/graph_impl.cpp index 57ddb2351a3f4..3fbda06f69b62 100644 --- a/sycl/source/detail/graph_impl.cpp +++ b/sycl/source/detail/graph_impl.cpp @@ -1474,7 +1474,7 @@ void exec_graph_impl::updateImpl(std::shared_ptr Node) { } } - UpdateDesc.hNewKernel = nullptr; + UpdateDesc.hNewKernel = UrKernel; UpdateDesc.numNewMemObjArgs = MemobjDescs.size(); UpdateDesc.pNewMemObjArgList = MemobjDescs.data(); UpdateDesc.numNewPointerArgs = PtrDescs.size();