From 097fe4ab00feb29e17d53db33993ad2533c65d8f Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Thu, 19 Aug 2021 11:17:47 +0200 Subject: [PATCH] Fix uploading the EventSetup conditions to multiple CUDA devices Associate to the correct CUDA device the events used to track if the conditions have been transferred to each device. --- HeterogeneousCore/CUDACore/interface/ESProduct.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/HeterogeneousCore/CUDACore/interface/ESProduct.h b/HeterogeneousCore/CUDACore/interface/ESProduct.h index 676d3e9d1c0d9..8740095292380 100644 --- a/HeterogeneousCore/CUDACore/interface/ESProduct.h +++ b/HeterogeneousCore/CUDACore/interface/ESProduct.h @@ -9,6 +9,7 @@ #include "FWCore/Utilities/interface/thread_safety_macros.h" #include "HeterogeneousCore/CUDAServices/interface/numberOfDevices.h" #include "HeterogeneousCore/CUDAUtilities/interface/EventCache.h" +#include "HeterogeneousCore/CUDAUtilities/interface/ScopedSetDevice.h" #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" #include "HeterogeneousCore/CUDAUtilities/interface/currentDevice.h" #include "HeterogeneousCore/CUDAUtilities/interface/eventWorkHasCompleted.h" @@ -19,10 +20,13 @@ namespace cms { class ESProduct { public: ESProduct() : gpuDataPerDevice_(numberOfDevices()) { + cms::cuda::ScopedSetDevice scopedDevice; for (size_t i = 0; i < gpuDataPerDevice_.size(); ++i) { + scopedDevice.set(i); gpuDataPerDevice_[i].m_event = getEventCache().get(); } } + ~ESProduct() = default; // transferAsync should be a function of (T&, cudaStream_t) @@ -30,12 +34,10 @@ namespace cms { // to the CUDA stream template const T& dataForCurrentDeviceAsync(cudaStream_t cudaStream, F transferAsync) const { - auto device = currentDevice(); - + int device = currentDevice(); auto& data = gpuDataPerDevice_[device]; - // If GPU data has already been filled, we can return it - // immediately + // If the GPU data has already been filled, we can return it immediately if (not data.m_filled.load()) { // It wasn't, so need to fill it std::scoped_lock lk{data.m_mutex}; @@ -103,4 +105,4 @@ namespace cms { } // namespace cuda } // namespace cms -#endif +#endif // HeterogeneousCore_CUDACore_ESProduct_h