From 2591186d9e1111631be0142a1308fd0f6cdb77fe Mon Sep 17 00:00:00 2001 From: Gilles Grospellier Date: Wed, 17 Sep 2025 16:30:29 +0200 Subject: [PATCH 1/5] [arcane,accelerator] Ajoute information sur la taille d'un warp dans 'DeviceInfo'. --- arcane/src/arcane/accelerator/core/DeviceInfo.h | 9 +++++++-- arcane/src/arcane/accelerator/core/Runner.cc | 1 + arcane/src/arcane/accelerator/core/internal/RunnerImpl.h | 4 +++- .../accelerator/cuda/runtime/CudaAcceleratorRuntime.cc | 1 + .../accelerator/hip/runtime/HipAcceleratorRuntime.cc | 1 + 5 files changed, 13 insertions(+), 3 deletions(-) diff --git a/arcane/src/arcane/accelerator/core/DeviceInfo.h b/arcane/src/arcane/accelerator/core/DeviceInfo.h index 6d71d66b34..51263fdd32 100644 --- a/arcane/src/arcane/accelerator/core/DeviceInfo.h +++ b/arcane/src/arcane/accelerator/core/DeviceInfo.h @@ -1,11 +1,11 @@ // -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*- //----------------------------------------------------------------------------- -// Copyright 2000-2022 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com) +// Copyright 2000-2025 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com) // See the top-level COPYRIGHT file for details. // SPDX-License-Identifier: Apache-2.0 //----------------------------------------------------------------------------- /*---------------------------------------------------------------------------*/ -/* DeviceInfo.h (C) 2000-2022 */ +/* DeviceInfo.h (C) 2000-2025 */ /* */ /* Information sur un device. */ /*---------------------------------------------------------------------------*/ @@ -44,12 +44,16 @@ class ARCANE_ACCELERATOR_CORE_EXPORT DeviceInfo //! Description du device. String description() const { return m_description; } + //! 
Taille d'un warp + Int32 warpSize() const { return m_warp_size; } + public: void setDeviceId(DeviceId id) { m_device_id = id; } void setUUIDAsString(const String& v) { m_uuid_as_string = v; } void setDescription(const String& v) { m_description = v; } void setName(const String& v) { m_name = v; } + void setWarpSize(Int32 v) { m_warp_size = v; } private: @@ -57,6 +61,7 @@ class ARCANE_ACCELERATOR_CORE_EXPORT DeviceInfo String m_name; String m_uuid_as_string; String m_description; + Int32 m_warp_size = 0; }; /*---------------------------------------------------------------------------*/ diff --git a/arcane/src/arcane/accelerator/core/Runner.cc b/arcane/src/arcane/accelerator/core/Runner.cc index e2ae0a6b25..f26163a5c1 100644 --- a/arcane/src/arcane/accelerator/core/Runner.cc +++ b/arcane/src/arcane/accelerator/core/Runner.cc @@ -112,6 +112,7 @@ initialize(Runner* runner, eExecutionPolicy v, DeviceId device) m_execution_policy = v; m_device_id = device; m_runtime = _getRuntime(v); + m_device_info = m_runtime->deviceInfoList()->deviceInfo(m_device_id.asInt32()); m_is_init = true; m_is_auto_prefetch_command = false; diff --git a/arcane/src/arcane/accelerator/core/internal/RunnerImpl.h b/arcane/src/arcane/accelerator/core/internal/RunnerImpl.h index e49eb909a3..2a660bf14c 100644 --- a/arcane/src/arcane/accelerator/core/internal/RunnerImpl.h +++ b/arcane/src/arcane/accelerator/core/internal/RunnerImpl.h @@ -16,7 +16,7 @@ #include "arcane/accelerator/core/AcceleratorCoreGlobal.h" -#include "arcane/accelerator/core/DeviceId.h" +#include "arcane/accelerator/core/DeviceInfo.h" #include "arcane/accelerator/core/internal/RunnerInternal.h" #include @@ -125,6 +125,7 @@ class RunnerImpl bool isInit() const { return m_is_init; } eDeviceReducePolicy reducePolicy() const { return m_reduce_policy; } DeviceId deviceId() const { return m_device_id; } + const DeviceInfo& deviceInfo() const { return m_device_info; } public: @@ -141,6 +142,7 @@ class RunnerImpl bool m_is_init = false; 
const eDeviceReducePolicy m_reduce_policy = eDeviceReducePolicy::Grid; DeviceId m_device_id; + DeviceInfo m_device_info; impl::IRunnerRuntime* m_runtime = nullptr; RunQueueImplStack m_run_queue_pool; std::mutex m_pool_mutex; diff --git a/arcane/src/arcane/accelerator/cuda/runtime/CudaAcceleratorRuntime.cc b/arcane/src/arcane/accelerator/cuda/runtime/CudaAcceleratorRuntime.cc index 946ebf0e69..e78405039b 100644 --- a/arcane/src/arcane/accelerator/cuda/runtime/CudaAcceleratorRuntime.cc +++ b/arcane/src/arcane/accelerator/cuda/runtime/CudaAcceleratorRuntime.cc @@ -603,6 +603,7 @@ fillDevices(bool is_verbose) device_info.setDescription(description); device_info.setDeviceId(DeviceId(i)); device_info.setName(dp.name); + device_info.setWarpSize(dp.warpSize); m_device_info_list.addDevice(device_info); } diff --git a/arcane/src/arcane/accelerator/hip/runtime/HipAcceleratorRuntime.cc b/arcane/src/arcane/accelerator/hip/runtime/HipAcceleratorRuntime.cc index 6c6de44d9c..275afd453c 100644 --- a/arcane/src/arcane/accelerator/hip/runtime/HipAcceleratorRuntime.cc +++ b/arcane/src/arcane/accelerator/hip/runtime/HipAcceleratorRuntime.cc @@ -450,6 +450,7 @@ fillDevices(bool is_verbose) device_info.setDescription(description); device_info.setDeviceId(DeviceId(i)); device_info.setName(dp.name); + device_info.setWarpSize(dp.warpSize); m_device_info_list.addDevice(device_info); } } From 167bdb235c9769fff3f5548e3229d590c6b33012 Mon Sep 17 00:00:00 2001 From: Gilles Grospellier Date: Wed, 17 Sep 2025 16:37:50 +0200 Subject: [PATCH 2/5] =?UTF-8?q?[arcane,accelerator]=20Ajoute=20la=20taille?= =?UTF-8?q?=20d'un=20warp=20comme=20param=C3=A8tre=20template=20de=20'grid?= =?UTF-8?q?=5Freduce'=20et=20'block=5Freduce'.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../accelerator/CommonCudaHipReduceImpl.h | 54 ++++++++++--------- 1 file changed, 28 insertions(+), 26 deletions(-) diff --git a/arcane/src/arcane/accelerator/CommonCudaHipReduceImpl.h 
b/arcane/src/arcane/accelerator/CommonCudaHipReduceImpl.h index db45f03a11..d6aa89118c 100644 --- a/arcane/src/arcane/accelerator/CommonCudaHipReduceImpl.h +++ b/arcane/src/arcane/accelerator/CommonCudaHipReduceImpl.h @@ -43,13 +43,7 @@ __device__ __forceinline__ unsigned int getBlockId() return blockId; } -#if defined(__HIP__) -constexpr const Int32 WARP_SIZE = warpSize; -#else -constexpr const Int32 WARP_SIZE = 32; -#endif constexpr const Int32 MAX_BLOCK_SIZE = 1024; -constexpr const Int32 MAX_WARPS = MAX_BLOCK_SIZE / WARP_SIZE; template class SimpleReduceOperator; @@ -154,9 +148,11 @@ ARCCORE_DEVICE inline Int64 shfl_sync(Int64 var, int laneMask) /*---------------------------------------------------------------------------*/ // Cette implémentation est celle de RAJA //! reduce values in block into thread 0 -template +template ARCCORE_DEVICE inline T block_reduce(T val, T identity) { + constexpr Int32 WARP_SIZE = WarpSize; + constexpr const Int32 MAX_WARPS = MAX_BLOCK_SIZE / WARP_SIZE; int numThreads = blockDim.x * blockDim.y * blockDim.z; int threadId = getThreadId(); @@ -173,8 +169,8 @@ ARCCORE_DEVICE inline T block_reduce(T val, T identity) T rhs = impl::shfl_xor_sync(temp, i); ReduceOperator::apply(temp, rhs); } - - } else { + } + else { // reduce each warp for (int i = 1; i < WARP_SIZE; i *= 2) { @@ -206,7 +202,8 @@ ARCCORE_DEVICE inline T block_reduce(T val, T identity) // read per warp values if (warpId * WARP_SIZE < numThreads) { temp = sd[warpId]; - } else { + } + else { temp = identity; } for (int i = 1; i < WARP_SIZE; i *= 2) { @@ -224,21 +221,21 @@ ARCCORE_DEVICE inline T block_reduce(T val, T identity) /*---------------------------------------------------------------------------*/ //! 
reduce values in grid into thread 0 of last running block // returns true if put reduced value in val -template +template ARCCORE_DEVICE inline bool -grid_reduce(T& val,T identity,SmallSpan device_mem,unsigned int* device_count) +grid_reduce(T& val, T identity, SmallSpan device_mem, unsigned int* device_count) { int numBlocks = gridDim.x * gridDim.y * gridDim.z; int numThreads = blockDim.x * blockDim.y * blockDim.z; int wrap_around = numBlocks - 1; int blockId = blockIdx.x + gridDim.x * blockIdx.y + - (gridDim.x * gridDim.y) * blockIdx.z; + (gridDim.x * gridDim.y) * blockIdx.z; int threadId = threadIdx.x + blockDim.x * threadIdx.y + - (blockDim.x * blockDim.y) * threadIdx.z; + (blockDim.x * blockDim.y) * threadIdx.z; - T temp = block_reduce(val, identity); + T temp = block_reduce(val, identity); // one thread per block writes to device_mem bool lastBlock = false; @@ -266,7 +263,7 @@ grid_reduce(T& val,T identity,SmallSpan device_mem,unsigned int* device_count ReduceOperator::apply(temp, device_mem[i]); } - temp = block_reduce(temp, identity); + temp = block_reduce(temp, identity); // one thread returns value if (threadId == 0) { @@ -280,9 +277,8 @@ grid_reduce(T& val,T identity,SmallSpan device_mem,unsigned int* device_count /*---------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/ -template -ARCANE_INLINE_REDUCE ARCCORE_DEVICE -void _applyDeviceGeneric(const ReduceDeviceInfo& dev_info) +template +ARCANE_INLINE_REDUCE ARCCORE_DEVICE void _applyDeviceGeneric(const ReduceDeviceInfo& dev_info) { SmallSpan grid_buffer = dev_info.m_grid_buffer; DataType identity = dev_info.m_identity; @@ -291,23 +287,29 @@ void _applyDeviceGeneric(const ReduceDeviceInfo& dev_info) DataType v = dev_info.m_current_value; bool do_grid_reduce = dev_info.m_use_grid_reduce; +#if defined(__HIP__) + constexpr const Int32 WARP_SIZE = warpSize; +#else + constexpr const Int32 WARP_SIZE = 32; 
+#endif + //if (impl::getThreadId()==0){ // printf("BLOCK ID=%d %p s=%d ptr=%p %p use_grid_reduce=%d\n", // getBlockId(),grid_buffer.data(),grid_buffer.size(),ptr, // (void*)device_count,(do_grid_reduce)?1:0); //} - if (do_grid_reduce){ - bool is_done = grid_reduce(v,identity,grid_buffer,device_count); - if (is_done){ + if (do_grid_reduce) { + bool is_done = grid_reduce(v, identity, grid_buffer, device_count); + if (is_done) { *ptr = v; // Il est important de remettre cette à zéro pour la prochaine utilisation d'un Reducer. (*device_count) = 0; } } - else{ - DataType rv = impl::block_reduce(v,identity); - if (impl::getThreadId()==0){ - AtomicReduceOperator::apply(ptr,rv); + else { + DataType rv = impl::block_reduce(v, identity); + if (impl::getThreadId() == 0) { + AtomicReduceOperator::apply(ptr, rv); } } } From 6ef40e9ae345a8ddccd57832282542a4fa3395b6 Mon Sep 17 00:00:00 2001 From: Gilles Grospellier Date: Wed, 17 Sep 2025 17:00:28 +0200 Subject: [PATCH 3/5] =?UTF-8?q?[arcane,accelerator]=20Ajoute=20support=20p?= =?UTF-8?q?our=20les=20r=C3=A9ductions=20avec=20ROCM=207.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Avec ROCM 7, la taille d'un warp n'est plus connue à la compilation. C'est 32 ou 64 en fonction de l'architecture et une même carte peut supporter les 2 valeurs (les cartes RDNA). On utilise une classe template avec la taille d'un warp comme paramètre pour gérer les deux cas. Cela permet aussi de ne pas changer le code pour les autres plateformes. 
--- .../accelerator/CommonCudaHipReduceImpl.h | 35 +++++++++++++++++-- arcane/src/arcane/accelerator/Reduce.h | 4 +++ .../accelerator/core/IReduceMemoryImpl.h | 14 ++++---- .../accelerator/core/ReduceMemoryImpl.cc | 1 + 4 files changed, 44 insertions(+), 10 deletions(-) diff --git a/arcane/src/arcane/accelerator/CommonCudaHipReduceImpl.h b/arcane/src/arcane/accelerator/CommonCudaHipReduceImpl.h index d6aa89118c..8913d89a2e 100644 --- a/arcane/src/arcane/accelerator/CommonCudaHipReduceImpl.h +++ b/arcane/src/arcane/accelerator/CommonCudaHipReduceImpl.h @@ -286,11 +286,22 @@ ARCANE_INLINE_REDUCE ARCCORE_DEVICE void _applyDeviceGeneric(const ReduceDeviceI DataType* ptr = dev_info.m_device_final_ptr; DataType v = dev_info.m_current_value; bool do_grid_reduce = dev_info.m_use_grid_reduce; - +#if HIP_VERSION_MAJOR >= 7 + // A partir de ROCM 7, il n'est pas possible de savoir à la compilation + // la taille d'un warp. C'est 32 ou 64. Pour contourner ce problème, + // on utilise deux instantiations de la reduction et on choisit + // dynamiquement. C'est probablement un peu moins performant qu'avec + // l'ancien mécanisme. Une autre solution serait de choisir à la + // compilation la taille d'un warp. Cela est possible sur les architectures + // HPC comme les MI300 car cette valeur est fixe. Mais sur les architectures + // RDNA les deux valeurs sont possibles. 
+ const Int32 warp_size = dev_info.m_warp_size; +#else #if defined(__HIP__) constexpr const Int32 WARP_SIZE = warpSize; #else constexpr const Int32 WARP_SIZE = 32; +#endif #endif //if (impl::getThreadId()==0){ // printf("BLOCK ID=%d %p s=%d ptr=%p %p use_grid_reduce=%d\n", // getBlockId(),grid_buffer.data(),grid_buffer.size(),ptr, // (void*)device_count,(do_grid_reduce)?1:0); //} if (do_grid_reduce) { +#if HIP_VERSION_MAJOR >= 7 + bool is_done = false; + if (warp_size == 64) + is_done = grid_reduce(v, identity, grid_buffer, device_count); + else if (warp_size == 32) + is_done = grid_reduce(v, identity, grid_buffer, device_count); + else + assert(false && "Bad warp size (should be 32 or 64)"); +#else bool is_done = grid_reduce(v, identity, grid_buffer, device_count); +#endif if (is_done) { *ptr = v; - // Il est important de remettre cette à zéro pour la prochaine utilisation d'un Reducer. + // Il est important de remettre cette variable à zéro pour la prochaine utilisation d'un Reducer. (*device_count) = 0; } } else { +#if HIP_VERSION_MAJOR >= 7 + DataType rv = identity; + if (warp_size == 64) + rv = impl::block_reduce(v, identity); + else if (warp_size == 32) + rv = impl::block_reduce(v, identity); + else + assert(false && "Bad warp size (should be 32 or 64)"); +#else DataType rv = impl::block_reduce(v, identity); +#endif if (impl::getThreadId() == 0) { AtomicReduceOperator::apply(ptr, rv); } diff --git a/arcane/src/arcane/accelerator/Reduce.h b/arcane/src/arcane/accelerator/Reduce.h index 6df8e698a7..36dec62004 100644 --- a/arcane/src/arcane/accelerator/Reduce.h +++ b/arcane/src/arcane/accelerator/Reduce.h @@ -110,6 +110,9 @@ class ReduceDeviceInfo //! Indique si on utilise la réduction par grille (sinon on utilise les atomiques) bool m_use_grid_reduce = true; + + //! 
Taille d'un warp + Int32 m_warp_size = 0; }; /*---------------------------------------------------------------------------*/ @@ -440,6 +443,7 @@ class HostDeviceReducerBase dvi.m_current_value = m_local_value; dvi.m_identity = m_identity; dvi.m_use_grid_reduce = m_grid_memory_info.m_reduce_policy != eDeviceReducePolicy::Atomic; + dvi.m_warp_size = m_grid_memory_info.m_warp_size; ReduceFunctor::applyDevice(dvi); //grid_buffer,m_grid_device_count,m_host_or_device_memory_for_reduced_value,m_local_value,m_identity); #else // printf("Destroy host parent_value=%p this=%p\n",(void*)m_parent_value,(void*)this); diff --git a/arcane/src/arcane/accelerator/core/IReduceMemoryImpl.h b/arcane/src/arcane/accelerator/core/IReduceMemoryImpl.h index 8e75ed8893..0a05fc040a 100644 --- a/arcane/src/arcane/accelerator/core/IReduceMemoryImpl.h +++ b/arcane/src/arcane/accelerator/core/IReduceMemoryImpl.h @@ -1,16 +1,16 @@ // -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*- //----------------------------------------------------------------------------- -// Copyright 2000-2023 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com) +// Copyright 2000-2025 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com) // See the top-level COPYRIGHT file for details. // SPDX-License-Identifier: Apache-2.0 //----------------------------------------------------------------------------- /*---------------------------------------------------------------------------*/ -/* IReduceMemoryImpl.h (C) 2000-2023 */ +/* IReduceMemoryImpl.h (C) 2000-2025 */ /* */ /* Interface de la gestion mémoire pour les réductions. 
*/ /*---------------------------------------------------------------------------*/ -#ifndef ARCANE_ACCELERATOR_IREDUCEMEMORYIMPL_H -#define ARCANE_ACCELERATOR_IREDUCEMEMORYIMPL_H +#ifndef ARCANE_ACCELERATOR_CORE_IREDUCEMEMORYIMPL_H +#define ARCANE_ACCELERATOR_CORE_IREDUCEMEMORYIMPL_H /*---------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/ @@ -18,8 +18,6 @@ #include "arcane/utils/MemoryView.h" -#include - /*---------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/ @@ -40,8 +38,6 @@ class ARCANE_ACCELERATOR_CORE_EXPORT IReduceMemoryImpl //! Informations mémoire pour la réduction sur les accélérateurs struct GridMemoryInfo { - public: - //! Mémoire allouée pour la réduction sur une grille (de taille nb_bloc * sizeof(T)) MutableMemoryView m_grid_memory_values; //! Entier utilisé pour compter le nombre de blocs ayant déjà fait leur partie de la réduction @@ -50,6 +46,8 @@ class ARCANE_ACCELERATOR_CORE_EXPORT IReduceMemoryImpl eDeviceReducePolicy m_reduce_policy = eDeviceReducePolicy::Grid; //! Pointeur vers la mémoire sur l'hôte contenant la valeur réduite. void* m_host_memory_for_reduced_value = nullptr; + //! 
Taille d'un warp + Int32 m_warp_size = 64; }; public: diff --git a/arcane/src/arcane/accelerator/core/ReduceMemoryImpl.cc b/arcane/src/arcane/accelerator/core/ReduceMemoryImpl.cc index 2058d282ed..5aa1a8486c 100644 --- a/arcane/src/arcane/accelerator/core/ReduceMemoryImpl.cc +++ b/arcane/src/arcane/accelerator/core/ReduceMemoryImpl.cc @@ -49,6 +49,7 @@ ReduceMemoryImpl(RunCommandImpl* p) { _allocateMemoryForReduceData(128); _allocateMemoryForGridDeviceCount(); + m_grid_memory_info.m_warp_size = p->runner()->deviceInfo().warpSize(); } /*---------------------------------------------------------------------------*/ From b6541a537f43934495e70958d70432561293da68 Mon Sep 17 00:00:00 2001 From: Gilles Grospellier Date: Wed, 17 Sep 2025 17:21:19 +0200 Subject: [PATCH 4/5] =?UTF-8?q?[arcane,accelerator]=20R=C3=A9cup=C3=A8re?= =?UTF-8?q?=20et=20affiche=20le=20num=C3=A9ro=20de=20version=20du=20runtim?= =?UTF-8?q?e=20et=20du=20driver=20pour=20HIP.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../hip/runtime/HipAcceleratorRuntime.cc | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/arcane/src/arcane/accelerator/hip/runtime/HipAcceleratorRuntime.cc b/arcane/src/arcane/accelerator/hip/runtime/HipAcceleratorRuntime.cc index 275afd453c..8716fb1ba5 100644 --- a/arcane/src/arcane/accelerator/hip/runtime/HipAcceleratorRuntime.cc +++ b/arcane/src/arcane/accelerator/hip/runtime/HipAcceleratorRuntime.cc @@ -401,7 +401,24 @@ fillDevices(bool is_verbose) int has_managed_memory = 0; ARCANE_CHECK_HIP(hipDeviceGetAttribute(&has_managed_memory, hipDeviceAttributeManagedMemory, i)); + // Le format des versions dans HIP est: + // HIP_VERSION = (HIP_VERSION_MAJOR * 10000000 + HIP_VERSION_MINOR * 100000 + HIP_VERSION_PATCH) + + int runtime_version = 0; + ARCANE_CHECK_HIP(hipRuntimeGetVersion(&runtime_version)); + //runtime_version /= 10000; + int runtime_major = runtime_version / 10000000; + int runtime_minor = (runtime_version 
/ 100000) % 100; + + int driver_version = 0; + ARCANE_CHECK_HIP(hipDriverGetVersion(&driver_version)); + //driver_version /= 10000; + int driver_major = driver_version / 10000000; + int driver_minor = (driver_version / 100000) % 100; + o << "\nDevice " << i << " name=" << dp.name << "\n"; + o << " Driver version = " << driver_major << "." << (driver_minor) << "." << (driver_version % 100000) << "\n"; + o << " Runtime version = " << runtime_major << "." << (runtime_minor) << "." << (runtime_version % 100000) << "\n"; o << " computeCapability = " << dp.major << "." << dp.minor << "\n"; o << " totalGlobalMem = " << dp.totalGlobalMem << "\n"; o << " sharedMemPerBlock = " << dp.sharedMemPerBlock << "\n"; From 01de579f6a269e7d09ebd5be5524edd0d8a11be3 Mon Sep 17 00:00:00 2001 From: Gilles Grospellier Date: Wed, 17 Sep 2025 17:33:04 +0200 Subject: [PATCH 5/5] [arcane,accelerator] Corrige copyright. --- .../accelerator/CommonCudaHipReduceImpl.h | 21 ++++++++----------- .../accelerator/core/ReduceMemoryImpl.cc | 4 ++-- 2 files changed, 11 insertions(+), 14 deletions(-) diff --git a/arcane/src/arcane/accelerator/CommonCudaHipReduceImpl.h b/arcane/src/arcane/accelerator/CommonCudaHipReduceImpl.h index 8913d89a2e..6095c5a978 100644 --- a/arcane/src/arcane/accelerator/CommonCudaHipReduceImpl.h +++ b/arcane/src/arcane/accelerator/CommonCudaHipReduceImpl.h @@ -1,11 +1,11 @@ // -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*- //----------------------------------------------------------------------------- -// Copyright 2000-2024 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com) +// Copyright 2000-2025 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com) // See the top-level COPYRIGHT file for details. 
// SPDX-License-Identifier: Apache-2.0 //----------------------------------------------------------------------------- /*---------------------------------------------------------------------------*/ -/* CommonCudaHipReduceImpl.h (C) 2000-2024 */ +/* CommonCudaHipReduceImpl.h (C) 2000-2025 */ /* */ /* Implémentation CUDA et HIP des réductions. */ /*---------------------------------------------------------------------------*/ @@ -33,7 +33,7 @@ namespace Arcane::Accelerator::impl __device__ __forceinline__ unsigned int getThreadId() { int threadId = threadIdx.x + blockDim.x * threadIdx.y + - (blockDim.x * blockDim.y) * threadIdx.z; + (blockDim.x * blockDim.y) * threadIdx.z; return threadId; } @@ -348,8 +348,7 @@ ARCANE_INLINE_REDUCE ARCCORE_DEVICE void _applyDeviceGeneric(const ReduceDeviceI /*---------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/ -template ARCANE_INLINE_REDUCE ARCCORE_DEVICE -void ReduceFunctorSum:: +template ARCANE_INLINE_REDUCE ARCCORE_DEVICE void ReduceFunctorSum:: _applyDevice(const ReduceDeviceInfo& dev_info) { using ReduceOperator = impl::SimpleReduceOperator; @@ -360,20 +359,18 @@ _applyDevice(const ReduceDeviceInfo& dev_info) /*---------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/ -template ARCANE_INLINE_REDUCE ARCCORE_DEVICE -void ReduceFunctorMax:: +template ARCANE_INLINE_REDUCE ARCCORE_DEVICE void ReduceFunctorMax:: _applyDevice(const ReduceDeviceInfo& dev_info) { - using ReduceOperator = impl::SimpleReduceOperator; - using AtomicReduceOperator = impl::CommonCudaHipAtomic; - _applyDeviceGeneric(dev_info); + using ReduceOperator = impl::SimpleReduceOperator; + using AtomicReduceOperator = impl::CommonCudaHipAtomic; + _applyDeviceGeneric(dev_info); } /*---------------------------------------------------------------------------*/ 
/*---------------------------------------------------------------------------*/ -template ARCANE_INLINE_REDUCE ARCCORE_DEVICE -void ReduceFunctorMin:: +template ARCANE_INLINE_REDUCE ARCCORE_DEVICE void ReduceFunctorMin:: _applyDevice(const ReduceDeviceInfo& dev_info) { using ReduceOperator = impl::SimpleReduceOperator; diff --git a/arcane/src/arcane/accelerator/core/ReduceMemoryImpl.cc b/arcane/src/arcane/accelerator/core/ReduceMemoryImpl.cc index 5aa1a8486c..9606565650 100644 --- a/arcane/src/arcane/accelerator/core/ReduceMemoryImpl.cc +++ b/arcane/src/arcane/accelerator/core/ReduceMemoryImpl.cc @@ -1,11 +1,11 @@ // -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*- //----------------------------------------------------------------------------- -// Copyright 2000-2024 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com) +// Copyright 2000-2025 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com) // See the top-level COPYRIGHT file for details. // SPDX-License-Identifier: Apache-2.0 //----------------------------------------------------------------------------- /*---------------------------------------------------------------------------*/ -/* ReduceMemoryImpl.cc (C) 2000-2024 */ +/* ReduceMemoryImpl.cc (C) 2000-2025 */ /* */ /* Gestion de la mémoire pour les réductions. */ /*---------------------------------------------------------------------------*/