Skip to content

Commit

Permalink
Fix CUDA unified memory allocator
Browse files Browse the repository at this point in the history
- add correct dealloc fence
- add error reporting
  • Loading branch information
Christian Robert Trott committed May 24, 2024
1 parent 24d9381 commit 27796d9
Showing 1 changed file with 9 additions and 2 deletions.
11 changes: 9 additions & 2 deletions core/src/Cuda/Kokkos_CudaSpace.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -195,15 +195,20 @@ void *impl_allocate_common(const int device_id,
static_assert(CUDART_VERSION >= 12020);
if (arg_alloc_size) { // cudaMemAdvise_v2 does not work with nullptr
ptr = malloc(arg_alloc_size);
// TODO: what error do we want to throw here if it fails, i.e. ptr ==
// nullptr? One would thing cudaMemLocation{device_id,
if(ptr != nullptr) {
// One would think cudaMemLocation{device_id,
// cudaMemLocationTypeDeivce} would work but it doesn't. I.e. the order of
// members doesn't seem to be defined.
cudaMemLocation loc;
loc.id = device_id;
loc.type = cudaMemLocationTypeDevice;
KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMemAdvise_v2(
ptr, arg_alloc_size, cudaMemAdviseSetPreferredLocation, loc));
} else {
// I think this is the most logical error to return unless we
// want a different mechanism for this code path
error_code = cudaErrorMemoryAllocation;
}
}
#elif (defined(KOKKOS_ENABLE_IMPL_CUDA_MALLOC_ASYNC) && CUDART_VERSION >= 11020)
if (arg_alloc_size >= memory_threshold_g) {
Expand Down Expand Up @@ -373,6 +378,8 @@ void CudaSpace::impl_deallocate(
KOKKOS_IMPL_CUDA_SAFE_CALL(cudaSetDevice(m_device));
KOKKOS_IMPL_CUDA_SAFE_CALL(cudaFree(arg_alloc_ptr));
#elif defined(KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY)
Impl::cuda_device_synchronize(
"Kokkos::Cuda: backend fence before unified memory free");
free(arg_alloc_ptr);
#elif (defined(KOKKOS_ENABLE_IMPL_CUDA_MALLOC_ASYNC) && CUDART_VERSION >= 11020)
if (arg_alloc_size >= memory_threshold_g) {
Expand Down

0 comments on commit 27796d9

Please sign in to comment.