Skip to content

Commit

Permalink
fix: CudaMemAdvise for Grace-Hopper
Browse files Browse the repository at this point in the history
Do not call this function for a buffer of size 0.
  • Loading branch information
cedricchevalier19 committed May 7, 2024
1 parent 0f25d2b commit 62ba653
Showing 1 changed file with 10 additions and 9 deletions.
19 changes: 10 additions & 9 deletions core/src/Cuda/Kokkos_CudaSpace.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -193,15 +193,16 @@ void *impl_allocate_common(const int device_id,
// The idea is to use the host allocator and then advise to keep it in HBM on
// device, but that requires CUDA 12.2
static_assert(CUDART_VERSION >= 12020);
ptr = malloc(arg_alloc_size);
// TODO: what error do we want to throw here if it fails, i.e. ptr == nullptr?
// One would think cudaMemLocation{device_id, cudaMemLocationTypeDevice} would
// work but it doesn't. I.e. the order of members doesn't seem to be defined.
cudaMemLocation loc;
loc.id = device_id;
loc.type = cudaMemLocationTypeDevice;
KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMemAdvise_v2(
ptr, arg_alloc_size, cudaMemAdviseSetPreferredLocation, loc));
if (arg_alloc_size) { // cudaMemAdvise_v2 does not work with nullptr
ptr = malloc(arg_alloc_size);
// TODO: what error do we want to throw here if it fails, i.e. ptr == nullptr?
// One would think cudaMemLocation{device_id, cudaMemLocationTypeDevice} would
// work but it doesn't. I.e. the order of members doesn't seem to be defined.
cudaMemLocation loc;
loc.id = device_id;
loc.type = cudaMemLocationTypeDevice;
KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMemAdvise_v2(
ptr, arg_alloc_size, cudaMemAdviseSetPreferredLocation, loc));
}
#elif (defined(KOKKOS_ENABLE_IMPL_CUDA_MALLOC_ASYNC) && CUDART_VERSION >= 11020)
if (arg_alloc_size >= memory_threshold_g) {
error_code = cudaMallocAsync(&ptr, arg_alloc_size, stream);
Expand Down

0 comments on commit 62ba653

Please sign in to comment.