fix: CudaMemAdvise for Grace-Hopper

Do not call cudaMemAdvise_v2 for a buffer of size 0.
crtrott · May 7, 2024 · 62ba653 · 62ba653
1 parent 0f25d2b
commit 62ba653
Showing 1 changed file with 10 additions and 9 deletions.
diff --git a/core/src/Cuda/Kokkos_CudaSpace.cpp b/core/src/Cuda/Kokkos_CudaSpace.cpp
@@ -193,15 +193,16 @@ void *impl_allocate_common(const int device_id,
   // The idea is to use the host allocator and then advise CUDA to keep the
   // memory in HBM on the device, but that requires CUDA 12.2
   static_assert(CUDART_VERSION >= 12020);
-  ptr = malloc(arg_alloc_size);
-  // TODO: what error do we want to throw here if it fails, i.e. ptr == nullptr?
-  // One would thing cudaMemLocation{device_id, cudaMemLocationTypeDeivce} would
-  // work but it doesn't. I.e. the order of members doesn't seem to be defined.
-  cudaMemLocation loc;
-  loc.id   = device_id;
-  loc.type = cudaMemLocationTypeDevice;
-  KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMemAdvise_v2(
-      ptr, arg_alloc_size, cudaMemAdviseSetPreferredLocation, loc));
+  if (arg_alloc_size) { // cudaMemAdvise_v2 does not work with nullptr
+    ptr = malloc(arg_alloc_size);
+    // TODO: what error do we want to throw here if it fails, i.e. ptr == nullptr?
+    // One would think cudaMemLocation{device_id, cudaMemLocationTypeDevice} would
+    // work but it doesn't, i.e. the order of members doesn't seem to be defined.
+    cudaMemLocation loc;
+    loc.id   = device_id;
+    loc.type = cudaMemLocationTypeDevice;
+    KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMemAdvise_v2(
+        ptr, arg_alloc_size, cudaMemAdviseSetPreferredLocation, loc));
+  }
 #elif (defined(KOKKOS_ENABLE_IMPL_CUDA_MALLOC_ASYNC) && CUDART_VERSION >= 11020)
   if (arg_alloc_size >= memory_threshold_g) {
     error_code = cudaMallocAsync(&ptr, arg_alloc_size, stream);