Skip to content

Commit

Permalink
Cuda multi-GPU support: Allow execution space instance constructor to…
Browse files Browse the repository at this point in the history
… run (kokkos#6706)

* Cuda multi-GPU support: Allow execution space instance constructor to run

* Skip a test

* Use cuda_stream/device also for UVM and HostPinned

* Clean up

* Revert test changes
  • Loading branch information
masterleinad committed Jan 24, 2024
1 parent a1a6ea1 commit 2dc7cbc
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 6 deletions.
7 changes: 1 addition & 6 deletions core/src/Cuda/Kokkos_Cuda_Instance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -292,11 +292,7 @@ void CudaInternal::initialize(cudaStream_t stream) {
KOKKOS_IMPL_CUDA_SAFE_CALL(cudaError_t(cuCtxGetDevice(&m_cudaDev)));
KOKKOS_IMPL_CUDA_SAFE_CALL(cudaSetDevice(m_cudaDev));

// FIXME_CUDA multiple devices
if (m_cudaDev != Cuda().cuda_device())
Kokkos::abort(
"Currently, the device id must match the device id used when Kokkos "
"was initialized!");
m_stream = stream;

//----------------------------------
// Multiblock reduction uses scratch flags for counters
Expand All @@ -317,7 +313,6 @@ void CudaInternal::initialize(cudaStream_t stream) {
(void)scratch_space(reduce_block_count * 16 * sizeof(size_type));
}

m_stream = stream;
for (int i = 0; i < m_n_team_scratch; ++i) {
m_team_scratch_current_size[i] = 0;
m_team_scratch_ptr[i] = nullptr;
Expand Down
6 changes: 6 additions & 0 deletions core/unit_test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -782,6 +782,12 @@ if(Kokkos_ENABLE_CUDA)
UnitTestMain.cpp
cuda/TestCuda_InterOp_Streams.cpp
)
KOKKOS_ADD_EXECUTABLE_AND_TEST(
CoreUnitTest_CudaInterOpStreamsMultiGPU
SOURCES
UnitTestMain.cpp
cuda/TestCuda_InterOp_StreamsMultiGPU.cpp
)
KOKKOS_ADD_EXECUTABLE_AND_TEST(
CoreUnitTest_CudaGraph
SOURCES
Expand Down
49 changes: 49 additions & 0 deletions core/unit_test/cuda/TestCuda_InterOp_StreamsMultiGPU.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 4.0
// Copyright (2022) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
// See https://kokkos.org/LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//@HEADER

#include <TestCuda_Category.hpp>
#include <Test_InterOp_Streams.hpp>

namespace {
TEST(cuda, multi_gpu) {
Kokkos::initialize();

int n_devices;
KOKKOS_IMPL_CUDA_SAFE_CALL(cudaGetDeviceCount(&n_devices));

KOKKOS_IMPL_CUDA_SAFE_CALL(cudaSetDevice(0));
cudaStream_t stream0;
KOKKOS_IMPL_CUDA_SAFE_CALL(cudaStreamCreate(&stream0));

KOKKOS_IMPL_CUDA_SAFE_CALL(cudaSetDevice(n_devices - 1));
cudaStream_t stream;
KOKKOS_IMPL_CUDA_SAFE_CALL(cudaStreamCreate(&stream));

{
TEST_EXECSPACE space0(stream0);
ASSERT_EQ(space0.cuda_device(), 0);
TEST_EXECSPACE space(stream);
ASSERT_EQ(space.cuda_device(), n_devices - 1);
}
Kokkos::finalize();

KOKKOS_IMPL_CUDA_SAFE_CALL(cudaSetDevice(0));
KOKKOS_IMPL_CUDA_SAFE_CALL(cudaStreamDestroy(stream0));

KOKKOS_IMPL_CUDA_SAFE_CALL(cudaSetDevice(n_devices - 1));
KOKKOS_IMPL_CUDA_SAFE_CALL(cudaStreamDestroy(stream));
}
} // namespace

0 comments on commit 2dc7cbc

Please sign in to comment.