Skip to content

Commit

Permalink
Let the smart pointer manage the CUDA/HIP stream (kokkos#6721)
Browse files Browse the repository at this point in the history
* Let the smart pointer manage the CUDA/HIP stream

* Fixup do not null the stream in {Cuda/HIP}Internal::finalize()

* Fixup hip not tested either

* Not breaking HIP backend for 3rd time in a row

Co-authored-by: Bruno Turcksin <bruno.turcksin@gmail.com>

---------

Co-authored-by: Bruno Turcksin <bruno.turcksin@gmail.com>
  • Loading branch information
dalg24 and Rombur committed Jan 18, 2024
1 parent 179d2e6 commit 86f5bb7
Show file tree
Hide file tree
Showing 5 changed files with 31 additions and 32 deletions.
30 changes: 15 additions & 15 deletions core/src/Cuda/Kokkos_Cuda_Instance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ void CudaInternal::print_configuration(std::ostream &s) const {
//----------------------------------------------------------------------------

CudaInternal::~CudaInternal() {
if (m_stream || m_scratchSpace || m_scratchFlags || m_scratchUnified) {
if (m_scratchSpace || m_scratchFlags || m_scratchUnified) {
std::cerr << "Kokkos::Cuda ERROR: Failed to call Kokkos::Cuda::finalize()"
<< std::endl;
}
Expand Down Expand Up @@ -278,7 +278,7 @@ void CudaInternal::fence() const {
fence("Kokkos::CudaInternal::fence(): Unnamed Instance Fence");
}

void CudaInternal::initialize(cudaStream_t stream, bool manage_stream) {
void CudaInternal::initialize(cudaStream_t stream) {
KOKKOS_EXPECTS(!is_initialized());

if (was_finalized)
Expand Down Expand Up @@ -317,8 +317,7 @@ void CudaInternal::initialize(cudaStream_t stream, bool manage_stream) {
(void)scratch_space(reduce_block_count * 16 * sizeof(size_type));
}

m_stream = stream;
m_manage_stream = manage_stream;
m_stream = stream;
for (int i = 0; i < m_n_team_scratch; ++i) {
m_team_scratch_current_size[i] = 0;
m_team_scratch_ptr[i] = nullptr;
Expand Down Expand Up @@ -497,16 +496,12 @@ void CudaInternal::finalize() {
Kokkos::kokkos_free<Kokkos::CudaSpace>(m_team_scratch_ptr[i]);
}

if (m_manage_stream && get_stream() != nullptr)
KOKKOS_IMPL_CUDA_SAFE_CALL((cuda_stream_destroy_wrapper(m_stream)));

m_scratchSpaceCount = 0;
m_scratchFlagsCount = 0;
m_scratchUnifiedCount = 0;
m_scratchSpace = nullptr;
m_scratchFlags = nullptr;
m_scratchUnified = nullptr;
m_stream = nullptr;
for (int i = 0; i < m_n_team_scratch; ++i) {
m_team_scratch_current_size[i] = 0;
m_team_scratch_ptr[i] = nullptr;
Expand Down Expand Up @@ -642,8 +637,7 @@ Kokkos::Cuda::initialize WARNING: Cuda is allocating into UVMSpace by default
KOKKOS_IMPL_CUDA_SAFE_CALL(
cudaEventCreate(&Impl::CudaInternal::constantMemReusable));

Impl::CudaInternal::singleton().initialize(singleton_stream,
/*manage*/ true);
Impl::CudaInternal::singleton().initialize(singleton_stream);
}

void Cuda::impl_finalize() {
Expand All @@ -663,6 +657,8 @@ void Cuda::impl_finalize() {
cudaStreamDestroy(Impl::cuda_get_deep_copy_stream()));

Impl::CudaInternal::singleton().finalize();
KOKKOS_IMPL_CUDA_SAFE_CALL(
cudaStreamDestroy(Impl::CudaInternal::singleton().m_stream));
}

Cuda::Cuda()
Expand All @@ -677,13 +673,17 @@ KOKKOS_DEPRECATED Cuda::Cuda(cudaStream_t stream, bool manage_stream)
manage_stream ? Impl::ManageStream::yes : Impl::ManageStream::no) {}

Cuda::Cuda(cudaStream_t stream, Impl::ManageStream manage_stream)
: m_space_instance(new Impl::CudaInternal, [](Impl::CudaInternal *ptr) {
ptr->finalize();
delete ptr;
}) {
: m_space_instance(
new Impl::CudaInternal, [manage_stream](Impl::CudaInternal *ptr) {
ptr->finalize();
if (static_cast<bool>(manage_stream)) {
KOKKOS_IMPL_CUDA_SAFE_CALL(cudaStreamDestroy(ptr->m_stream));
}
delete ptr;
}) {
Impl::CudaInternal::singleton().verify_is_initialized(
"Cuda instance constructor");
m_space_instance->initialize(stream, static_cast<bool>(manage_stream));
m_space_instance->initialize(stream);
}

void Cuda::print_configuration(std::ostream &os, bool /*verbose*/) const {
Expand Down
3 changes: 1 addition & 2 deletions core/src/Cuda/Kokkos_Cuda_Instance.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,6 @@ class CudaInternal {
mutable size_type* m_scratchFunctor;
cudaStream_t m_stream;
uint32_t m_instance_id;
bool m_manage_stream;

// Team Scratch Level 1 Space
int m_n_team_scratch = 10;
Expand All @@ -131,7 +130,7 @@ class CudaInternal {
return nullptr != m_scratchSpace && nullptr != m_scratchFlags;
}

void initialize(cudaStream_t stream, bool manage_stream);
void initialize(cudaStream_t stream);
void finalize();

void print_configuration(std::ostream&) const;
Expand Down
18 changes: 12 additions & 6 deletions core/src/HIP/Kokkos_HIP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ void HIP::impl_initialize(InitializationSettings const& settings) {

hipStream_t singleton_stream;
KOKKOS_IMPL_HIP_SAFE_CALL(hipStreamCreate(&singleton_stream));
Impl::HIPInternal::singleton().initialize(singleton_stream, /*manage*/ true);
Impl::HIPInternal::singleton().initialize(singleton_stream);
}

void HIP::impl_finalize() {
Expand All @@ -104,6 +104,8 @@ void HIP::impl_finalize() {
hipHostFree(Impl::HIPInternal::constantMemHostStaging));

Impl::HIPInternal::singleton().finalize();
KOKKOS_IMPL_HIP_SAFE_CALL(
hipStreamDestroy(Impl::HIPInternal::singleton().m_stream));
}

HIP::HIP()
Expand All @@ -114,13 +116,17 @@ HIP::HIP()
}

HIP::HIP(hipStream_t const stream, Impl::ManageStream manage_stream)
: m_space_instance(new Impl::HIPInternal, [](Impl::HIPInternal* ptr) {
ptr->finalize();
delete ptr;
}) {
: m_space_instance(
new Impl::HIPInternal, [manage_stream](Impl::HIPInternal* ptr) {
ptr->finalize();
if (static_cast<bool>(manage_stream)) {
KOKKOS_IMPL_HIP_SAFE_CALL(hipStreamDestroy(ptr->m_stream));
}
delete ptr;
}) {
Impl::HIPInternal::singleton().verify_is_initialized(
"HIP instance constructor");
m_space_instance->initialize(stream, static_cast<bool>(manage_stream));
m_space_instance->initialize(stream);
}

KOKKOS_DEPRECATED HIP::HIP(hipStream_t const stream, bool manage_stream)
Expand Down
9 changes: 2 additions & 7 deletions core/src/HIP/Kokkos_HIP_Instance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -159,14 +159,13 @@ void HIPInternal::fence(const std::string &name) const {
[&]() { KOKKOS_IMPL_HIP_SAFE_CALL(hipStreamSynchronize(m_stream)); });
}

void HIPInternal::initialize(hipStream_t stream, bool manage_stream) {
void HIPInternal::initialize(hipStream_t stream) {
KOKKOS_EXPECTS(!is_initialized());

if (was_finalized)
Kokkos::abort("Calling HIP::initialize after HIP::finalize is illegal\n");

m_stream = stream;
m_manage_stream = manage_stream;
m_stream = stream;

//----------------------------------
// Multiblock reduction uses scratch flags for counters
Expand Down Expand Up @@ -340,14 +339,10 @@ void HIPInternal::finalize() {
Kokkos::kokkos_free<Kokkos::HIPSpace>(m_team_scratch_ptr[i]);
}

if (m_manage_stream && m_stream != nullptr)
KOKKOS_IMPL_HIP_SAFE_CALL(hipStreamDestroy(m_stream));

m_scratchSpaceCount = 0;
m_scratchFlagsCount = 0;
m_scratchSpace = nullptr;
m_scratchFlags = nullptr;
m_stream = nullptr;
for (int i = 0; i < m_n_team_scratch; ++i) {
m_team_scratch_current_size[i] = 0;
m_team_scratch_ptr[i] = nullptr;
Expand Down
3 changes: 1 addition & 2 deletions core/src/HIP/Kokkos_HIP_Instance.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,6 @@ class HIPInternal {
uint32_t m_instance_id =
Kokkos::Tools::Experimental::Impl::idForInstance<HIP>(
reinterpret_cast<uintptr_t>(this));
bool m_manage_stream = false;

// Team Scratch Level 1 Space
int m_n_team_scratch = 10;
Expand All @@ -124,7 +123,7 @@ class HIPInternal {
return nullptr != m_scratchSpace && nullptr != m_scratchFlags;
}

void initialize(hipStream_t stream, bool manage_stream);
void initialize(hipStream_t stream);
void finalize();

void print_configuration(std::ostream &) const;
Expand Down

0 comments on commit 86f5bb7

Please sign in to comment.