Skip to content

Commit

Permalink
Threads: remove unused variables and functions (kokkos#6566)
Browse files Browse the repository at this point in the history
* Remove wait_yields from the public interface

* Remove unused get_thread_count functions

* Remove unimplemented team_size_valid

* Remove unused numa_rank and numa_core_rank

* Remove useless include

* Simplify code

* Fix indentation
  • Loading branch information
Rombur committed Nov 1, 2023
1 parent 0e5aa15 commit 6da3fa7
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 67 deletions.
81 changes: 26 additions & 55 deletions core/src/Threads/Kokkos_Threads_Instance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,10 @@

#include <Kokkos_Macros.hpp>

#include <cstdint>
#include <limits>
#include <utility>
#include <iostream>
#include <sstream>
#include <thread>
#include <mutex>

#include <Kokkos_Core.hpp>

Expand Down Expand Up @@ -82,6 +79,12 @@ inline unsigned fan_size(const unsigned rank, const unsigned size) {
return count;
}

// Blocks the calling thread for as long as `flag` compares equal to `value`.
// The flag is re-read on every iteration, and std::this_thread::yield() is
// called between reads instead of spinning without a pause.
//
// flag  : state variable to poll.
// value : the state to wait out; the function returns once `flag` differs.
//
// NOTE(review): the only visible ordering mechanism is `volatile` -- confirm
// that callers do not rely on stronger inter-thread memory-ordering guarantees.
void wait_yield(volatile ThreadState &flag, const ThreadState value) {
  for (;;) {
    if (!(value == flag)) {
      return;
    }
    std::this_thread::yield();
  }
}

} // namespace
} // namespace Impl
} // namespace Kokkos
Expand All @@ -100,13 +103,6 @@ bool ThreadsInternal::is_process() {

//----------------------------------------------------------------------------

// Polls `flag` until it stops comparing equal to `value`. Each time the
// comparison still holds, std::this_thread::yield() is called before the next
// read of the flag.
void ThreadsInternal::wait_yield(volatile ThreadState &flag,
                                 const ThreadState value) {
  for (;;) {
    const bool still_waiting = (value == flag);
    if (!still_waiting) {
      break;
    }
    std::this_thread::yield();
  }
}

// A function with the (ThreadsInternal &, const void *) signature that
// ignores both arguments and performs no work.
void execute_function_noop(ThreadsInternal &, const void *) {
  // Intentionally empty.
}

void ThreadsInternal::driver() {
Expand All @@ -129,8 +125,6 @@ ThreadsInternal::ThreadsInternal()
m_scratch(nullptr),
m_scratch_reduce_end(0),
m_scratch_thread_end(0),
m_numa_rank(0),
m_numa_core_rank(0),
m_pool_rank(0),
m_pool_size(0),
m_pool_fan_size(0),
Expand All @@ -150,17 +144,12 @@ ThreadsInternal::ThreadsInternal()
// Given a good entry set this thread in the 's_threads_exec' array
if (entry < s_thread_pool_size[0] &&
nil == atomic_compare_exchange(s_threads_exec + entry, nil, this)) {
const std::pair<unsigned, unsigned> coord =
Kokkos::hwloc::get_this_thread_coordinate();

m_numa_rank = coord.first;
m_numa_core_rank = coord.second;
m_pool_base = s_threads_exec;
m_pool_rank = s_thread_pool_size[0] - (entry + 1);
m_pool_rank_rev = s_thread_pool_size[0] - (pool_rank() + 1);
m_pool_size = s_thread_pool_size[0];
m_pool_fan_size = fan_size(m_pool_rank, m_pool_size);
m_pool_state = ThreadState::Active;
m_pool_base = s_threads_exec;
m_pool_rank = s_thread_pool_size[0] - (entry + 1);
m_pool_rank_rev = s_thread_pool_size[0] - (pool_rank() + 1);
m_pool_size = s_thread_pool_size[0];
m_pool_fan_size = fan_size(m_pool_rank, m_pool_size);
m_pool_state = ThreadState::Active;

s_threads_pid[m_pool_rank] = std::this_thread::get_id();

Expand Down Expand Up @@ -196,8 +185,6 @@ ThreadsInternal::~ThreadsInternal() {
m_pool_base = nullptr;
m_scratch_reduce_end = 0;
m_scratch_thread_end = 0;
m_numa_rank = 0;
m_numa_core_rank = 0;
m_pool_rank = 0;
m_pool_size = 0;
m_pool_fan_size = 0;
Expand All @@ -213,8 +200,6 @@ ThreadsInternal::~ThreadsInternal() {
}
}

// Returns the value stored in the first slot of s_thread_pool_size.
int ThreadsInternal::get_thread_count() {
  return s_thread_pool_size[0];
}

ThreadsInternal *ThreadsInternal::get_thread(const int init_thread_rank) {
ThreadsInternal *const th =
init_thread_rank < s_thread_pool_size[0]
Expand Down Expand Up @@ -460,22 +445,17 @@ void ThreadsInternal::print_configuration(std::ostream &s, const bool detail) {

fence();

const unsigned numa_count = Kokkos::hwloc::get_available_numa_count();
const unsigned cores_per_numa = Kokkos::hwloc::get_available_cores_per_numa();
const unsigned threads_per_core =
Kokkos::hwloc::get_available_threads_per_core();

// Forestall compiler warnings for unused variables.
(void)numa_count;
(void)cores_per_numa;
(void)threads_per_core;

s << "Kokkos::Threads";

#if defined(KOKKOS_ENABLE_THREADS)
s << " KOKKOS_ENABLE_THREADS";
#endif
#if defined(KOKKOS_ENABLE_HWLOC)
const unsigned numa_count = Kokkos::hwloc::get_available_numa_count();
const unsigned cores_per_numa = Kokkos::hwloc::get_available_cores_per_numa();
const unsigned threads_per_core =
Kokkos::hwloc::get_available_threads_per_core();

s << " hwloc[" << numa_count << "x" << cores_per_numa << "x"
<< threads_per_core << "]";
#endif
Expand All @@ -496,14 +476,12 @@ void ThreadsInternal::print_configuration(std::ostream &s, const bool detail) {
if (th) {
const int rank_rev = th->m_pool_size - (th->m_pool_rank + 1);

s << " Thread[ " << th->m_pool_rank << " : " << th->m_numa_rank << "."
<< th->m_numa_core_rank << " ]";
s << " Thread[ " << th->m_pool_rank << " ]";

s << " Fan{";
for (int j = 0; j < th->m_pool_fan_size; ++j) {
ThreadsInternal *const thfan = th->m_pool_base[rank_rev + (1 << j)];
s << " [ " << thfan->m_pool_rank << " : " << thfan->m_numa_rank
<< "." << thfan->m_numa_core_rank << " ]";
s << " [ " << thfan->m_pool_rank << " ]";
}
s << " }";

Expand Down Expand Up @@ -616,13 +594,8 @@ void ThreadsInternal::initialize(int thread_count_arg) {
Kokkos::hwloc::bind_this_thread(proc_coord);
}

const std::pair<unsigned, unsigned> coord =
Kokkos::hwloc::get_this_thread_coordinate();

s_threads_exec[0] = &s_threads_process;
s_threads_process.m_numa_rank = coord.first;
s_threads_process.m_numa_core_rank = coord.second;
s_threads_process.m_pool_base = s_threads_exec;
s_threads_exec[0] = &s_threads_process;
s_threads_process.m_pool_base = s_threads_exec;
s_threads_process.m_pool_rank =
thread_count - 1; // Reversed for scan-compatible reductions
s_threads_process.m_pool_size = thread_count;
Expand Down Expand Up @@ -711,13 +684,11 @@ void ThreadsInternal::finalize() {
s_thread_pool_size[2] = 0;

// Reset master thread to run solo.
s_threads_process.m_numa_rank = 0;
s_threads_process.m_numa_core_rank = 0;
s_threads_process.m_pool_base = nullptr;
s_threads_process.m_pool_rank = 0;
s_threads_process.m_pool_size = 1;
s_threads_process.m_pool_fan_size = 0;
s_threads_process.m_pool_state = ThreadState::Inactive;
s_threads_process.m_pool_base = nullptr;
s_threads_process.m_pool_rank = 0;
s_threads_process.m_pool_size = 1;
s_threads_process.m_pool_fan_size = 0;
s_threads_process.m_pool_state = ThreadState::Inactive;

Kokkos::Profiling::finalize();
}
Expand Down
12 changes: 0 additions & 12 deletions core/src/Threads/Kokkos_Threads_Instance.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,6 @@ class ThreadsInternal {
void *m_scratch;
int m_scratch_reduce_end;
size_t m_scratch_thread_end;
int m_numa_rank;
int m_numa_core_rank;
int m_pool_rank;
int m_pool_rank_rev;
int m_pool_size;
Expand Down Expand Up @@ -89,11 +87,8 @@ class ThreadsInternal {
public:
KOKKOS_INLINE_FUNCTION int pool_size() const { return m_pool_size; }
KOKKOS_INLINE_FUNCTION int pool_rank() const { return m_pool_rank; }
KOKKOS_INLINE_FUNCTION int numa_rank() const { return m_numa_rank; }
KOKKOS_INLINE_FUNCTION int numa_core_rank() const { return m_numa_core_rank; }
inline long team_work_index() const { return m_team_work_index; }

static int get_thread_count();
static ThreadsInternal *get_thread(const int init_thread_rank);

inline void *reduce_memory() const { return m_scratch; }
Expand Down Expand Up @@ -125,15 +120,8 @@ class ThreadsInternal {

static void finalize();

/* Given a requested team size, return valid team size */
static unsigned team_size_valid(unsigned);

static void print_configuration(std::ostream &, const bool detail = false);

//------------------------------------

static void wait_yield(volatile ThreadState &, const ThreadState);

//------------------------------------
// All-thread functions:

Expand Down

0 comments on commit 6da3fa7

Please sign in to comment.