Skip to content

Commit

Permalink
Use environment variables to control OpenMP behavior in celer-sim (#1073)
Browse files Browse the repository at this point in the history
  • Loading branch information
amandalund committed Dec 30, 2023
1 parent 84e7dcd commit 8ec6a20
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 21 deletions.
30 changes: 16 additions & 14 deletions app/celer-sim/Runner.cc
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@
#include <utility>
#include <vector>

#ifdef _OPENMP
# include <omp.h>
#endif

#include "corecel/cont/Span.hh"
#include "corecel/io/Logger.hh"
#include "corecel/io/OutputRegistry.hh"
Expand Down Expand Up @@ -73,29 +77,27 @@ namespace
{
//---------------------------------------------------------------------------//
/*!
* Get the number of streams from the OMP_NUM_THREADS environment variable.
*
* The value of OMP_NUM_THREADS should be a list of positive integers, each of
* which sets the number of threads for the parallel region at the
* corresponding nested level. The number of streams is set to the first value
* in the list.
* Get the number of streams from the number of OpenMP threads.
*
* \note For a multithreaded CPU run, if OMP_NUM_THREADS is set to a single
* value, the number of threads for each nested parallel region will be set to
* that value.
* The OMP_NUM_THREADS environment variable can be used to control the number
* of threads/streams. The value of OMP_NUM_THREADS should be a list of
* positive integers, each of which sets the number of threads for the parallel
* region at the corresponding nested level. The number of streams is set to
* the first value in the list. If OMP_NUM_THREADS is not set, the number of
* threads (and thus streams) is implementation-defined.
*/
size_type calc_num_streams(RunnerInput const& inp, size_type num_events)
{
size_type num_threads = 1;
#if CELERITAS_USE_OPENMP
if (!inp.merge_events)
{
std::string const& nt_str = celeritas::getenv("OMP_NUM_THREADS");
if (!nt_str.empty())
# pragma omp parallel
{
auto nt = std::stoi(nt_str);
CELER_VALIDATE(nt > 0, << "nonpositive num_streams=" << nt);
num_threads = static_cast<size_type>(nt);
if (omp_get_thread_num() == 0)
{
num_threads = omp_get_num_threads();
}
}
}
#else
Expand Down
8 changes: 1 addition & 7 deletions app/celer-sim/celer-sim.cc
Original file line number Diff line number Diff line change
Expand Up @@ -126,13 +126,7 @@ void run(std::istream* is, std::shared_ptr<OutputRegistry> output)
<< " on " << num_streams << " threads";
MultiExceptionHandler capture_exception;
#ifdef _OPENMP
// Set the maximum number of nested parallel regions
// TODO: Enable nested OpenMP parallel regions for multithreaded CPU
// once the performance issues have been resolved. For now, limit the
// level of nesting to a single parallel region (over events) and
// deactivate any deeper nested parallel regions.
omp_set_max_active_levels(1);
# pragma omp parallel for num_threads(num_streams)
# pragma omp parallel for
#endif
for (size_type event = 0; event < run_stream.num_events(); ++event)
{
Expand Down

0 comments on commit 8ec6a20

Please sign in to comment.