Skip to content

Commit

Permalink
Use a condition_variable to trigger quicker printing of finishing sec…
Browse files Browse the repository at this point in the history
…tions refs idaholab#15444
  • Loading branch information
friedmud committed Jun 30, 2020
1 parent 042a04e commit 3c27278
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 10 deletions.
14 changes: 14 additions & 0 deletions framework/include/utils/PerfGraph.h
Expand Up @@ -22,6 +22,8 @@
#include <array>
#include <atomic>
#include <condition_variable>
#include <future>
#include <mutex>
#include <thread>

// Forward Declarations
class PerfGuard;
Expand Down Expand Up @@ -355,6 +357,9 @@ class PerfGraph : protected ConsoleStreamInterface
*/
void printHeaviestSections(const ConsoleStream & console);

/// This processor id
processor_id_type _pid;

/// The name (handle) of the root node
static const std::string ROOT_NAME;

Expand Down Expand Up @@ -405,12 +410,21 @@ class PerfGraph : protected ConsoleStreamInterface
/// Whether or not timing is active
bool _active;

/// The promise to the print thread that will signal when to stop
std::promise<bool> _done;

/// The object that is doing live printing
std::unique_ptr<PerfGraphLivePrint> _live_print;

/// The thread for printing sections as they execute
std::thread _print_thread;

/// The mutex to use with a condition_variable for waking up the print thread
std::mutex _print_thread_mutex;

/// The condition_variable to wake the print thread
std::condition_variable _finished_section;

// Here so PerfGuard is the only thing that can call push/pop
friend class PerfGuard;
friend class PerfGraphLivePrint;
Expand Down
2 changes: 2 additions & 0 deletions framework/include/utils/PerfGraphLivePrint.h
Expand Up @@ -44,6 +44,8 @@ class PerfGraphLivePrint : protected ConsoleStreamInterface

std::array<PerfGraph::SectionIncrement, MAX_EXECUTION_LIST_SIZE> & _execution_list;

std::future<bool> _done_future;

std::map<PerfID, PerfGraph::SectionInfo> & _id_to_section_info;

/// This is one beyond the last thing on the stack
Expand Down
33 changes: 27 additions & 6 deletions framework/src/utils/PerfGraph.C
Expand Up @@ -13,6 +13,7 @@
#include "PerfGuard.h"
#include "MooseError.h"
#include "PerfGraphLivePrint.h"
#include "MooseApp.h"

// Note: do everything we can to make sure this only gets #included
// in the .C file... this is a heavily templated header that we
Expand All @@ -27,11 +28,14 @@

const std::string PerfGraph::ROOT_NAME = "Root";

PerfGraph::PerfGraph(const std::string & root_name, MooseApp & app)
: ConsoleStreamInterface(app), _root_name(root_name), _current_position(0),_execution_list_begin(0), _execution_list_end(0), _active(true), _live_print(std::make_unique<PerfGraphLivePrint>(*this, app))
PerfGraph::PerfGraph(const std::string & /*root_name*/, MooseApp & app)
: ConsoleStreamInterface(app), _pid(app.processor_id()), _current_position(0), _execution_list_begin(0), _execution_list_end(0), _active(true), _live_print(std::make_unique<PerfGraphLivePrint>(*this, app))
{
// Start the printing thread
_print_thread = std::thread([this] { this->_live_print->start(); });
if (_pid == 0)
{
// Start the printing thread
_print_thread = std::thread([this] { this->_live_print->start(); });
}

// Not done in the initialization list on purpose because this object needs to be complete first
_root_node = libmesh_make_unique<PerfNode>(registerSection(ROOT_NAME, 0));
Expand All @@ -52,6 +56,12 @@ PerfGraph::PerfGraph(const std::string & root_name, MooseApp & app)

/**
 * Shuts down the live-printing thread (if this rank started one).
 *
 * The constructor only launches _print_thread when _pid == 0, so every
 * other rank has nothing to signal or join and returns immediately.
 */
PerfGraph::~PerfGraph()
{
  // Ranks other than 0 never started a print thread
  if (_pid != 0)
    return;

  // Fulfill the promise: the print thread polls the matching future at the
  // top of its loop and stops iterating once a value is available
  _done.set_value(true);

  // Block until the print thread has actually exited. The thread only
  // re-checks the future between waits, so this can take up to about one
  // wait timeout (currently one second) to return.
  _print_thread.join();
}

unsigned int
Expand Down Expand Up @@ -210,7 +220,7 @@ PerfGraph::push(const PerfID id)
_stack[_current_position] = new_node;

// Add this to the execution list
if (!_id_to_section_info[id]._live_message.empty())
if (_pid == 0 && !_id_to_section_info[id]._live_message.empty())
addToExecutionList(id, IncrementState::started, current_time, start_memory);
}

Expand All @@ -234,8 +244,19 @@ PerfGraph::pop()
_current_position--;

// Add this to the execution list
if (!_id_to_section_info[current_node->id()]._live_message.empty())
if (_pid == 0 && !_id_to_section_info[current_node->id()]._live_message.empty())
{
addToExecutionList(current_node->id(), IncrementState::finished, current_time, current_memory);

// Tell the printing thread that a section has finished
//
// Note: no mutex is taken here because the predicate of the
// condition_variable in the print thread only reads an atomic.
// This is technically correct - but there is a chance of missing a signal.
// For us - that chance is low and doesn't matter (the timeout will just be hit
// instead). So - I would rather not have an extra lock here in the main thread.
_finished_section.notify_one();
}
}

void
Expand Down
19 changes: 15 additions & 4 deletions framework/src/utils/PerfGraphLivePrint.C
Expand Up @@ -12,6 +12,7 @@
PerfGraphLivePrint::PerfGraphLivePrint(PerfGraph & perf_graph, MooseApp & app) : ConsoleStreamInterface(app), _perf_graph(perf_graph),

_execution_list(perf_graph._execution_list),
_done_future(perf_graph._done.get_future()),
_id_to_section_info(perf_graph._id_to_section_info),
_stack_level(0),
_last_execution_list_end(0),
Expand Down Expand Up @@ -191,12 +192,22 @@ PerfGraphLivePrint::iterateThroughExecutionList()
void
PerfGraphLivePrint::start()
{
while(true)
// Keep going until we're signaled to end
while(_done_future.wait_for(std::chrono::duration<Real>(0.)) == std::future_status::timeout)
{
std::this_thread::sleep_for(std::chrono::seconds(1));
std::unique_lock<std::mutex> lock(_perf_graph._print_thread_mutex);

// The end will be one past the last
_current_execution_list_end = _perf_graph._execution_list_end.load(std::memory_order_relaxed);
// Wait for one second, or until notified that a section is finished
// For a section to have finished the execution list has to have been appended to
// Checking for that in the predicate keeps spurious wakeups from ending the wait early
_perf_graph._finished_section.wait_for(lock, std::chrono::duration<Real>(1.), [this]{

// The end will be one past the last
this->_current_execution_list_end = _perf_graph._execution_list_end.load(std::memory_order_relaxed);

return this->_last_execution_list_end != this->_current_execution_list_end;

});

// The last entry in the current execution list for convenience
_current_execution_list_last = _current_execution_list_end - 1 >= 0 ? _current_execution_list_end - 1 : MAX_EXECUTION_LIST_SIZE;
Expand Down

0 comments on commit 3c27278

Please sign in to comment.