diff --git a/src/proto/faabric.proto b/src/proto/faabric.proto index c19bc682d..34f353ac3 100644 --- a/src/proto/faabric.proto +++ b/src/proto/faabric.proto @@ -92,7 +92,7 @@ message MpiHostsToRanksMessage { } // --------------------------------------------- -// PROFILING +// TRACING // --------------------------------------------- message MpiPerRankMessageCount { @@ -162,7 +162,7 @@ message Message { bytes sgxPolicy = 36; bytes sgxResult = 37; - // This last struct is used for tracing purposes, it should only be set in + // This last struct is used for tracing purposes, it is only used in // non-release builds CallRecords records = 38; } diff --git a/src/scheduler/Executor.cpp b/src/scheduler/Executor.cpp index d6e70c6cb..bae9a933c 100644 --- a/src/scheduler/Executor.cpp +++ b/src/scheduler/Executor.cpp @@ -11,8 +11,8 @@ #include #include #include -#include #include +#include #define POOL_SHUTDOWN -1 diff --git a/src/scheduler/MpiWorld.cpp b/src/scheduler/MpiWorld.cpp index 7abc336c2..e58740d2c 100644 --- a/src/scheduler/MpiWorld.cpp +++ b/src/scheduler/MpiWorld.cpp @@ -581,8 +581,10 @@ void MpiWorld::send(int sendRank, } // In non-release builds, track that we have sent this message - faabric::util::tracing::getCallRecords().addRecord(thisMsgId, - faabric::util::tracing::RecordType::MpiPerRankMessageCount, recvRank); + faabric::util::tracing::getCallRecords().addRecord( + thisMsgId, + faabric::util::tracing::RecordType::MpiPerRankMessageCount, + recvRank); } void MpiWorld::recv(int sendRank, diff --git a/src/util/tracing.cpp b/src/util/tracing.cpp index af31c4104..52777f6b9 100644 --- a/src/util/tracing.cpp +++ b/src/util/tracing.cpp @@ -6,8 +6,10 @@ void CallRecords::startRecording(const faabric::Message& msg) { #ifndef NDEBUG if (linkedMsg != nullptr && linkedMsg->id() != msg.id()) { - SPDLOG_ERROR("CallRecords already linked to a different message: (linked: {} != provided: {})", - linkedMsg->id(), msg.id()); + SPDLOG_ERROR("Error starting recording, records 
not linked to the right" + " message: (linked: {} != provided: {})", + linkedMsg->id(), + msg.id()); throw std::runtime_error("CallRecords linked to a different message"); } else if (linkedMsg == nullptr) { linkedMsg = std::make_shared(msg); @@ -21,8 +23,10 @@ void CallRecords::stopRecording(faabric::Message& msg) { #ifndef NDEBUG if (linkedMsg == nullptr || linkedMsg->id() != msg.id()) { - SPDLOG_ERROR("CallRecords not linked to the right message: (linked: {} != provided: {})", - linkedMsg->id(), msg.id()); + SPDLOG_ERROR("Error stopping recording, records not linked to the right" + " message: (linked: {} != provided: {})", + linkedMsg->id(), + msg.id()); throw std::runtime_error("CallRecords linked to a different message"); } @@ -72,14 +76,17 @@ void CallRecords::addRecord(int msgId, RecordType recordType, int idToIncrement) #ifndef NDEBUG // Check message id if (linkedMsg == nullptr || linkedMsg->id() != msgId) { - SPDLOG_ERROR("CallRecords not linked to the right message: (linked: {} != provided: {})", - linkedMsg->id(), msgId); + SPDLOG_ERROR("CallRecords not linked to the right message: (linked: {} " + "!= provided: {})", + linkedMsg->id(), + msgId); throw std::runtime_error("CallRecords linked to a different message"); } // Add the record to the list of on going records if it is not there bool mustInit = false; - auto it = std::find(onGoingRecordings.begin(), onGoingRecordings.end(), recordType); + auto it = + std::find(onGoingRecordings.begin(), onGoingRecordings.end(), recordType); if (it == onGoingRecordings.end()) { onGoingRecordings.push_back(recordType); mustInit = true; @@ -107,7 +114,6 @@ void CallRecords::addRecord(int msgId, RecordType recordType, int idToIncrement) #endif } - CallRecords& getCallRecords() { static thread_local CallRecords callRecords; diff --git a/tests/test/util/test_tracing.cpp b/tests/test/util/test_tracing.cpp index bb55a2963..b19f8b54a 100644 --- a/tests/test/util/test_tracing.cpp +++ b/tests/test/util/test_tracing.cpp @@ 
-23,10 +23,17 @@ TEST_CASE_METHOD(MpiTestFixture, std::vector messageData = { 0, 1, 2 }; auto buffer = new int[messageData.size()]; - world.send( - rankA1, rankA2, BYTES(messageData.data()), MPI_INT, messageData.size()); - world.recv( - rankA1, rankA2, BYTES(buffer), MPI_INT, messageData.size(), &status); + int numToSend = 10; + + for (int i = 0; i < numToSend; i++) { + world.send(rankA1, + rankA2, + BYTES(messageData.data()), + MPI_INT, + messageData.size()); + world.recv( + rankA1, rankA2, BYTES(buffer), MPI_INT, messageData.size(), &status); + } // Stop recording and check we have recorded all numToSend messages faabric::util::tracing::getCallRecords().stopRecording(msg); @@ -35,7 +42,7 @@ TEST_CASE_METHOD(MpiTestFixture, REQUIRE(msg.records().mpimsgcount().ranks_size() == worldSize); for (int i = 0; i < worldSize; i++) { if (i == rankA2) { - REQUIRE(msg.records().mpimsgcount().nummessages(i) == 1); + REQUIRE(msg.records().mpimsgcount().nummessages(i) == numToSend); } else { REQUIRE(msg.records().mpimsgcount().nummessages(i) == 0); }