diff --git a/src/scheduler/MpiWorld.cpp b/src/scheduler/MpiWorld.cpp index b017a00e4..688e5ec04 100644 --- a/src/scheduler/MpiWorld.cpp +++ b/src/scheduler/MpiWorld.cpp @@ -208,7 +208,10 @@ void MpiWorld::create(faabric::Message& call, int newId, int newSize) std::vector executedAt; if (size > 1) { // Send the init messages (note that message i corresponds to rank i+1) - faabric::util::SchedulingDecision decision = sch.callFunctions(req); + // By default, we use the NEVER_ALONE policy for MPI executions to + // minimise cross-host messaging + faabric::util::SchedulingDecision decision = sch.callFunctions( + req, faabric::util::SchedulingTopologyHint::NEVER_ALONE); executedAt = decision.hosts; } assert(executedAt.size() == size - 1); diff --git a/tests/test/scheduler/test_remote_mpi_worlds.cpp b/tests/test/scheduler/test_remote_mpi_worlds.cpp index edf5db2d3..366d6a57a 100644 --- a/tests/test/scheduler/test_remote_mpi_worlds.cpp +++ b/tests/test/scheduler/test_remote_mpi_worlds.cpp @@ -22,9 +22,7 @@ class RemoteCollectiveTestFixture : public RemoteMpiTestFixture thisWorldRanks = { thisHostRankB, thisHostRankA, 0 }; otherWorldRanks = { otherHostRankB, otherHostRankC, otherHostRankA }; - // Here we rely on the scheduler running out of resources and - // overloading this world with ranks 4 and 5 - setWorldSizes(thisWorldSize, 1, 3); + setWorldSizes(thisWorldSize, 3, 3); } MpiWorld& setUpThisWorld() @@ -48,12 +46,11 @@ class RemoteCollectiveTestFixture : public RemoteMpiTestFixture protected: int thisWorldSize = 6; - int otherHostRankA = 1; - int otherHostRankB = 2; - int otherHostRankC = 3; - - int thisHostRankA = 4; - int thisHostRankB = 5; + int thisHostRankA = 1; + int thisHostRankB = 2; + int otherHostRankA = 3; + int otherHostRankB = 4; + int otherHostRankC = 5; std::vector otherWorldRanks; std::vector thisWorldRanks; @@ -62,7 +59,7 @@ class RemoteCollectiveTestFixture : public RemoteMpiTestFixture TEST_CASE_METHOD(RemoteMpiTestFixture, "Test rank allocation", "[mpi]") { // Allocate two ranks in total, one rank per host - setWorldSizes(2, 1, 1); + setWorldSizes(4, 2, 2); MpiWorld& thisWorld = getMpiWorldRegistry().createWorld(msg, worldId); faabric::util::setMockMode(false); @@ -72,8 +69,10 @@ TEST_CASE_METHOD(RemoteMpiTestFixture, "Test rank allocation", "[mpi]") std::thread otherWorldThread([this] { otherWorld.initialiseFromMsg(msg); - REQUIRE(otherWorld.getHostForRank(0) == thisHost); - REQUIRE(otherWorld.getHostForRank(1) == otherHost); + assert(otherWorld.getHostForRank(0) == thisHost); + assert(otherWorld.getHostForRank(1) == thisHost); + assert(otherWorld.getHostForRank(2) == otherHost); + assert(otherWorld.getHostForRank(3) == otherHost); otherWorld.destroy(); }); @@ -83,7 +82,9 @@ TEST_CASE_METHOD(RemoteMpiTestFixture, "Test rank allocation", "[mpi]") } REQUIRE(thisWorld.getHostForRank(0) == thisHost); - REQUIRE(thisWorld.getHostForRank(1) == otherHost); + REQUIRE(thisWorld.getHostForRank(1) == thisHost); + REQUIRE(thisWorld.getHostForRank(2) == otherHost); + REQUIRE(thisWorld.getHostForRank(3) == otherHost); thisWorld.destroy(); } @@ -91,9 +92,9 @@ TEST_CASE_METHOD(RemoteMpiTestFixture, "Test rank allocation", "[mpi]") TEST_CASE_METHOD(RemoteMpiTestFixture, "Test send across hosts", "[mpi]") { // Register two ranks (one on each host) - setWorldSizes(2, 1, 1); + setWorldSizes(4, 2, 2); int rankA = 0; - int rankB = 1; + int rankB = 2; std::vector messageData = { 0, 1, 2 }; // Init worlds @@ -137,9 +138,9 @@ TEST_CASE_METHOD(RemoteMpiTestFixture, "[mpi]") { // Register two ranks (one on each host) - setWorldSizes(2, 1, 1); + setWorldSizes(4, 2, 2); int rankA = 0; - int rankB = 1; + int rankB = 2; std::vector messageData = { 0, 1, 2 }; std::vector messageData2 = { 3, 4, 5 }; @@ -201,61 +202,14 @@ TEST_CASE_METHOD(RemoteMpiTestFixture, thisWorld.destroy(); } -TEST_CASE_METHOD(RemoteMpiTestFixture, "Test barrier across hosts", "[mpi]") -{ - // Register two ranks (one on each host) - setWorldSizes(2, 1, 1); - int rankA = 0; - int rankB = 1; - std::vector sendData = { 0, 1, 2 }; - std::vector recvData = { -1, -1, -1 }; - - // Init worlds - MpiWorld& thisWorld = getMpiWorldRegistry().createWorld(msg, worldId); - faabric::util::setMockMode(false); - - thisWorld.broadcastHostsToRanks(); - - std::thread otherWorldThread([this, rankA, rankB, &sendData, &recvData] { - otherWorld.initialiseFromMsg(msg); - - otherWorld.send( - rankB, rankA, BYTES(sendData.data()), MPI_INT, sendData.size()); - - // Barrier on this rank - otherWorld.barrier(rankB); - assert(sendData == recvData); - otherWorld.destroy(); - }); - - // Receive the message for the given rank - thisWorld.recv(rankB, - rankA, - BYTES(recvData.data()), - MPI_INT, - recvData.size(), - MPI_STATUS_IGNORE); - REQUIRE(recvData == sendData); - - // Call barrier to synchronise remote host - thisWorld.barrier(rankA); - - // Clean up - if (otherWorldThread.joinable()) { - otherWorldThread.join(); - } - - thisWorld.destroy(); -} - TEST_CASE_METHOD(RemoteMpiTestFixture, "Test sending many messages across host", "[mpi]") { // Register two ranks (one on each host) - setWorldSizes(2, 1, 1); + setWorldSizes(4, 2, 2); int rankA = 0; - int rankB = 1; + int rankB = 2; int numMessages = 1000; // Init worlds @@ -374,8 +328,17 @@ TEST_CASE_METHOD(RemoteCollectiveTestFixture, MPI_INT, nPerRank); + // Build the expectation + std::vector> expected(otherWorldRanks.size(), + std::vector(nPerRank)); + for (int i = 0; i < otherWorldRanks.size(); i++) { + for (int j = 0; j < nPerRank; j++) { + expected.at(i).at(j) = otherWorldRanks.at(i) * nPerRank + j; + } + } + // Check for root - assert(actual == std::vector({ 8, 9, 10, 11 })); + assert(actual == expected.at(0)); // Check the other ranks on this host have received the data otherWorld.scatter(otherHostRankB, @@ -386,7 +349,7 @@ TEST_CASE_METHOD(RemoteCollectiveTestFixture, BYTES(actual.data()), MPI_INT, nPerRank); - assert(actual == std::vector({ 4, 5, 6, 7 })); + assert(actual == expected.at(2)); otherWorld.scatter(otherHostRankB, otherHostRankC, @@ -396,12 +359,21 @@ TEST_CASE_METHOD(RemoteCollectiveTestFixture, BYTES(actual.data()), MPI_INT, nPerRank); - assert(actual == std::vector({ 12, 13, 14, 15 })); + assert(actual == expected.at(1)); testLatch->wait(); otherWorld.destroy(); }); + // Build the expectation + std::vector> expected(thisWorldRanks.size(), + std::vector(nPerRank)); + for (int i = 0; i < thisWorldRanks.size(); i++) { + for (int j = 0; j < nPerRank; j++) { + expected.at(i).at(j) = thisWorldRanks.at(i) * nPerRank + j; + } + } + // Check for ranks on this host std::vector actual(nPerRank, -1); thisWorld.scatter(otherHostRankB, @@ -412,7 +384,7 @@ TEST_CASE_METHOD(RemoteCollectiveTestFixture, BYTES(actual.data()), MPI_INT, nPerRank); - REQUIRE(actual == std::vector({ 0, 1, 2, 3 })); + REQUIRE(actual == expected.at(2)); thisWorld.scatter(otherHostRankB, thisHostRankB, @@ -422,7 +394,7 @@ TEST_CASE_METHOD(RemoteCollectiveTestFixture, BYTES(actual.data()), MPI_INT, nPerRank); - REQUIRE(actual == std::vector({ 20, 21, 22, 23 })); + REQUIRE(actual == expected.at(0)); thisWorld.scatter(otherHostRankB, thisHostRankA, @@ -432,7 +404,7 @@ TEST_CASE_METHOD(RemoteCollectiveTestFixture, BYTES(actual.data()), MPI_INT, nPerRank); - REQUIRE(actual == std::vector({ 16, 17, 18, 19 })); + REQUIRE(actual == expected.at(1)); // Clean up testLatch->wait(); @@ -530,8 +502,8 @@ TEST_CASE_METHOD(RemoteMpiTestFixture, "[mpi]") { // Allocate two ranks in total, one rank per host - setWorldSizes(2, 1, 1); - int sendRank = 1; + setWorldSizes(4, 2, 2); + int sendRank = 2; int recvRank = 0; std::vector messageData = { 0, 1, 2 }; @@ -597,8 +569,8 @@ TEST_CASE_METHOD(RemoteMpiTestFixture, "[mpi]") { // Allocate two ranks in total, one rank per host - setWorldSizes(2, 1, 1); - int sendRank = 1; + setWorldSizes(4, 2, 2); + int sendRank = 2; int recvRank = 0; // Init world @@ -729,9 +701,9 @@ TEST_CASE_METHOD(RemoteMpiTestFixture, "[mpi]") { // Register two ranks (one on each host) - setWorldSizes(2, 1, 1); + setWorldSizes(4, 2, 2); int rankA = 0; - int rankB = 1; + int rankB = 2; std::vector messageData = { 0, 1, 2 }; std::vector messageData2 = { 3, 4 }; @@ -779,7 +751,10 @@ TEST_CASE_METHOD(RemoteMpiTestFixture, }); std::vector endpointCheck; - std::vector expectedEndpoints = { false, true, false, false }; + std::vector expectedEndpoints = { false, false, true, false, + false, false, false, false, + false, false, false, false, + false, false, false, false }; // Sending a message initialises the remote endpoint thisWorld.send(