Skip to content

Commit

Permalink
add scheduling topology hint and extensive testing
Browse files Browse the repository at this point in the history
  • Loading branch information
csegarragonz committed Nov 24, 2021
1 parent 818a808 commit 29f4d4e
Show file tree
Hide file tree
Showing 4 changed files with 324 additions and 15 deletions.
12 changes: 10 additions & 2 deletions include/faabric/scheduler/Scheduler.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,9 @@ class Scheduler

faabric::util::SchedulingDecision callFunctions(
std::shared_ptr<faabric::BatchExecuteRequest> req,
bool forceLocal = false);
bool forceLocal = false,
faabric::util::SchedulingTopologyHint =
faabric::util::SchedulingTopologyHint::NORMAL);

faabric::util::SchedulingDecision callFunctions(
std::shared_ptr<faabric::BatchExecuteRequest> req,
Expand Down Expand Up @@ -177,6 +179,11 @@ class Scheduler

void clearRecordedMessages();

// Wrapper around makeSchedulingDecision intended for tests only: the
// implementation throws std::runtime_error when the process is not in test
// mode, and otherwise forwards req, forceLocal and topologyHint unchanged.
faabric::util::SchedulingDecision publicMakeSchedulingDecision(
std::shared_ptr<faabric::BatchExecuteRequest> req,
bool forceLocal,
faabric::util::SchedulingTopologyHint topologyHint);

// ----------------------------------
// Exec graph
// ----------------------------------
Expand Down Expand Up @@ -233,7 +240,8 @@ class Scheduler

faabric::util::SchedulingDecision makeSchedulingDecision(
std::shared_ptr<faabric::BatchExecuteRequest> req,
bool forceLocal);
bool forceLocal,
faabric::util::SchedulingTopologyHint topologyHint);

faabric::util::SchedulingDecision doCallFunctions(
std::shared_ptr<faabric::BatchExecuteRequest> req,
Expand Down
12 changes: 12 additions & 0 deletions include/faabric/util/scheduling.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,4 +41,16 @@ class SchedulingDecision
int32_t appIdx,
int32_t groupIdx);
};

// A scheduling topology hint tells the scheduler how to distribute the
// requests of a batch across the available hosts.
enum SchedulingTopologyHint
{
    // Bin-pack requests into free slots host by host, starting from the
    // master host, and overload the master if it runs out of resources.
    NORMAL = 0,

    // Never leave a single (non-master) request alone on a host: when a host
    // would receive exactly one request and some request of the batch has
    // already been placed, it goes to the previously chosen host instead.
    PAIRS = 1
};
}
58 changes: 47 additions & 11 deletions src/scheduler/Scheduler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,8 @@ void Scheduler::notifyExecutorShutdown(Executor* exec,

faabric::util::SchedulingDecision Scheduler::callFunctions(
std::shared_ptr<faabric::BatchExecuteRequest> req,
bool forceLocal)
bool forceLocal,
faabric::util::SchedulingTopologyHint topologyHint)
{
// Note, we assume all the messages are for the same function and have the
// same master host
Expand All @@ -236,7 +237,8 @@ faabric::util::SchedulingDecision Scheduler::callFunctions(

faabric::util::FullLock lock(mx);

SchedulingDecision decision = makeSchedulingDecision(req, forceLocal);
SchedulingDecision decision =
makeSchedulingDecision(req, forceLocal, topologyHint);

// Send out point-to-point mappings if necessary (unless being forced to
// execute locally, in which case they will be transmitted from the
Expand All @@ -249,9 +251,22 @@ faabric::util::SchedulingDecision Scheduler::callFunctions(
return doCallFunctions(req, decision, lock);
}

// Test-only entry point to makeSchedulingDecision.
//
// Forwards req, forceLocal and topologyHint unchanged and returns the
// resulting decision. Throws std::runtime_error when the process is not
// running in test mode.
//
// NOTE(review): callFunctions takes a FullLock on mx before calling
// makeSchedulingDecision; this wrapper does not take any lock itself --
// confirm that is acceptable for the tests that use it.
faabric::util::SchedulingDecision Scheduler::publicMakeSchedulingDecision(
  std::shared_ptr<faabric::BatchExecuteRequest> req,
  bool forceLocal,
  faabric::util::SchedulingTopologyHint topologyHint)
{
    const bool inTestMode = faabric::util::isTestMode();

    if (inTestMode) {
        return makeSchedulingDecision(req, forceLocal, topologyHint);
    }

    throw std::runtime_error("This function must only be called in tests");
}

faabric::util::SchedulingDecision Scheduler::makeSchedulingDecision(
std::shared_ptr<faabric::BatchExecuteRequest> req,
bool forceLocal)
bool forceLocal,
faabric::util::SchedulingTopologyHint topologyHint)
{
int nMessages = req->messages_size();
faabric::Message& firstMsg = req->mutable_messages()->at(0);
Expand Down Expand Up @@ -296,8 +311,20 @@ faabric::util::SchedulingDecision Scheduler::makeSchedulingDecision(
int available = r.slots() - r.usedslots();
int nOnThisHost = std::min(available, remainder);

for (int i = 0; i < nOnThisHost; i++) {
hosts.push_back(h);
// Under the pairs topology hint, we never allocate a single
// non-master request (id != 0) to a host without other
// requests of the batch
bool stickToPreviousHost =
(topologyHint ==
faabric::util::SchedulingTopologyHint::PAIRS &&
nOnThisHost == 1 && hosts.size() > 0);

if (stickToPreviousHost) {
hosts.push_back(hosts.back());
} else {
for (int i = 0; i < nOnThisHost; i++) {
hosts.push_back(h);
}
}

remainder -= nOnThisHost;
Expand All @@ -323,13 +350,22 @@ faabric::util::SchedulingDecision Scheduler::makeSchedulingDecision(
int available = r.slots() - r.usedslots();
int nOnThisHost = std::min(available, remainder);

// Register the host if it's executed a function
if (nOnThisHost > 0) {
registeredHosts[funcStr].insert(h);
}
bool stickToPreviousHost =
(topologyHint ==
faabric::util::SchedulingTopologyHint::PAIRS &&
nOnThisHost == 1 && hosts.size() > 0);

for (int i = 0; i < nOnThisHost; i++) {
hosts.push_back(h);
if (stickToPreviousHost) {
hosts.push_back(hosts.back());
} else {
// Register the host if it's executed a function
if (nOnThisHost > 0) {
registeredHosts[funcStr].insert(h);
}

for (int i = 0; i < nOnThisHost; i++) {
hosts.push_back(h);
}
}

remainder -= nOnThisHost;
Expand Down
Loading

0 comments on commit 29f4d4e

Please sign in to comment.