Skip to content

Commit

Permalink
ParallelLoop: A bit smarter straggler handling.
Browse files Browse the repository at this point in the history
  • Loading branch information
hrydgard committed Jun 12, 2021
1 parent 3be5c7b commit 5b64a41
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 21 deletions.
55 changes: 41 additions & 14 deletions Common/Thread/ParallelLoop.cpp
Expand Up @@ -35,25 +35,52 @@ WaitableCounter *ParallelRangeLoopWaitable(ThreadManager *threadMan, const std::

if (range <= numTasks) {
// Just assign one task per thread, as many as we have.
WaitableCounter *counter = new WaitableCounter(range);
WaitableCounter *waitableCounter = new WaitableCounter(range);
for (int i = 0; i < range; i++) {
threadMan->EnqueueTaskOnThread(i, new LoopRangeTask(counter, loop, i, i + 1), TaskType::CPU_COMPUTE);
threadMan->EnqueueTaskOnThread(i, new LoopRangeTask(waitableCounter, loop, i, i + 1), TaskType::CPU_COMPUTE);
}
return counter;
return waitableCounter;
} else {
WaitableCounter *counter = new WaitableCounter(numTasks);
// Split the range between threads.
double dx = (double)range / (double)numTasks;
double d = 0.0;
int lastEnd = 0;
// Split the range between threads. Allow for some fractional bits.
const int fractionalBits = 8;

int64_t totalFrac = (int64_t)range << fractionalBits;
int64_t delta = totalFrac / numTasks;

delta = std::max(delta, (int64_t)minSize << fractionalBits);

// Now we can compute the actual number of tasks.
// Remember that stragglers are done on the current thread
// so we don't round up.
numTasks = (int)(totalFrac / delta);

WaitableCounter *waitableCounter = new WaitableCounter(numTasks);
int64_t counter = (int64_t)lower << fractionalBits;

// Split up tasks as equitably as possible.
for (int i = 0; i < numTasks; i++) {
int start = lastEnd;
d += dx;
int end = i == numTasks - 1 ? range : (int)d;
threadMan->EnqueueTaskOnThread(i, new LoopRangeTask(counter, loop, start, end), TaskType::CPU_COMPUTE);
lastEnd = end;
int start = (int)(counter >> fractionalBits);
int end = (int)((counter + delta) >> fractionalBits);
if (end > upper) {
// Let's do the stragglers on the current thread.
break;
}
threadMan->EnqueueTaskOnThread(i, new LoopRangeTask(waitableCounter, loop, start, end), TaskType::CPU_COMPUTE);
counter += delta;
if ((counter >> fractionalBits) > upper) {
break;
}
}

// Run stragglers on the calling thread directly.
// We might add a flag later to avoid this for some cases.
int stragglerStart = (int)(counter >> fractionalBits);
int stragglerEnd = upper;
if (stragglerStart < stragglerEnd) {
// printf("doing stragglers: %d-%d\n", start, upper);
loop(stragglerStart, stragglerEnd);
}
return counter;
return waitableCounter;
}
}

Expand Down
5 changes: 2 additions & 3 deletions Common/Thread/ParallelLoop.h
Expand Up @@ -32,11 +32,10 @@ struct WaitableCounter : public Waitable {
std::condition_variable cond_;
};

// Note that upper bounds are non-inclusive.
// This one never executes the remainder on the calling thread.
// Note that upper bounds are non-inclusive: range is [lower, upper)
//
// Splits the range into sub-ranges and enqueues one LoopRangeTask per sub-range
// on threadMan (as TaskType::CPU_COMPUTE); each task is constructed with `loop`
// and its own [start, end) bounds.
// minSize acts as a lower bound on the per-task slice size when the range is
// split, which can reduce the number of tasks actually enqueued.
// Any leftover "straggler" items at the end of the range are processed by
// calling loop() directly on the calling thread before this function returns.
// Returns a WaitableCounter allocated with `new`.
// NOTE(review): presumably the caller waits on and then frees the returned
// counter - confirm against the callers.
WaitableCounter *ParallelRangeLoopWaitable(ThreadManager *threadMan, const std::function<void(int, int)> &loop, int lower, int upper, int minSize);

// This one optimizes by running the remainder on the calling thread.
// Note that upper bounds are non-inclusive: range is [lower, upper)
//
// Takes the same arguments as ParallelRangeLoopWaitable but returns nothing.
// NOTE(review): presumably this blocks until the whole range has been
// processed (the unit test labels calls to it as "blocking test") - confirm
// against the implementation.
void ParallelRangeLoop(ThreadManager *threadMan, const std::function<void(int, int)> &loop, int lower, int upper, int minSize);

// Common utilities for large (!) memory copies.
Expand Down
4 changes: 2 additions & 2 deletions UI/GameInfoCache.cpp
Expand Up @@ -627,8 +627,8 @@ class GameInfoWorkItem : public Task {
}

done:
info_->pending = false;
info_->working = false;
info_->pending.store(false);
info_->working.store(false);
info_->readyEvent.Notify();
// INFO_LOG(SYSTEM, "Completed writing info for %s", info_->GetTitle().c_str());
}
Expand Down
4 changes: 2 additions & 2 deletions unittest/TestThreadManager.cpp
Expand Up @@ -46,10 +46,10 @@ bool TestParallelLoop(ThreadManager *threadMan) {
// Now it's done.

// Try a loop with stragglers.
printf("blocking test #1\n");
printf("blocking test #1 [0-65)\n");
ParallelRangeLoop(threadMan, rangeFunc, 0, 65, 1);
// Try a loop with a relatively large minimum size.
printf("blocking test #2\n");
printf("blocking test #2 [0-100)\n");
ParallelRangeLoop(threadMan, rangeFunc, 0, 100, 40);
return true;
}
Expand Down

0 comments on commit 5b64a41

Please sign in to comment.