Skip to content

Commit

Permalink
Dirty tracking performance improvements (#210)
Browse files Browse the repository at this point in the history
* Start on userfault experiment

* Tidy up

* Failing test for mapped regions

* Destroy uffd tests

* Refactor to suit configurable diffing

* Fixed tests for soft dirty PTEs

* Snapshot self-dirty tracking

* Working tests

* Tidy up when executor does dirty tracking

* Special-case reinitialisation

* Remove restartTracking

* Remove duplicated function names

* Handle dirty regions

* Move gap filling back again

* Update to using OffsetMemoryRegions

* Remove debug logging

* Tidy up

* Renaming

* Missing comments

* Add test for multi-threaded segfault handling

* Remove dirty checks from memory test

* Fixing up tests

* Refactor overwrite diff logic

* Fix test

* Formatting

* Add main thread snapshot handling to faabric

* Incorporate size change into main thread snapshot

* Simplify logic around snapshots in executor

* Fixing a couple of tests

* Fixing tests

* Small tidy-ups

* Fix up unit tests

* Fix up a couple of distributed tests

* Moved thread scheduling and decision caching into executor

* Fixing dist tests

* Remove core dump

* Formatting

* Revert trace logging in DC file

* Fix data race in tests

* Tidy up and docs

* Formatting

* Guard against empty memory

* More logging

* Specify merge regions when spawning threads

* Small logging fix

* Attempt to speed up diffing

* Tighter diffing loops

* Merge diffing and regions

* Remove OffsetMemoryRegion

* Avoid vector<bool>

* More dirty tracking

* Fix up SDPTE

* Switch from shared to full lock

* Add test for dirty tracking config field

* Remove await thread results funciton
  • Loading branch information
Shillaker committed Jan 7, 2022
1 parent d4599a0 commit b3229aa
Show file tree
Hide file tree
Showing 37 changed files with 2,478 additions and 1,128 deletions.
4 changes: 3 additions & 1 deletion dist-test/dev_server.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,14 @@ elif [[ "$1" == "restart" ]]; then
docker-compose restart dist-test-server
elif [[ "$1" == "stop" ]]; then
docker-compose stop dist-test-server
elif [[ "$1" == "rm" ]]; then
docker-compose rm dist-test-server
else
echo "Unrecognised argument: $1"
echo ""
echo "Usage:"
echo ""
echo "./dist-test/dev_server.sh [restart|stop]"
echo "./dist-test/dev_server.sh [restart|stop|rm]"
exit 1
fi

Expand Down
39 changes: 35 additions & 4 deletions include/faabric/scheduler/Scheduler.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@
#include <faabric/scheduler/FunctionCallClient.h>
#include <faabric/scheduler/InMemoryMessageQueue.h>
#include <faabric/snapshot/SnapshotClient.h>
#include <faabric/snapshot/SnapshotRegistry.h>
#include <faabric/transport/PointToPointBroker.h>
#include <faabric/util/config.h>
#include <faabric/util/dirty.h>
#include <faabric/util/func.h>
#include <faabric/util/queue.h>
#include <faabric/util/scheduling.h>
Expand All @@ -32,13 +34,11 @@ class ExecutorTask
ExecutorTask(int messageIndexIn,
std::shared_ptr<faabric::BatchExecuteRequest> reqIn,
std::shared_ptr<std::atomic<int>> batchCounterIn,
bool needsSnapshotSyncIn,
bool skipResetIn);

std::shared_ptr<faabric::BatchExecuteRequest> req;
std::shared_ptr<std::atomic<int>> batchCounter;
int messageIndex = 0;
bool needsSnapshotSync = false;
bool skipReset = false;
};

Expand All @@ -51,6 +51,10 @@ class Executor

virtual ~Executor() = default;

std::vector<std::pair<uint32_t, int32_t>> executeThreads(
std::shared_ptr<faabric::BatchExecuteRequest> req,
const std::vector<faabric::util::SnapshotMergeRegion>& mergeRegions);

void executeTasks(std::vector<int> msgIdxs,
std::shared_ptr<faabric::BatchExecuteRequest> req);

Expand All @@ -69,20 +73,42 @@ class Executor

void releaseClaim();

virtual faabric::util::MemoryView getMemoryView();
std::shared_ptr<faabric::util::SnapshotData> getMainThreadSnapshot(
faabric::Message& msg,
bool createIfNotExists = false);

protected:
virtual void restore(faabric::Message& msg);
virtual void restore(const std::string& snapshotKey);

virtual void postFinish();

virtual std::span<uint8_t> getMemoryView();

virtual void setMemorySize(size_t newSize);

faabric::Message boundMessage;

Scheduler& sch;

faabric::snapshot::SnapshotRegistry& reg;

faabric::util::DirtyTracker& tracker;

uint32_t threadPoolSize = 0;

private:
std::atomic<bool> claimed = false;

// ---- Application threads ----
std::shared_mutex threadExecutionMutex;
std::unordered_map<std::string, int> cachedGroupIds;
std::unordered_map<std::string, std::vector<std::string>>
cachedDecisionHosts;
std::vector<std::pair<uint32_t, uint32_t>> dirtyRegions;

void deleteMainThreadSnapshot(const faabric::Message& msg);

// ---- Function execution thread pool ----
std::mutex threadsMutex;
std::vector<std::shared_ptr<std::thread>> threadPoolThreads;
std::vector<std::shared_ptr<std::thread>> deadThreads;
Expand All @@ -93,6 +119,10 @@ class Executor
void threadPoolThread(int threadPoolIdx);
};

Executor* getExecutingExecutor();

void setExecutingExecutor(Executor* exec);

class Scheduler
{
public:
Expand Down Expand Up @@ -138,6 +168,7 @@ class Scheduler

void pushSnapshotDiffs(
const faabric::Message& msg,
const std::string& snapshotKey,
const std::vector<faabric::util::SnapshotDiff>& diffs);

void setThreadResultLocally(uint32_t msgId, int32_t returnValue);
Expand Down
16 changes: 16 additions & 0 deletions include/faabric/util/bytes.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#pragma once

#include <algorithm>
#include <list>
#include <span>
#include <stdexcept>
#include <string>
#include <vector>
Expand All @@ -25,6 +27,20 @@ int safeCopyToBuffer(const uint8_t* dataIn,
uint8_t* buffer,
int bufferLen);

/*
* Returns a list of pairs of <start, length> for any bytes differing between
* the two arrays.
*/
std::vector<std::pair<uint32_t, uint32_t>> diffArrayRegions(
std::span<const uint8_t> a,
std::span<const uint8_t> b);

/*
* Returns a list of flags marking which bytes differ between the two arrays.
*/
std::vector<bool> diffArrays(std::span<const uint8_t> a,
std::span<const uint8_t> b);

template<class T>
T unalignedRead(const uint8_t* bytes)
{
Expand Down
3 changes: 3 additions & 0 deletions include/faabric/util/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,9 @@ class SystemConfig
int snapshotServerThreads;
int pointToPointServerThreads;

// Dirty tracking
std::string dirtyTrackingMode;

SystemConfig();

void print();
Expand Down
14 changes: 13 additions & 1 deletion include/faabric/util/crash.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,18 @@

namespace faabric::util {

void setUpCrashHandler();
/*
* Sets up crash handling. By default covers a number of signals that would
* otherwise cause a crash. Signal argument can be provided to reinstating crash
* handling for a specific signal after it's been used elsewhere in the
* application (e.g. for dirty tracking).
*/
void setUpCrashHandler(int sig = -1);

/*
* Prints the stack trace for a given signal. Only to be called in signal
* handlers.
*/
void handleCrash(int sig);

}
124 changes: 124 additions & 0 deletions include/faabric/util/dirty.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
#pragma once

#include <signal.h>
#include <span>
#include <string>

#include <faabric/util/config.h>
#include <faabric/util/logging.h>
#include <faabric/util/memory.h>

#define CLEAR_REFS "/proc/self/clear_refs"
#define PAGEMAP "/proc/self/pagemap"

#define PAGEMAP_ENTRY_BYTES sizeof(uint64_t)
#define PAGEMAP_SOFT_DIRTY (1Ull << 55)

namespace faabric::util {

/*
* Interface to all dirty page tracking. Implementation-specific boilerplate
* held in subclasses.
*/
class DirtyTracker
{
public:
virtual void clearAll() = 0;

virtual void reinitialise() = 0;

virtual void startTracking(std::span<uint8_t> region) = 0;

virtual void stopTracking(std::span<uint8_t> region) = 0;

virtual std::vector<std::pair<uint32_t, uint32_t>> getDirtyOffsets(
std::span<uint8_t> region) = 0;

virtual void startThreadLocalTracking(std::span<uint8_t> region) = 0;

virtual void stopThreadLocalTracking(std::span<uint8_t> region) = 0;

virtual std::vector<std::pair<uint32_t, uint32_t>>
getThreadLocalDirtyOffsets(std::span<uint8_t> region) = 0;

virtual std::vector<std::pair<uint32_t, uint32_t>> getBothDirtyOffsets(
std::span<uint8_t> region) = 0;
};

/*
* Dirty tracking implementation using soft-dirty PTEs
* https://www.kernel.org/doc/html/latest/admin-guide/mm/soft-dirty.html
*/
class SoftPTEDirtyTracker final : public DirtyTracker
{
public:
SoftPTEDirtyTracker();

~SoftPTEDirtyTracker();

void clearAll() override;

void reinitialise() override;

void startTracking(std::span<uint8_t> region) override;

void stopTracking(std::span<uint8_t> region) override;

std::vector<std::pair<uint32_t, uint32_t>> getDirtyOffsets(
std::span<uint8_t> region) override;

void startThreadLocalTracking(std::span<uint8_t> region) override;

void stopThreadLocalTracking(std::span<uint8_t> region) override;

std::vector<std::pair<uint32_t, uint32_t>> getThreadLocalDirtyOffsets(
std::span<uint8_t> region) override;

std::vector<std::pair<uint32_t, uint32_t>> getBothDirtyOffsets(
std::span<uint8_t> region) override;

private:
FILE* clearRefsFile = nullptr;

FILE* pagemapFile = nullptr;
};

/*
* Dirty tracking implementation using mprotect to make pages read-only and
* use segfaults resulting from writes to mark them as dirty.
*/
class SegfaultDirtyTracker final : public DirtyTracker
{
public:
SegfaultDirtyTracker();

void clearAll() override;

void reinitialise() override;

void startTracking(std::span<uint8_t> region) override;

void stopTracking(std::span<uint8_t> region) override;

std::vector<std::pair<uint32_t, uint32_t>> getDirtyOffsets(
std::span<uint8_t> region) override;

void startThreadLocalTracking(std::span<uint8_t> region) override;

void stopThreadLocalTracking(std::span<uint8_t> region) override;

std::vector<std::pair<uint32_t, uint32_t>> getThreadLocalDirtyOffsets(
std::span<uint8_t> region) override;

std::vector<std::pair<uint32_t, uint32_t>> getBothDirtyOffsets(
std::span<uint8_t> region) override;

// Signal handler for the resulting segfaults
static void handler(int sig, siginfo_t* info, void* ucontext) noexcept;

private:
void setUpSignalHandler();
};

DirtyTracker& getDirtyTracker();
}
7 changes: 7 additions & 0 deletions include/faabric/util/func.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,4 +37,11 @@ std::string statusKeyFromMessageId(unsigned int mid);
std::vector<uint8_t> messageToBytes(const faabric::Message& msg);

std::vector<std::string> getArgvForMessage(const faabric::Message& msg);

/*
* Gets the key for the main thread snapshot for the given message. Result will
* be the same on all hosts.
*/
std::string getMainThreadSnapshotKey(const faabric::Message& msg);

}
22 changes: 11 additions & 11 deletions include/faabric/util/memory.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,17 @@

namespace faabric::util {

/*
* Dedupes a list of dirty regions specified by offset and length
*/
std::vector<std::pair<uint32_t, uint32_t>> dedupeMemoryRegions(
std::vector<std::pair<uint32_t, uint32_t>>& regions);

/*
* Typedef used to enforce RAII on mmapped memory regions
*/
typedef std::unique_ptr<uint8_t[], std::function<void(uint8_t*)>> MemoryRegion;

// -------------------------
// Alignment
// -------------------------
Expand All @@ -36,20 +47,9 @@ size_t alignOffsetDown(size_t offset);

AlignedChunk getPageAlignedChunk(long offset, long length);

// -------------------------
// Dirty pages
// -------------------------
void resetDirtyTracking();

std::vector<int> getDirtyPageNumbers(const uint8_t* ptr, int nPages);

std::vector<std::pair<uint32_t, uint32_t>> getDirtyRegions(const uint8_t* ptr,
int nPages);

// -------------------------
// Allocation
// -------------------------
typedef std::unique_ptr<uint8_t[], std::function<void(uint8_t*)>> MemoryRegion;

MemoryRegion allocatePrivateMemory(size_t size);

Expand Down
Loading

0 comments on commit b3229aa

Please sign in to comment.