Skip to content

Commit

Permalink
Metal: implement more accurate buffer tracking (#7839)
Browse files Browse the repository at this point in the history
  • Loading branch information
bejado committed May 10, 2024
1 parent 7f8fbe5 commit 54a800a
Show file tree
Hide file tree
Showing 6 changed files with 81 additions and 76 deletions.
106 changes: 51 additions & 55 deletions filament/backend/src/metal/MetalBuffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,12 @@ class ScopedAllocationTimer {
const char* mName;
};

class TrackedMetalBuffer {
public:
// Compile-time switch for Metal buffer tracking. Defaults to 0 (disabled) unless the build
// defines FILAMENT_METAL_BUFFER_TRACKING before this point.
#ifndef FILAMENT_METAL_BUFFER_TRACKING
#define FILAMENT_METAL_BUFFER_TRACKING 0
#endif

class MetalBufferTracking {
public:
static constexpr size_t EXCESS_BUFFER_COUNT = 30000;

enum class Type {
Expand All @@ -91,66 +94,57 @@ class TrackedMetalBuffer {
}
}

TrackedMetalBuffer() noexcept : mBuffer(nil) {}
TrackedMetalBuffer(nullptr_t) noexcept : mBuffer(nil) {}
TrackedMetalBuffer(id<MTLBuffer> buffer, Type type) : mBuffer(buffer), mType(type) {
assert_invariant(type != Type::NONE);
if (buffer) {
aliveBuffers[toIndex(type)]++;
mType = type;
if (getAliveBuffers() >= EXCESS_BUFFER_COUNT) {
if (platform && platform->hasDebugUpdateStatFunc()) {
platform->debugUpdateStat("filament.metal.excess_buffers_allocated",
TrackedMetalBuffer::getAliveBuffers());
}
#if FILAMENT_METAL_BUFFER_TRACKING
static void initialize() {
static dispatch_once_t onceToken;
dispatch_once(&onceToken, ^{
for (size_t i = 0; i < TypeCount; i++) {
aliveBuffers[i] = [NSHashTable weakObjectsHashTable];
}
}
});
}

~TrackedMetalBuffer() {
if (mBuffer) {
assert_invariant(mType != Type::NONE);
aliveBuffers[toIndex(mType)]--;
}
}

TrackedMetalBuffer(TrackedMetalBuffer&&) = delete;
TrackedMetalBuffer(TrackedMetalBuffer const&) = delete;
TrackedMetalBuffer& operator=(TrackedMetalBuffer const&) = delete;
// Installs the platform that buffer statistics are reported through. Passing nullptr clears
// it, after which track() skips reporting.
static void setPlatform(MetalPlatform* p) {
    platform = p;
}

TrackedMetalBuffer& operator=(TrackedMetalBuffer&& rhs) noexcept {
swap(rhs);
return *this;
static void track(id<MTLBuffer> buffer, Type type) {
assert_invariant(type != Type::NONE);
if (UTILS_UNLIKELY(getAliveBuffers() >= EXCESS_BUFFER_COUNT)) {
if (platform && platform->hasDebugUpdateStatFunc()) {
platform->debugUpdateStat("filament.metal.excess_buffers_allocated",
MetalBufferTracking::getAliveBuffers());
}
}
[aliveBuffers[toIndex(type)] addObject:buffer];
}

id<MTLBuffer> get() const noexcept { return mBuffer; }
operator bool() const noexcept { return bool(mBuffer); }

static uint64_t getAliveBuffers() {
uint64_t sum = 0;
for (const auto& v : aliveBuffers) {
sum += v;
for (size_t i = 1; i < TypeCount; i++) {
sum += getAliveBuffers(static_cast<Type>(i));
}
return sum;
}

static uint64_t getAliveBuffers(Type type) {
assert_invariant(type != Type::NONE);
return aliveBuffers[toIndex(type)];
NSHashTable* hashTable = aliveBuffers[toIndex(type)];
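// Caution! hashTable.count is inaccurate for a weak-objects table: it can still include
// entries whose buffers have already been deallocated. Count the live objects instead.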
// See http://cocoamine.net/blog/2013/12/13/nsmaptable-and-zeroing-weak-references/
return hashTable.objectEnumerator.allObjects.count;
}
static void setPlatform(MetalPlatform* p) { platform = p; }
#else
// No-op variants compiled when FILAMENT_METAL_BUFFER_TRACKING is 0: nothing is recorded and
// every count reads as zero.
static void initialize() {}
static void setPlatform(MetalPlatform* p) {}
// Hands the buffer back unchanged.
static id<MTLBuffer> track(id<MTLBuffer> buffer, Type type) { return buffer; }
static uint64_t getAliveBuffers() { return 0; }
static uint64_t getAliveBuffers(Type type) { return 0; }
#endif

private:
void swap(TrackedMetalBuffer& other) noexcept {
std::swap(mBuffer, other.mBuffer);
std::swap(mType, other.mType);
}

id<MTLBuffer> mBuffer;
Type mType = Type::NONE;

#if FILAMENT_METAL_BUFFER_TRACKING
static std::array<NSHashTable<id<MTLBuffer>>*, TypeCount> aliveBuffers;
static MetalPlatform* platform;
static std::array<uint64_t, TypeCount> aliveBuffers;
#endif
};

class MetalBuffer {
Expand Down Expand Up @@ -204,7 +198,7 @@ class MetalBuffer {

private:

TrackedMetalBuffer mBuffer;
id<MTLBuffer> mBuffer;
size_t mBufferSize = 0;
void* mCpuBuffer = nullptr;
MetalContext& mContext;
Expand Down Expand Up @@ -253,9 +247,11 @@ class MetalRingBuffer {
mBufferOptions(options),
mSlotSizeBytes(computeSlotSize(layout)),
mSlotCount(slotCount) {
ScopedAllocationTimer timer("ring");
mBuffer = { [device newBufferWithLength:mSlotSizeBytes * mSlotCount options:mBufferOptions],
TrackedMetalBuffer::Type::RING };
{
ScopedAllocationTimer timer("ring");
mBuffer = [device newBufferWithLength:mSlotSizeBytes * mSlotCount options:mBufferOptions];
}
MetalBufferTracking::track(mBuffer, MetalBufferTracking::Type::RING);
assert_invariant(mBuffer);
}

Expand All @@ -275,11 +271,11 @@ class MetalRingBuffer {
// finishes executing.
{
ScopedAllocationTimer timer("ring");
mAuxBuffer = { [mDevice newBufferWithLength:mSlotSizeBytes options:mBufferOptions],
TrackedMetalBuffer::Type::RING };
mAuxBuffer = [mDevice newBufferWithLength:mSlotSizeBytes options:mBufferOptions];
}
MetalBufferTracking::track(mAuxBuffer, MetalBufferTracking::Type::RING);
assert_invariant(mAuxBuffer);
return { mAuxBuffer.get(), 0 };
return { mAuxBuffer, 0 };
}
mCurrentSlot = (mCurrentSlot + 1) % mSlotCount;
mOccupiedSlots->fetch_add(1, std::memory_order_relaxed);
Expand Down Expand Up @@ -308,9 +304,9 @@ class MetalRingBuffer {
*/
std::pair<id<MTLBuffer>, NSUInteger> getCurrentAllocation() const {
if (UTILS_UNLIKELY(mAuxBuffer)) {
return { mAuxBuffer.get(), 0 };
return { mAuxBuffer, 0 };
}
return { mBuffer.get(), mCurrentSlot * mSlotSizeBytes };
return { mBuffer, mCurrentSlot * mSlotSizeBytes };
}

bool canAccomodateLayout(MTLSizeAndAlign layout) const {
Expand All @@ -319,8 +315,8 @@ class MetalRingBuffer {

private:
id<MTLDevice> mDevice;
TrackedMetalBuffer mBuffer;
TrackedMetalBuffer mAuxBuffer;
id<MTLBuffer> mBuffer;
id<MTLBuffer> mAuxBuffer;

MTLResourceOptions mBufferOptions;

Expand Down
20 changes: 12 additions & 8 deletions filament/backend/src/metal/MetalBuffer.mm
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,14 @@
namespace filament {
namespace backend {

std::array<uint64_t, TrackedMetalBuffer::TypeCount> TrackedMetalBuffer::aliveBuffers = { 0 };
MetalPlatform* TrackedMetalBuffer::platform = nullptr;
MetalPlatform* ScopedAllocationTimer::platform = nullptr;

// Out-of-line definitions of MetalBufferTracking's static members. They are compiled only when
// buffer tracking is enabled.
#if FILAMENT_METAL_BUFFER_TRACKING
std::array<NSHashTable<id<MTLBuffer>>*, MetalBufferTracking::TypeCount>
MetalBufferTracking::aliveBuffers;
MetalPlatform* MetalBufferTracking::platform = nullptr;
#endif

MetalBuffer::MetalBuffer(MetalContext& context, BufferObjectBinding bindingType, BufferUsage usage,
size_t size, bool forceGpuBuffer) : mBufferSize(size), mContext(context) {
// If the buffer is less than 4K in size and is updated frequently, we don't use an explicit
Expand All @@ -41,9 +45,9 @@
// Otherwise, we allocate a private GPU buffer.
{
ScopedAllocationTimer timer("generic");
mBuffer = { [context.device newBufferWithLength:size options:MTLResourceStorageModePrivate],
TrackedMetalBuffer::Type::GENERIC };
mBuffer = [context.device newBufferWithLength:size options:MTLResourceStorageModePrivate];
}
MetalBufferTracking::track(mBuffer, MetalBufferTracking::Type::GENERIC);
ASSERT_POSTCONDITION(mBuffer, "Could not allocate Metal buffer of size %zu.", size);
}

Expand All @@ -70,7 +74,7 @@
// Acquire a staging buffer to hold the contents of this update.
MetalBufferPool* bufferPool = mContext.bufferPool;
const MetalBufferPoolEntry* const staging = bufferPool->acquireBuffer(size);
memcpy(staging->buffer.get().contents, src, size);
memcpy(staging->buffer.contents, src, size);

// The blit below requires that byteOffset be a multiple of 4.
ASSERT_PRECONDITION(!(byteOffset & 0x3u), "byteOffset must be a multiple of 4");
Expand All @@ -79,9 +83,9 @@
id<MTLCommandBuffer> cmdBuffer = getPendingCommandBuffer(&mContext);
id<MTLBlitCommandEncoder> blitEncoder = [cmdBuffer blitCommandEncoder];
blitEncoder.label = @"Buffer upload blit";
[blitEncoder copyFromBuffer:staging->buffer.get()
[blitEncoder copyFromBuffer:staging->buffer
sourceOffset:0
toBuffer:mBuffer.get()
toBuffer:mBuffer
destinationOffset:byteOffset
size:size];
[blitEncoder endEncoding];
Expand All @@ -102,7 +106,7 @@
return nil;
}
assert_invariant(mBuffer);
return mBuffer.get();
return mBuffer;
}

void MetalBuffer::bindBuffers(id<MTLCommandBuffer> cmdBuffer, id<MTLCommandEncoder> encoder,
Expand Down
2 changes: 1 addition & 1 deletion filament/backend/src/metal/MetalBufferPool.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ struct MetalContext;

// Immutable POD representing a shared CPU-GPU buffer.
struct MetalBufferPoolEntry {
TrackedMetalBuffer buffer;
id<MTLBuffer> buffer;
size_t capacity;
mutable uint64_t lastAccessed;
mutable uint32_t referenceCount;
Expand Down
3 changes: 2 additions & 1 deletion filament/backend/src/metal/MetalBufferPool.mm
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,10 @@
buffer = [mContext.device newBufferWithLength:numBytes
options:MTLResourceStorageModeShared];
}
MetalBufferTracking::track(buffer, MetalBufferTracking::Type::STAGING);
ASSERT_POSTCONDITION(buffer, "Could not allocate Metal staging buffer of size %zu.", numBytes);
MetalBufferPoolEntry* stage = new MetalBufferPoolEntry {
.buffer = { buffer, TrackedMetalBuffer::Type::STAGING },
.buffer = buffer,
.capacity = numBytes,
.lastAccessed = mCurrentFrame,
.referenceCount = 1
Expand Down
22 changes: 13 additions & 9 deletions filament/backend/src/metal/MetalDriver.mm
Original file line number Diff line number Diff line change
Expand Up @@ -105,8 +105,9 @@
driverConfig.disableHandleUseAfterFreeCheck) {
mContext->driver = this;

TrackedMetalBuffer::setPlatform(platform);
ScopedAllocationTimer::setPlatform(platform);
MetalBufferTracking::initialize();
MetalBufferTracking::setPlatform(platform);

mContext->device = mPlatform.createDevice();
assert_invariant(mContext->device);
Expand Down Expand Up @@ -201,7 +202,7 @@
}

MetalDriver::~MetalDriver() noexcept {
TrackedMetalBuffer::setPlatform(nullptr);
MetalBufferTracking::setPlatform(nullptr);
ScopedAllocationTimer::setPlatform(nullptr);
mContext->device = nil;
mContext->emptyTexture = nil;
Expand All @@ -223,13 +224,16 @@
os_signpost_interval_begin(mContext->log, mContext->signpostId, "Frame encoding", "%{public}d", frameId);
#endif
if (mPlatform.hasDebugUpdateStatFunc()) {
mPlatform.debugUpdateStat("filament.metal.alive_buffers", TrackedMetalBuffer::getAliveBuffers());
mPlatform.debugUpdateStat("filament.metal.alive_buffers.generic",
TrackedMetalBuffer::getAliveBuffers(TrackedMetalBuffer::Type::GENERIC));
mPlatform.debugUpdateStat("filament.metal.alive_buffers.ring",
TrackedMetalBuffer::getAliveBuffers(TrackedMetalBuffer::Type::RING));
mPlatform.debugUpdateStat("filament.metal.alive_buffers.staging",
TrackedMetalBuffer::getAliveBuffers(TrackedMetalBuffer::Type::STAGING));
#if FILAMENT_METAL_BUFFER_TRACKING
const uint64_t generic = MetalBufferTracking::getAliveBuffers(MetalBufferTracking::Type::GENERIC);
const uint64_t ring = MetalBufferTracking::getAliveBuffers(MetalBufferTracking::Type::RING);
const uint64_t staging = MetalBufferTracking::getAliveBuffers(MetalBufferTracking::Type::STAGING);
const uint64_t total = generic + ring + staging;
mPlatform.debugUpdateStat("filament.metal.alive_buffers2", total);
mPlatform.debugUpdateStat("filament.metal.alive_buffers2.generic", generic);
mPlatform.debugUpdateStat("filament.metal.alive_buffers2.ring", ring);
mPlatform.debugUpdateStat("filament.metal.alive_buffers2.staging", staging);
#endif
}
}

Expand Down
4 changes: 2 additions & 2 deletions filament/backend/src/metal/MetalHandles.mm
Original file line number Diff line number Diff line change
Expand Up @@ -789,13 +789,13 @@ static void func(void* user) {
PixelBufferDescriptor const& data, const PixelBufferShape& shape) {
const size_t stagingBufferSize = shape.totalBytes;
auto entry = context.bufferPool->acquireBuffer(stagingBufferSize);
memcpy(entry->buffer.get().contents,
memcpy(entry->buffer.contents,
static_cast<uint8_t*>(data.buffer) + shape.sourceOffset,
stagingBufferSize);
id<MTLCommandBuffer> blitCommandBuffer = getPendingCommandBuffer(&context);
id<MTLBlitCommandEncoder> blitCommandEncoder = [blitCommandBuffer blitCommandEncoder];
blitCommandEncoder.label = @"Texture upload buffer blit";
[blitCommandEncoder copyFromBuffer:entry->buffer.get()
[blitCommandEncoder copyFromBuffer:entry->buffer
sourceOffset:0
sourceBytesPerRow:shape.bytesPerRow
sourceBytesPerImage:shape.bytesPerSlice
Expand Down

0 comments on commit 54a800a

Please sign in to comment.