diff --git a/filament/backend/src/metal/MetalBuffer.h b/filament/backend/src/metal/MetalBuffer.h index 081ca6c3fd0..c86e08a8c7a 100644 --- a/filament/backend/src/metal/MetalBuffer.h +++ b/filament/backend/src/metal/MetalBuffer.h @@ -65,9 +65,12 @@ class ScopedAllocationTimer { const char* mName; }; -class TrackedMetalBuffer { -public: +#ifndef FILAMENT_METAL_BUFFER_TRACKING +#define FILAMENT_METAL_BUFFER_TRACKING 0 +#endif +class MetalBufferTracking { +public: static constexpr size_t EXCESS_BUFFER_COUNT = 30000; enum class Type { @@ -91,66 +94,57 @@ class TrackedMetalBuffer { } } - TrackedMetalBuffer() noexcept : mBuffer(nil) {} - TrackedMetalBuffer(nullptr_t) noexcept : mBuffer(nil) {} - TrackedMetalBuffer(id buffer, Type type) : mBuffer(buffer), mType(type) { - assert_invariant(type != Type::NONE); - if (buffer) { - aliveBuffers[toIndex(type)]++; - mType = type; - if (getAliveBuffers() >= EXCESS_BUFFER_COUNT) { - if (platform && platform->hasDebugUpdateStatFunc()) { - platform->debugUpdateStat("filament.metal.excess_buffers_allocated", - TrackedMetalBuffer::getAliveBuffers()); - } +#if FILAMENT_METAL_BUFFER_TRACKING + static void initialize() { + static dispatch_once_t onceToken; + dispatch_once(&onceToken, ^{ + for (size_t i = 0; i < TypeCount; i++) { + aliveBuffers[i] = [NSHashTable weakObjectsHashTable]; } - } + }); } - ~TrackedMetalBuffer() { - if (mBuffer) { - assert_invariant(mType != Type::NONE); - aliveBuffers[toIndex(mType)]--; - } - } - - TrackedMetalBuffer(TrackedMetalBuffer&&) = delete; - TrackedMetalBuffer(TrackedMetalBuffer const&) = delete; - TrackedMetalBuffer& operator=(TrackedMetalBuffer const&) = delete; + static void setPlatform(MetalPlatform* p) { platform = p; } - TrackedMetalBuffer& operator=(TrackedMetalBuffer&& rhs) noexcept { - swap(rhs); - return *this; + static void track(id buffer, Type type) { + assert_invariant(type != Type::NONE); + if (UTILS_UNLIKELY(getAliveBuffers() >= EXCESS_BUFFER_COUNT)) { + if (platform && platform->hasDebugUpdateStatFunc()) { + platform->debugUpdateStat("filament.metal.excess_buffers_allocated", + MetalBufferTracking::getAliveBuffers()); + } + } + [aliveBuffers[toIndex(type)] addObject:buffer]; } - id get() const noexcept { return mBuffer; } - operator bool() const noexcept { return bool(mBuffer); } - static uint64_t getAliveBuffers() { uint64_t sum = 0; - for (const auto& v : aliveBuffers) { - sum += v; + for (size_t i = 1; i < TypeCount; i++) { + sum += getAliveBuffers(static_cast(i)); } return sum; } static uint64_t getAliveBuffers(Type type) { assert_invariant(type != Type::NONE); - return aliveBuffers[toIndex(type)]; + NSHashTable* hashTable = aliveBuffers[toIndex(type)]; + // Caution! We can't simply use hashTable.count here, which is inaccurate. + // See http://cocoamine.net/blog/2013/12/13/nsmaptable-and-zeroing-weak-references/ + return hashTable.objectEnumerator.allObjects.count; } - static void setPlatform(MetalPlatform* p) { platform = p; } +#else + static void initialize() {} + static void setPlatform(MetalPlatform* p) {} + static id track(id buffer, Type type) { return buffer; } + static uint64_t getAliveBuffers() { return 0; } + static uint64_t getAliveBuffers(Type type) { return 0; } +#endif private: - void swap(TrackedMetalBuffer& other) noexcept { - std::swap(mBuffer, other.mBuffer); - std::swap(mType, other.mType); - } - - id mBuffer; - Type mType = Type::NONE; - +#if FILAMENT_METAL_BUFFER_TRACKING + static std::array>*, TypeCount> aliveBuffers; static MetalPlatform* platform; - static std::array aliveBuffers; +#endif }; class MetalBuffer { @@ -204,7 +198,7 @@ class MetalBuffer { private: - TrackedMetalBuffer mBuffer; + id mBuffer; size_t mBufferSize = 0; void* mCpuBuffer = nullptr; MetalContext& mContext; @@ -253,9 +247,11 @@ class MetalRingBuffer { mBufferOptions(options), mSlotSizeBytes(computeSlotSize(layout)), mSlotCount(slotCount) { - ScopedAllocationTimer timer("ring"); - mBuffer = { [device newBufferWithLength:mSlotSizeBytes * mSlotCount options:mBufferOptions], - TrackedMetalBuffer::Type::RING }; + { + ScopedAllocationTimer timer("ring"); + mBuffer = [device newBufferWithLength:mSlotSizeBytes * mSlotCount options:mBufferOptions]; + } + MetalBufferTracking::track(mBuffer, MetalBufferTracking::Type::RING); assert_invariant(mBuffer); } @@ -275,11 +271,11 @@ class MetalRingBuffer { // finishes executing. { ScopedAllocationTimer timer("ring"); - mAuxBuffer = { [mDevice newBufferWithLength:mSlotSizeBytes options:mBufferOptions], - TrackedMetalBuffer::Type::RING }; + mAuxBuffer = [mDevice newBufferWithLength:mSlotSizeBytes options:mBufferOptions]; } + MetalBufferTracking::track(mAuxBuffer, MetalBufferTracking::Type::RING); assert_invariant(mAuxBuffer); - return { mAuxBuffer.get(), 0 }; + return { mAuxBuffer, 0 }; } mCurrentSlot = (mCurrentSlot + 1) % mSlotCount; mOccupiedSlots->fetch_add(1, std::memory_order_relaxed); @@ -308,9 +304,9 @@ class MetalRingBuffer { */ std::pair, NSUInteger> getCurrentAllocation() const { if (UTILS_UNLIKELY(mAuxBuffer)) { - return { mAuxBuffer.get(), 0 }; + return { mAuxBuffer, 0 }; } - return { mBuffer.get(), mCurrentSlot * mSlotSizeBytes }; + return { mBuffer, mCurrentSlot * mSlotSizeBytes }; } bool canAccomodateLayout(MTLSizeAndAlign layout) const { @@ -319,8 +315,8 @@ class MetalRingBuffer { private: id mDevice; - TrackedMetalBuffer mBuffer; - TrackedMetalBuffer mAuxBuffer; + id mBuffer; + id mAuxBuffer; MTLResourceOptions mBufferOptions; diff --git a/filament/backend/src/metal/MetalBuffer.mm b/filament/backend/src/metal/MetalBuffer.mm index 5f09a290781..32804f43f3c 100644 --- a/filament/backend/src/metal/MetalBuffer.mm +++ b/filament/backend/src/metal/MetalBuffer.mm @@ -22,10 +22,14 @@ namespace filament { namespace backend { -std::array TrackedMetalBuffer::aliveBuffers = { 0 }; -MetalPlatform* TrackedMetalBuffer::platform = nullptr; MetalPlatform* ScopedAllocationTimer::platform = nullptr; +#if FILAMENT_METAL_BUFFER_TRACKING +std::array>*, MetalBufferTracking::TypeCount> + MetalBufferTracking::aliveBuffers; +MetalPlatform* MetalBufferTracking::platform = nullptr; +#endif + MetalBuffer::MetalBuffer(MetalContext& context, BufferObjectBinding bindingType, BufferUsage usage, size_t size, bool forceGpuBuffer) : mBufferSize(size), mContext(context) { // If the buffer is less than 4K in size and is updated frequently, we don't use an explicit @@ -41,9 +45,9 @@ // Otherwise, we allocate a private GPU buffer. { ScopedAllocationTimer timer("generic"); - mBuffer = { [context.device newBufferWithLength:size options:MTLResourceStorageModePrivate], - TrackedMetalBuffer::Type::GENERIC }; + mBuffer = [context.device newBufferWithLength:size options:MTLResourceStorageModePrivate]; } + MetalBufferTracking::track(mBuffer, MetalBufferTracking::Type::GENERIC); ASSERT_POSTCONDITION(mBuffer, "Could not allocate Metal buffer of size %zu.", size); } @@ -70,7 +74,7 @@ // Acquire a staging buffer to hold the contents of this update. MetalBufferPool* bufferPool = mContext.bufferPool; const MetalBufferPoolEntry* const staging = bufferPool->acquireBuffer(size); - memcpy(staging->buffer.get().contents, src, size); + memcpy(staging->buffer.contents, src, size); // The blit below requires that byteOffset be a multiple of 4. ASSERT_PRECONDITION(!(byteOffset & 0x3u), "byteOffset must be a multiple of 4"); @@ -79,9 +83,9 @@ id cmdBuffer = getPendingCommandBuffer(&mContext); id blitEncoder = [cmdBuffer blitCommandEncoder]; blitEncoder.label = @"Buffer upload blit"; - [blitEncoder copyFromBuffer:staging->buffer.get() + [blitEncoder copyFromBuffer:staging->buffer sourceOffset:0 - toBuffer:mBuffer.get() + toBuffer:mBuffer destinationOffset:byteOffset size:size]; [blitEncoder endEncoding]; @@ -102,7 +106,7 @@ return nil; } assert_invariant(mBuffer); - return mBuffer.get(); + return mBuffer; } void MetalBuffer::bindBuffers(id cmdBuffer, id encoder, diff --git a/filament/backend/src/metal/MetalBufferPool.h b/filament/backend/src/metal/MetalBufferPool.h index 03688ab3c43..2aa7e805a0d 100644 --- a/filament/backend/src/metal/MetalBufferPool.h +++ b/filament/backend/src/metal/MetalBufferPool.h @@ -32,7 +32,7 @@ struct MetalContext; // Immutable POD representing a shared CPU-GPU buffer. struct MetalBufferPoolEntry { - TrackedMetalBuffer buffer; + id buffer; size_t capacity; mutable uint64_t lastAccessed; mutable uint32_t referenceCount; diff --git a/filament/backend/src/metal/MetalBufferPool.mm b/filament/backend/src/metal/MetalBufferPool.mm index a1e54a46239..8c640f7d0a9 100644 --- a/filament/backend/src/metal/MetalBufferPool.mm +++ b/filament/backend/src/metal/MetalBufferPool.mm @@ -48,9 +48,10 @@ buffer = [mContext.device newBufferWithLength:numBytes options:MTLResourceStorageModeShared]; } + MetalBufferTracking::track(buffer, MetalBufferTracking::Type::STAGING); ASSERT_POSTCONDITION(buffer, "Could not allocate Metal staging buffer of size %zu.", numBytes); MetalBufferPoolEntry* stage = new MetalBufferPoolEntry { - .buffer = { buffer, TrackedMetalBuffer::Type::STAGING }, + .buffer = buffer, .capacity = numBytes, .lastAccessed = mCurrentFrame, .referenceCount = 1 diff --git a/filament/backend/src/metal/MetalDriver.mm b/filament/backend/src/metal/MetalDriver.mm index 62cba820401..6ac20797571 100644 --- a/filament/backend/src/metal/MetalDriver.mm +++ b/filament/backend/src/metal/MetalDriver.mm @@ -105,8 +105,9 @@ driverConfig.disableHandleUseAfterFreeCheck) { mContext->driver = this; - TrackedMetalBuffer::setPlatform(platform); ScopedAllocationTimer::setPlatform(platform); + MetalBufferTracking::initialize(); + MetalBufferTracking::setPlatform(platform); mContext->device = mPlatform.createDevice(); assert_invariant(mContext->device); @@ -201,7 +202,7 @@ } MetalDriver::~MetalDriver() noexcept { - TrackedMetalBuffer::setPlatform(nullptr); + MetalBufferTracking::setPlatform(nullptr); ScopedAllocationTimer::setPlatform(nullptr); mContext->device = nil; mContext->emptyTexture = nil; @@ -223,13 +224,16 @@ os_signpost_interval_begin(mContext->log, mContext->signpostId, "Frame encoding", "%{public}d", frameId); #endif if (mPlatform.hasDebugUpdateStatFunc()) { - mPlatform.debugUpdateStat("filament.metal.alive_buffers", TrackedMetalBuffer::getAliveBuffers()); - mPlatform.debugUpdateStat("filament.metal.alive_buffers.generic", - TrackedMetalBuffer::getAliveBuffers(TrackedMetalBuffer::Type::GENERIC)); - mPlatform.debugUpdateStat("filament.metal.alive_buffers.ring", - TrackedMetalBuffer::getAliveBuffers(TrackedMetalBuffer::Type::RING)); - mPlatform.debugUpdateStat("filament.metal.alive_buffers.staging", - TrackedMetalBuffer::getAliveBuffers(TrackedMetalBuffer::Type::STAGING)); +#if FILAMENT_METAL_BUFFER_TRACKING + const uint64_t generic = MetalBufferTracking::getAliveBuffers(MetalBufferTracking::Type::GENERIC); + const uint64_t ring = MetalBufferTracking::getAliveBuffers(MetalBufferTracking::Type::RING); + const uint64_t staging = MetalBufferTracking::getAliveBuffers(MetalBufferTracking::Type::STAGING); + const uint64_t total = generic + ring + staging; + mPlatform.debugUpdateStat("filament.metal.alive_buffers2", total); + mPlatform.debugUpdateStat("filament.metal.alive_buffers2.generic", generic); + mPlatform.debugUpdateStat("filament.metal.alive_buffers2.ring", ring); + mPlatform.debugUpdateStat("filament.metal.alive_buffers2.staging", staging); +#endif } } diff --git a/filament/backend/src/metal/MetalHandles.mm b/filament/backend/src/metal/MetalHandles.mm index e8ab879729a..15958ab60ef 100644 --- a/filament/backend/src/metal/MetalHandles.mm +++ b/filament/backend/src/metal/MetalHandles.mm @@ -789,13 +789,13 @@ static void func(void* user) { PixelBufferDescriptor const& data, const PixelBufferShape& shape) { const size_t stagingBufferSize = shape.totalBytes; auto entry = context.bufferPool->acquireBuffer(stagingBufferSize); - memcpy(entry->buffer.get().contents, + memcpy(entry->buffer.contents, static_cast(data.buffer) + shape.sourceOffset, stagingBufferSize); id blitCommandBuffer = getPendingCommandBuffer(&context); id blitCommandEncoder = [blitCommandBuffer blitCommandEncoder]; blitCommandEncoder.label = @"Texture upload buffer blit"; - [blitCommandEncoder copyFromBuffer:entry->buffer.get() + [blitCommandEncoder copyFromBuffer:entry->buffer sourceOffset:0 sourceBytesPerRow:shape.bytesPerRow sourceBytesPerImage:shape.bytesPerSlice