59 changes: 31 additions & 28 deletions src/gpgmm/SlabMemoryAllocator.cpp
@@ -116,55 +116,57 @@ namespace gpgmm {
SlabCache* cache = GetOrCreateCache(slabSize);
ASSERT(cache != nullptr);

auto* node = cache->FreeList.head();
auto* pHead = cache->FreeList.head();

Slab* slab = nullptr;
Slab* pFreeSlab = nullptr;

// Check free-list since HEAD must always exist (linked-list is self-referential).
if (!cache->FreeList.empty()) {
slab = node->value();
pFreeSlab = pHead->value();
}

// Splice the full slab from the free-list to full-list.
if (slab != nullptr && slab->IsFull()) {
node->RemoveFromList();
node->InsertBefore(cache->FullList.head());
// Splice the full slab from the free-list to the full-list.
if (pFreeSlab != nullptr && pFreeSlab->IsFull()) {
pHead->RemoveFromList();
pHead->InsertBefore(cache->FullList.head());
}

// Push new slab at HEAD if free-list is empty.
if (cache->FreeList.empty()) {
Slab* newSlab = new Slab(slabSize / mBlockSize, mBlockSize);
newSlab->InsertBefore(cache->FreeList.head());
slab = newSlab;
// Push a new free slab at the free-list HEAD.
if (cache->FreeList.empty() || pFreeSlab->IsFull()) {
Slab* pNewFreeSlab = new Slab(slabSize / mBlockSize, mBlockSize);
pNewFreeSlab->InsertBefore(cache->FreeList.head());
pFreeSlab = pNewFreeSlab;
}

ASSERT(!cache->FreeList.empty());
ASSERT(slab != nullptr);
ASSERT(pFreeSlab != nullptr);
ASSERT(!pFreeSlab->IsFull());

std::unique_ptr<MemoryAllocation> subAllocation;
GPGMM_TRY_ASSIGN(
TrySubAllocateMemory(
&slab->Allocator, mBlockSize, alignment,
&pFreeSlab->Allocator, mBlockSize, alignment,
[&](const auto& block) -> MemoryBase* {
if (slab->SlabMemory == nullptr) {
if (pFreeSlab->SlabMemory == nullptr) {
// Resolve the pending pre-fetched allocation.
if (mNextSlabAllocationEvent != nullptr) {
mNextSlabAllocationEvent->Wait();
slab->SlabMemory = mNextSlabAllocationEvent->AcquireAllocation();
pFreeSlab->SlabMemory = mNextSlabAllocationEvent->AcquireAllocation();
mNextSlabAllocationEvent.reset();
} else {
GPGMM_TRY_ASSIGN(mMemoryAllocator->TryAllocateMemory(
slabSize, mSlabAlignment, neverAllocate, cacheSize,
/*prefetchMemory*/ false),
slab->SlabMemory);
pFreeSlab->SlabMemory);
}
}
return slab->SlabMemory->GetMemory();
return pFreeSlab->SlabMemory->GetMemory();
}),
subAllocation);

// Slab must be referenced seperately from its memory because slab memory could be already
// allocated from another allocator. Only once the final allocation on the slab is
// deallocated, can slab memory be safely released.
slab->Ref();
// Slab must be referenced separately from the underlying memory because slab memory may
// already be allocated by another allocator. Only once the final allocation on the slab is
// deallocated can the slab memory be safely released.
pFreeSlab->Ref();

// Prefetch memory for future slab.
//
@@ -178,9 +180,9 @@
// time before deciding to prefetch.
//
if ((prefetchMemory || mPrefetchSlab) && !neverAllocate &&
mNextSlabAllocationEvent == nullptr && cache->FullList.head() != nullptr &&
slab->GetUsedPercent() >= kSlabPrefetchUsageThreshold &&
slab->BlockCount >= kSlabPrefetchTotalBlockCount) {
mNextSlabAllocationEvent == nullptr && !cache->FullList.empty() &&
pFreeSlab->GetUsedPercent() >= kSlabPrefetchUsageThreshold &&
pFreeSlab->BlockCount >= kSlabPrefetchTotalBlockCount) {
mNextSlabAllocationEvent =
mMemoryAllocator->TryAllocateMemoryAsync(slabSize, mSlabAlignment);
}
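
The rewritten condition above gates slab prefetching on several signals at once. Note the switch from cache->FullList.head() != nullptr to !cache->FullList.empty(): because the list HEAD is a self-referential sentinel that always exists (as the earlier comment notes), the old null check could never fail, so the "a slab has already filled up" signal was effectively always on. Below is a minimal sketch of the predicate, with hypothetical threshold values (the real kSlabPrefetchUsageThreshold and kSlabPrefetchTotalBlockCount constants are defined elsewhere in gpgmm and are not shown in this diff):

#include <cstdint>

// Hypothetical threshold values, for illustration only.
constexpr double kSlabPrefetchUsageThreshold = 0.50;
constexpr uint64_t kSlabPrefetchTotalBlockCount = 16;

// Prefetch the next slab only when demand is proven (some slab already
// filled up), the current slab is filling quickly, and the slab holds
// enough blocks for the async allocation to pay off before it is needed.
bool ShouldPrefetchNextSlab(bool prefetchEnabled,
                            bool neverAllocate,
                            bool hasPendingPrefetch,
                            bool hasFullSlab,
                            double usedPercent,
                            uint64_t blockCount) {
    return prefetchEnabled && !neverAllocate && !hasPendingPrefetch &&
           hasFullSlab && usedPercent >= kSlabPrefetchUsageThreshold &&
           blockCount >= kSlabPrefetchTotalBlockCount;
}
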
@@ -190,9 +192,10 @@
// memory and not the slab.
BlockInSlab* blockInSlab = new BlockInSlab();
blockInSlab->pBlock = subAllocation->GetBlock();
blockInSlab->pSlab = slab;
blockInSlab->pSlab = pFreeSlab;
blockInSlab->Size = subAllocation->GetBlock()->Size;
blockInSlab->Offset = slab->SlabMemory->GetOffset() + subAllocation->GetBlock()->Offset;
blockInSlab->Offset =
pFreeSlab->SlabMemory->GetOffset() + subAllocation->GetBlock()->Offset;

mInfo.UsedBlockCount++;
mInfo.UsedBlockUsage += blockInSlab->Size;
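Taken together, the changes in this file fix slab selection when the slab at the free-list HEAD is already full: the old code created a new slab only when the free-list was empty, so a full HEAD slab that had just been spliced away could still be handed to the sub-allocation path whenever other slabs remained in the free-list. The new empty-or-full condition, plus the added ASSERT(!pFreeSlab->IsFull()), guarantees the chosen slab always has a free block. A self-contained sketch of the fixed control flow, using std::list and a plain block counter as simplified, hypothetical stand-ins for gpgmm's intrusive LinkNode lists and ref-counted Slab:

#include <cassert>
#include <cstdint>
#include <list>
#include <memory>

// Simplified, hypothetical stand-ins for gpgmm's intrusive lists and Slab.
struct Slab {
    explicit Slab(uint64_t blockCount) : BlockCount(blockCount) {}
    bool IsFull() const { return UsedBlocks == BlockCount; }
    uint64_t BlockCount;
    uint64_t UsedBlocks = 0;
};

struct SlabCache {
    std::list<std::unique_ptr<Slab>> FreeList;
    std::list<std::unique_ptr<Slab>> FullList;
};

// Mirrors the fixed control flow: splice a full slab to the full-list, then
// replenish whenever the HEAD slab cannot serve the request.
Slab* GetFreeSlabForAllocation(SlabCache* cache, uint64_t blocksPerSlab) {
    Slab* pFreeSlab = nullptr;
    if (!cache->FreeList.empty()) {
        pFreeSlab = cache->FreeList.front().get();
    }

    // Splice the full slab from the free-list to the full-list.
    if (pFreeSlab != nullptr && pFreeSlab->IsFull()) {
        cache->FullList.push_front(std::move(cache->FreeList.front()));
        cache->FreeList.pop_front();
    }

    // The old code checked only FreeList.empty(); the second clause is the
    // fix that stops a full slab from being re-used for sub-allocation.
    if (cache->FreeList.empty() || pFreeSlab->IsFull()) {
        cache->FreeList.push_front(std::make_unique<Slab>(blocksPerSlab));
        pFreeSlab = cache->FreeList.front().get();
    }

    assert(pFreeSlab != nullptr && !pFreeSlab->IsFull());
    return pFreeSlab;
}

Note the short-circuit order in the empty-or-full check mirrors the real code: when the free-list is empty, pFreeSlab is nullptr and IsFull() is never evaluated.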
6 changes: 4 additions & 2 deletions src/gpgmm/SlabMemoryAllocator.h
@@ -68,21 +68,23 @@ namespace gpgmm {
uint64_t ComputeSlabSize(uint64_t size) const;

// Slab is a node in a doubly-linked list that contains a free-list of blocks
// and a reference to the underlying memory.
// and a reference to underlying memory.
struct Slab : public LinkNode<Slab>, public RefCounted {
Slab(uint64_t blockCount, uint64_t blockSize)
: RefCounted(0), BlockCount(blockCount), Allocator(blockCount, blockSize) {
}

~Slab() {
if (IsInList()) {
RemoveFromList();
}
}

bool IsFull() const {
return static_cast<uint32_t>(GetRefCount()) == BlockCount;
}

double GetUsedPercent() {
double GetUsedPercent() const {
return static_cast<uint32_t>(GetRefCount()) / static_cast<double>(BlockCount);
}

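The header change makes GetUsedPercent() const and keeps the occupancy model implicit in IsFull(): every live block holds one reference on its slab, so the ref-count doubles as a used-block counter and no separate bookkeeping is needed. A minimal, self-contained sketch of that idea (the RefCounted class below is a hypothetical stand-in; gpgmm's real base class is not shown in this diff):

#include <cassert>
#include <cstdint>

// Hypothetical stand-in for gpgmm's RefCounted base class, for illustration.
class RefCounted {
  public:
    explicit RefCounted(uint64_t initialCount) : mRefCount(initialCount) {}
    void Ref() { mRefCount++; }
    void Unref() { mRefCount--; }
    uint64_t GetRefCount() const { return mRefCount; }

  private:
    uint64_t mRefCount;
};

// Every allocated block holds one reference, so the ref-count is also the
// number of used blocks.
struct Slab : public RefCounted {
    explicit Slab(uint64_t blockCount) : RefCounted(0), BlockCount(blockCount) {}

    bool IsFull() const { return GetRefCount() == BlockCount; }

    double GetUsedPercent() const {
        return GetRefCount() / static_cast<double>(BlockCount);
    }

    const uint64_t BlockCount;
};

int main() {
    Slab slab(/*blockCount*/ 4);
    slab.Ref();  // block #1 allocated
    slab.Ref();  // block #2 allocated
    assert(!slab.IsFull());
    assert(slab.GetUsedPercent() == 0.5);
    slab.Ref();  // block #3
    slab.Ref();  // block #4
    assert(slab.IsFull());
    return 0;
}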
22 changes: 21 additions & 1 deletion src/tests/end2end/D3D12ResourceAllocatorTests.cpp
@@ -806,7 +806,27 @@ TEST_F(D3D12ResourceAllocatorTests, CreateTexturePooled) {
}

// Creates a bunch of small buffers using the smallest size allowed.
TEST_F(D3D12ResourceAllocatorTests, CreateBufferPrefetch) {
TEST_F(D3D12ResourceAllocatorTests, CreateBufferMany) {
ComPtr<ResourceAllocator> allocator;
ASSERT_SUCCEEDED(ResourceAllocator::CreateAllocator(CreateBasicAllocatorDesc(), &allocator));
ASSERT_NE(allocator, nullptr);

constexpr uint64_t kNumOfBuffers = 1000u;

std::set<ComPtr<ResourceAllocation>> allocs = {};
for (uint64_t i = 0; i < kNumOfBuffers; i++) {
ComPtr<ResourceAllocation> allocation;
ASSERT_SUCCEEDED(allocator->CreateResource(
{}, CreateBasicBufferDesc(1), D3D12_RESOURCE_STATE_COMMON, nullptr, &allocation));
ASSERT_NE(allocation, nullptr);
allocs.insert(allocation);
}

allocs.clear();
}

// Creates a bunch of small buffers using the smallest size allowed so GPU memory is pre-fetched.
TEST_F(D3D12ResourceAllocatorTests, CreateBufferManyPrefetch) {
ComPtr<ResourceAllocator> allocator;
ASSERT_SUCCEEDED(ResourceAllocator::CreateAllocator(
CreateBasicAllocatorDesc(/*enablePrefetch*/ true), &allocator));
60 changes: 43 additions & 17 deletions src/tests/unittests/SlabMemoryAllocatorTests.cpp
@@ -147,29 +147,55 @@ TEST(SlabMemoryAllocatorTests, SingleSlab) {
TEST(SlabMemoryAllocatorTests, MultipleSlabs) {
std::unique_ptr<DummyMemoryAllocator> dummyMemoryAllocator =
std::make_unique<DummyMemoryAllocator>();

constexpr uint64_t kBlockSize = 32;
constexpr uint64_t kMaxSlabSize = 512;
SlabMemoryAllocator allocator(kBlockSize, kMaxSlabSize, kDefaultSlabSize, kDefaultSlabAlignment,
kDefaultSlabFragmentationLimit, kDefaultPrefetchSlab,
dummyMemoryAllocator.get());
// Fill up exactly two 128B slabs.
std::vector<std::unique_ptr<MemoryAllocation>> allocations = {};
for (uint32_t blocki = 0; blocki < (kDefaultSlabSize * 2 / kBlockSize); blocki++) {
std::unique_ptr<MemoryAllocation> allocation =
allocator.TryAllocateMemory(22, 1, false, false, false);
ASSERT_NE(allocation, nullptr);
allocations.push_back(std::move(allocation));
}

EXPECT_EQ(allocator.GetSlabSizeForTesting(), 2u);
// Fill up exactly N slabs (allocation = block = slab size).
{
SlabMemoryAllocator allocator(kBlockSize, kMaxSlabSize, /*slabSize*/ kBlockSize,
kDefaultSlabAlignment, kDefaultSlabFragmentationLimit,
kDefaultPrefetchSlab, dummyMemoryAllocator.get());
const uint64_t kNumOfSlabs = 12;
std::vector<std::unique_ptr<MemoryAllocation>> allocations = {};
for (uint32_t slabi = 0; slabi < kNumOfSlabs; slabi++) {
std::unique_ptr<MemoryAllocation> allocation =
allocator.TryAllocateMemory(kBlockSize, 1, false, false, false);
ASSERT_NE(allocation, nullptr);
allocations.push_back(std::move(allocation));
}

EXPECT_EQ(allocator.GetSlabSizeForTesting(), kNumOfSlabs);

// Free all slabs.
for (auto& allocation : allocations) {
allocator.DeallocateMemory(std::move(allocation));
for (auto& allocation : allocations) {
allocator.DeallocateMemory(std::move(allocation));
}

EXPECT_EQ(allocator.GetSlabSizeForTesting(), 0u);
}

EXPECT_EQ(allocator.GetSlabSizeForTesting(), 0u);
// Fill up slabs through pre-allocation (allocation < block < slab size).
{
SlabMemoryAllocator allocator(kBlockSize, kMaxSlabSize, kDefaultSlabSize,
kDefaultSlabAlignment, kDefaultSlabFragmentationLimit,
kDefaultPrefetchSlab, dummyMemoryAllocator.get());
// Fill up exactly two 128B slabs.
std::vector<std::unique_ptr<MemoryAllocation>> allocations = {};
for (uint32_t blocki = 0; blocki < (kDefaultSlabSize * 2 / kBlockSize); blocki++) {
std::unique_ptr<MemoryAllocation> allocation =
allocator.TryAllocateMemory(22, 1, false, false, false);
ASSERT_NE(allocation, nullptr);
allocations.push_back(std::move(allocation));
}

EXPECT_EQ(allocator.GetSlabSizeForTesting(), 2u);

// Free both slabs.
for (auto& allocation : allocations) {
allocator.DeallocateMemory(std::move(allocation));
}

EXPECT_EQ(allocator.GetSlabSizeForTesting(), 0u);
}
}
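
For the second sub-case, the sizes only line up if kDefaultSlabSize is 128 (an assumption implied by the "two 128B slabs" comment; only kBlockSize is visible in this diff): each 22-byte request rounds up to one 32-byte block, so eight allocations fill exactly two four-block slabs. A compile-time sketch of that arithmetic:

#include <cstdint>

// kBlockSize comes from the test; kDefaultSlabSize = 128 is an assumed value
// implied by the "two 128B slabs" comment.
constexpr uint64_t kBlockSize = 32;
constexpr uint64_t kDefaultSlabSize = 128;

// Each 22-byte request is rounded up to one 32-byte block.
constexpr uint64_t kBlocksPerSlab = kDefaultSlabSize / kBlockSize;       // 4
constexpr uint64_t kNumAllocations = kDefaultSlabSize * 2 / kBlockSize;  // 8

// Eight one-block allocations fill exactly two four-block slabs, matching
// GetSlabSizeForTesting() == 2u.
static_assert(kNumAllocations / kBlocksPerSlab == 2, "expected exactly two slabs");
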

// Verify a very large allocation does not overflow.