Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

VulkanPushPool - more efficient replacement for 3x VulkanPushBuffer #17122

Merged
merged 12 commits into from
Mar 15, 2023
1 change: 1 addition & 0 deletions Common/GPU/Vulkan/VulkanContext.h
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,7 @@ class VulkanContext {
}

int GetInflightFrames() const {
// out of MAX_INFLIGHT_FRAMES.
return inflightFrames_;
}
// Don't call while a frame is in progress.
Expand Down
191 changes: 172 additions & 19 deletions Common/GPU/Vulkan/VulkanMemory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,17 +25,31 @@

#include "Common/Log.h"
#include "Common/TimeUtil.h"
#include "Common/Math/math_util.h"
#include "Common/GPU/Vulkan/VulkanMemory.h"
#include "Common/Data/Text/Parsers.h"

using namespace PPSSPP_VK;

// Always keep around push buffers at least this long (seconds).
static const double PUSH_GARBAGE_COLLECTION_DELAY = 10.0;

// Global push buffer tracker for vulkan memory profiling.
// Don't want to manually dig up all the active push buffers.
static std::mutex g_pushBufferListMutex;
static std::set<VulkanPushBuffer *> g_pushBuffers;
static std::set<VulkanMemoryManager *> g_pushBuffers;

std::vector<VulkanMemoryManager *> GetActiveVulkanMemoryManagers() {
std::vector<VulkanMemoryManager *> buffers;
std::lock_guard<std::mutex> guard(g_pushBufferListMutex);
for (auto iter : g_pushBuffers) {
buffers.push_back(iter);
}
return buffers;
}

VulkanPushBuffer::VulkanPushBuffer(VulkanContext *vulkan, const char *name, size_t size, VkBufferUsageFlags usage, PushBufferType type)
: vulkan_(vulkan), name_(name), size_(size), usage_(usage), type_(type) {
VulkanPushBuffer::VulkanPushBuffer(VulkanContext *vulkan, const char *name, size_t size, VkBufferUsageFlags usage)
: vulkan_(vulkan), name_(name), size_(size), usage_(usage) {
{
std::lock_guard<std::mutex> guard(g_pushBufferListMutex);
g_pushBuffers.insert(this);
Expand All @@ -55,15 +69,6 @@ VulkanPushBuffer::~VulkanPushBuffer() {
_assert_(buffers_.empty());
}

std::vector<VulkanPushBuffer *> VulkanPushBuffer::GetAllActive() {
std::vector<VulkanPushBuffer *> buffers;
std::lock_guard<std::mutex> guard(g_pushBufferListMutex);
for (auto iter : g_pushBuffers) {
buffers.push_back(iter);
}
return buffers;
}

bool VulkanPushBuffer::AddBuffer() {
BufInfo info;
VkDevice device = vulkan_->GetDevice();
Expand All @@ -77,7 +82,7 @@ bool VulkanPushBuffer::AddBuffer() {
b.pQueueFamilyIndices = nullptr;

VmaAllocationCreateInfo allocCreateInfo{};
allocCreateInfo.usage = type_ == PushBufferType::CPU_TO_GPU ? VMA_MEMORY_USAGE_CPU_TO_GPU : VMA_MEMORY_USAGE_GPU_ONLY;
allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
VmaAllocationInfo allocInfo{};

VkResult res = vmaCreateBuffer(vulkan_->Allocator(), &b, &allocCreateInfo, &info.buffer, &info.allocation, &allocInfo);
Expand All @@ -103,8 +108,7 @@ void VulkanPushBuffer::Destroy(VulkanContext *vulkan) {

void VulkanPushBuffer::NextBuffer(size_t minSize) {
// First, unmap the current memory.
if (type_ == PushBufferType::CPU_TO_GPU)
Unmap();
Unmap();

buf_++;
if (buf_ >= buffers_.size() || minSize > size_) {
Expand All @@ -123,8 +127,7 @@ void VulkanPushBuffer::NextBuffer(size_t minSize) {

// Now, move to the next buffer and map it.
offset_ = 0;
if (type_ == PushBufferType::CPU_TO_GPU)
Map();
Map();
}

void VulkanPushBuffer::Defragment(VulkanContext *vulkan) {
Expand All @@ -149,8 +152,13 @@ size_t VulkanPushBuffer::GetTotalSize() const {
return sum;
}

size_t VulkanPushBuffer::GetTotalCapacity() const {
return size_ * buffers_.size();
void VulkanPushBuffer::GetDebugString(char *buffer, size_t bufSize) const {
size_t sum = 0;
if (buffers_.size() > 1)
sum += size_ * (buffers_.size() - 1);
sum += offset_;
size_t capacity = size_ * buffers_.size();
snprintf(buffer, bufSize, "Push %s: %s/%s", name_, NiceSizeFormat(capacity).c_str(), NiceSizeFormat(sum).c_str());
}

void VulkanPushBuffer::Map() {
Expand Down Expand Up @@ -264,3 +272,148 @@ VkResult VulkanDescSetPool::Recreate(bool grow) {
}
return result;
}

VulkanPushPool::VulkanPushPool(VulkanContext *vulkan, const char *name, size_t originalBlockSize, VkBufferUsageFlags usage)
: vulkan_(vulkan), name_(name), originalBlockSize_(originalBlockSize), usage_(usage) {
{
std::lock_guard<std::mutex> guard(g_pushBufferListMutex);
g_pushBuffers.insert(this);
}

for (int i = 0; i < VulkanContext::MAX_INFLIGHT_FRAMES; i++) {
blocks_.push_back(CreateBlock(originalBlockSize));
blocks_.back().original = true;
blocks_.back().frameIndex = i;
}
}

VulkanPushPool::~VulkanPushPool() {
{
std::lock_guard<std::mutex> guard(g_pushBufferListMutex);
g_pushBuffers.erase(this);
}

_dbg_assert_(blocks_.empty());
}

void VulkanPushPool::Destroy() {
for (auto &block : blocks_) {
block.Destroy(vulkan_);
}
blocks_.clear();
}

VulkanPushPool::Block VulkanPushPool::CreateBlock(size_t size) {
Block block{};
block.size = size;
block.frameIndex = -1;

VkBufferCreateInfo b{ VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
b.size = size;
b.usage = usage_;
b.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
VmaAllocationCreateInfo allocCreateInfo{};
allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
VmaAllocationInfo allocInfo{};

VkResult result = vmaCreateBuffer(vulkan_->Allocator(), &b, &allocCreateInfo, &block.buffer, &block.allocation, &allocInfo);
_assert_(result == VK_SUCCESS);

result = vmaMapMemory(vulkan_->Allocator(), block.allocation, (void **)(&block.writePtr));
_assert_(result == VK_SUCCESS);

return block;
}

VulkanPushPool::Block::~Block() {}

void VulkanPushPool::Block::Destroy(VulkanContext *vulkan) {
vmaUnmapMemory(vulkan->Allocator(), allocation);
vulkan->Delete().QueueDeleteBufferAllocation(buffer, allocation);
}

void VulkanPushPool::BeginFrame() {
double now = time_now_d();
curBlockIndex_ = -1;
for (auto &block : blocks_) {
if (block.frameIndex == vulkan_->GetCurFrame()) {
if (curBlockIndex_ == -1) {
// Pick a block associated with the current frame to start at.
// We always start with one block per frame index.
curBlockIndex_ = block.frameIndex;
block.lastUsed = now;
}
block.used = 0;
if (!block.original) {
// Return block to the common pool
block.frameIndex = -1;
}
}
}

// Do a single pass of bubblesort to move the bigger buffers earlier in the sequence.
// Over multiple frames this will quickly converge to the right order.
for (size_t i = 3; i < blocks_.size() - 1; i++) {
if (blocks_[i].frameIndex == -1 && blocks_[i + 1].frameIndex == -1 && blocks_[i].size < blocks_[i + 1].size) {
std::swap(blocks_[i], blocks_[i + 1]);
}
}

// If we have lots of little buffers and the last one hasn't been used in a while, drop it.
// Still, let's keep around a few big ones (6 - 3).
if (blocks_.size() > 6 && blocks_.back().lastUsed < now - PUSH_GARBAGE_COLLECTION_DELAY) {
double start = time_now_d();
size_t size = blocks_.back().size;
blocks_.back().Destroy(vulkan_);
blocks_.pop_back();
DEBUG_LOG(G3D, "%s: Garbage collected block of size %s in %0.2f ms", name_, NiceSizeFormat(size).c_str(), time_now_d() - start);
}
}

void VulkanPushPool::NextBlock(VkDeviceSize allocationSize) {
int curFrameIndex = vulkan_->GetCurFrame();
curBlockIndex_++;
while (curBlockIndex_ < blocks_.size()) {
Block &block = blocks_[curBlockIndex_];
// Grab the first matching block, or unused block (frameIndex == -1).
if ((block.frameIndex == curFrameIndex || block.frameIndex == -1) && block.size >= allocationSize) {
_assert_(block.used == 0);
block.used = allocationSize;
block.lastUsed = time_now_d();
block.frameIndex = curFrameIndex;
return;
}
curBlockIndex_++;
}

double start = time_now_d();
VkDeviceSize newBlockSize = std::max(originalBlockSize_ * 2, (VkDeviceSize)RoundUpToPowerOf2((uint32_t)allocationSize));
// We're still here and ran off the end of blocks. Create a new one.
blocks_.push_back(CreateBlock(newBlockSize));
blocks_.back().frameIndex = curFrameIndex;
blocks_.back().used = allocationSize;
blocks_.back().lastUsed = time_now_d();
// curBlockIndex_ is already set correctly here.
DEBUG_LOG(G3D, "%s: Created new block of size %s in %0.2f ms", name_, NiceSizeFormat(newBlockSize).c_str(), 1000.0 * (time_now_d() - start));
}

size_t VulkanPushPool::GetUsedThisFrame() const {
size_t used = 0;
for (auto &block : blocks_) {
if (block.frameIndex == vulkan_->GetCurFrame()) {
used += block.used;
}
}
return used;
}

void VulkanPushPool::GetDebugString(char *buffer, size_t bufSize) const {
size_t used = 0;
size_t capacity = 0;
for (auto &block : blocks_) {
used += block.used;
capacity += block.size;
}

snprintf(buffer, bufSize, "Pool %s: %s / %s (%d extra blocks)", name_, NiceSizeFormat(used).c_str(), NiceSizeFormat(capacity).c_str(), (int)blocks_.size() - 3);
}
Loading