From f0ee3b8daae91c61258759de12d0cf702eb5d2e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Mon, 9 Oct 2023 09:44:58 +0200 Subject: [PATCH 01/12] Fill in descriptors on the render thread in the PPSSPP UI. --- Common/Data/Collections/FastVec.h | 6 + Common/GPU/Vulkan/VulkanDescSet.cpp | 4 +- Common/GPU/Vulkan/VulkanDescSet.h | 9 +- Common/GPU/Vulkan/VulkanQueueRunner.cpp | 25 +++- Common/GPU/Vulkan/VulkanQueueRunner.h | 10 +- Common/GPU/Vulkan/VulkanRenderManager.cpp | 152 ++++++++++++++++++++-- Common/GPU/Vulkan/VulkanRenderManager.h | 112 +++++++++++++++- Common/GPU/Vulkan/thin3d_vulkan.cpp | 151 ++++----------------- Common/UI/Context.cpp | 2 - 9 files changed, 321 insertions(+), 150 deletions(-) diff --git a/Common/Data/Collections/FastVec.h b/Common/Data/Collections/FastVec.h index 819e68799bd1..b78cab6f7771 100644 --- a/Common/Data/Collections/FastVec.h +++ b/Common/Data/Collections/FastVec.h @@ -117,6 +117,12 @@ class FastVec { IncreaseCapacityTo(newCapacity); } + void extend(const T *newData, size_t count) { + IncreaseCapacityTo(size_ + count); + memcpy(data_ + size_, newData, count * sizeof(T)); + size_ += count; + } + void LockCapacity() { #ifdef _DEBUG capacityLocked_ = true; diff --git a/Common/GPU/Vulkan/VulkanDescSet.cpp b/Common/GPU/Vulkan/VulkanDescSet.cpp index 06c2cc48fdff..444a3670dc7d 100644 --- a/Common/GPU/Vulkan/VulkanDescSet.cpp +++ b/Common/GPU/Vulkan/VulkanDescSet.cpp @@ -75,7 +75,9 @@ VkDescriptorSet VulkanDescSetPool::Allocate(int n, const VkDescriptorSetLayout * usage_++; - vulkan_->SetDebugName(desc, VK_OBJECT_TYPE_DESCRIPTOR_SET, tag); + if (tag) { + vulkan_->SetDebugName(desc, VK_OBJECT_TYPE_DESCRIPTOR_SET, tag); + } return desc; } diff --git a/Common/GPU/Vulkan/VulkanDescSet.h b/Common/GPU/Vulkan/VulkanDescSet.h index e42e24b30ce9..82007962a36d 100644 --- a/Common/GPU/Vulkan/VulkanDescSet.h +++ b/Common/GPU/Vulkan/VulkanDescSet.h @@ -18,7 +18,7 @@ enum class BindingType { // Only appropriate for use in a per-frame pool. class VulkanDescSetPool { public: - VulkanDescSetPool(const char *tag, bool grow) : tag_(tag), grow_(grow) {} + VulkanDescSetPool(const char *tag = "", bool grow = true) : tag_(tag), grow_(grow) {} ~VulkanDescSetPool(); // Must call this before use: defines how to clear cache of ANY returned values from Allocate(). @@ -32,6 +32,13 @@ class VulkanDescSetPool { void Reset(); void Destroy(); + void SetTag(const char *tag) { + tag_ = tag; + } + bool IsDestroyed() const { + return !descPool_; + } + private: VkResult Recreate(bool grow); diff --git a/Common/GPU/Vulkan/VulkanQueueRunner.cpp b/Common/GPU/Vulkan/VulkanQueueRunner.cpp index c38a2bd6d94f..0b774b25690f 100644 --- a/Common/GPU/Vulkan/VulkanQueueRunner.cpp +++ b/Common/GPU/Vulkan/VulkanQueueRunner.cpp @@ -338,7 +338,7 @@ void VulkanQueueRunner::PreprocessSteps(std::vector &steps) { } } -void VulkanQueueRunner::RunSteps(std::vector &steps, FrameData &frameData, FrameDataShared &frameDataShared, bool keepSteps) { +void VulkanQueueRunner::RunSteps(std::vector &steps, int curFrame, FrameData &frameData, FrameDataShared &frameDataShared, bool keepSteps) { QueueProfileContext *profile = frameData.profile.enabled ? &frameData.profile : nullptr; if (profile) @@ -394,7 +394,7 @@ void VulkanQueueRunner::RunSteps(std::vector &steps, FrameData &frame vkCmdBeginDebugUtilsLabelEXT(cmd, &labelInfo); } } - PerformRenderPass(step, cmd); + PerformRenderPass(step, cmd, curFrame); break; case VKRStepType::COPY: PerformCopy(step, cmd); @@ -1103,7 +1103,7 @@ void TransitionFromOptimal(VkCommandBuffer cmd, VkImage colorImage, VkImageLayou } } -void VulkanQueueRunner::PerformRenderPass(const VKRStep &step, VkCommandBuffer cmd) { +void VulkanQueueRunner::PerformRenderPass(const VKRStep &step, VkCommandBuffer cmd, int curFrame) { for (size_t i = 0; i < step.preTransitions.size(); i++) { const TransitionRequest &iter = step.preTransitions[i]; if (iter.aspect == VK_IMAGE_ASPECT_COLOR_BIT && iter.fb->color.layout != iter.targetLayout) { @@ -1199,6 +1199,7 @@ void VulkanQueueRunner::PerformRenderPass(const VKRStep &step, VkCommandBuffer c // The stencil ones are very commonly mostly redundant so let's eliminate them where possible. // Might also want to consider scissor and viewport. VkPipeline lastPipeline = VK_NULL_HANDLE; + VKRPipelineLayout *vkrPipelineLayout = nullptr; VkPipelineLayout pipelineLayout = VK_NULL_HANDLE; bool pipelineOK = false; @@ -1241,7 +1242,8 @@ void VulkanQueueRunner::PerformRenderPass(const VKRStep &step, VkCommandBuffer c if (pipeline != VK_NULL_HANDLE) { vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); - pipelineLayout = c.pipeline.pipelineLayout->pipelineLayout; + vkrPipelineLayout = c.pipeline.pipelineLayout; + pipelineLayout = vkrPipelineLayout->pipelineLayout; lastGraphicsPipeline = graphicsPipeline; pipelineOK = true; } else { @@ -1336,7 +1338,12 @@ void VulkanQueueRunner::PerformRenderPass(const VKRStep &step, VkCommandBuffer c case VKRRenderCommand::DRAW_INDEXED: if (pipelineOK) { - vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout, 0, 1, &c.drawIndexed.ds, c.drawIndexed.numUboOffsets, c.drawIndexed.uboOffsets); + VkDescriptorSet set; + if (c.drawIndexed.ds) + set = c.drawIndexed.ds; + else + set = vkrPipelineLayout->descSets_[curFrame][c.drawIndexed.descSetIndex].set; + vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout, 0, 1, &set, c.drawIndexed.numUboOffsets, c.drawIndexed.uboOffsets); vkCmdBindIndexBuffer(cmd, c.drawIndexed.ibuffer, c.drawIndexed.ioffset, VK_INDEX_TYPE_UINT16); VkDeviceSize voffset = c.drawIndexed.voffset; vkCmdBindVertexBuffers(cmd, 0, 1, &c.drawIndexed.vbuffer, &voffset); @@ -1346,7 +1353,13 @@ void VulkanQueueRunner::PerformRenderPass(const VKRStep &step, VkCommandBuffer c case VKRRenderCommand::DRAW: if (pipelineOK) { - vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout, 0, 1, &c.draw.ds, c.draw.numUboOffsets, c.draw.uboOffsets); + VkDescriptorSet set; + if (c.drawIndexed.ds) + set = c.drawIndexed.ds; + else + set = vkrPipelineLayout->descSets_[curFrame][c.drawIndexed.descSetIndex].set; + _dbg_assert_(set != VK_NULL_HANDLE); + vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout, 0, 1, &set, c.draw.numUboOffsets, c.draw.uboOffsets); if (c.draw.vbuffer) { vkCmdBindVertexBuffers(cmd, 0, 1, &c.draw.vbuffer, &c.draw.voffset); } diff --git a/Common/GPU/Vulkan/VulkanQueueRunner.h b/Common/GPU/Vulkan/VulkanQueueRunner.h index 4ce030e370ed..e26b33cf4ea5 100644 --- a/Common/GPU/Vulkan/VulkanQueueRunner.h +++ b/Common/GPU/Vulkan/VulkanQueueRunner.h @@ -71,6 +71,7 @@ struct VkRenderData { } compute_pipeline; struct { VkDescriptorSet ds; + uint32_t descSetIndex; int numUboOffsets; uint32_t uboOffsets[3]; VkBuffer vbuffer; @@ -80,6 +81,7 @@ struct VkRenderData { } draw; struct { VkDescriptorSet ds; + uint32_t descSetIndex; uint32_t uboOffsets[3]; uint16_t numUboOffsets; uint16_t instances; @@ -119,9 +121,7 @@ struct VkRenderData { const char *annotation; } debugAnnotation; struct { - int setNumber; - VkDescriptorSet set; - VkPipelineLayout pipelineLayout; + int setIndex; } bindDescSet; }; }; @@ -231,7 +231,7 @@ class VulkanQueueRunner { } void PreprocessSteps(std::vector &steps); - void RunSteps(std::vector &steps, FrameData &frameData, FrameDataShared &frameDataShared, bool keepSteps = false); + void RunSteps(std::vector &steps, int curFrame, FrameData &frameData, FrameDataShared &frameDataShared, bool keepSteps = false); void LogSteps(const std::vector &steps, bool verbose); static std::string StepToString(VulkanContext *vulkan, const VKRStep &step); @@ -291,7 +291,7 @@ class VulkanQueueRunner { bool InitDepthStencilBuffer(VkCommandBuffer cmd); // Used for non-buffered rendering. VKRRenderPass *PerformBindFramebufferAsRenderTarget(const VKRStep &pass, VkCommandBuffer cmd); - void PerformRenderPass(const VKRStep &pass, VkCommandBuffer cmd); + void PerformRenderPass(const VKRStep &pass, VkCommandBuffer cmd, int curFrame); void PerformCopy(const VKRStep &pass, VkCommandBuffer cmd); void PerformBlit(const VKRStep &pass, VkCommandBuffer cmd); void PerformReadback(const VKRStep &pass, VkCommandBuffer cmd, FrameData &frameData); diff --git a/Common/GPU/Vulkan/VulkanRenderManager.cpp b/Common/GPU/Vulkan/VulkanRenderManager.cpp index 92ff72ae18f2..b8c3937f5cc5 100644 --- a/Common/GPU/Vulkan/VulkanRenderManager.cpp +++ b/Common/GPU/Vulkan/VulkanRenderManager.cpp @@ -265,6 +265,7 @@ VulkanRenderManager::VulkanRenderManager(VulkanContext *vulkan, bool useThread, initTimeMs_("initTimeMs"), totalGPUTimeMs_("totalGPUTimeMs"), renderCPUTimeMs_("renderCPUTimeMs"), + descUpdateTimeMs_("descUpdateCPUTimeMs"), useRenderThread_(useThread), frameTimeHistory_(frameTimeHistory) { @@ -645,6 +646,8 @@ void VulkanRenderManager::BeginFrame(bool enableProfiling, bool enableLogProfile PollPresentTiming(); + ResetDescriptorLists(curFrame); + int validBits = vulkan_->GetQueueFamilyProperties(vulkan_->GetGraphicsQueueFamilyIndex()).timestampValidBits; FrameTimeData &frameTimeData = frameTimeHistory_.Add(frameId); @@ -679,6 +682,9 @@ void VulkanRenderManager::BeginFrame(bool enableProfiling, bool enableLogProfile renderCPUTimeMs_.Update((frameData.profile.cpuEndTime - frameData.profile.cpuStartTime) * 1000.0); renderCPUTimeMs_.Format(line, sizeof(line)); str << line; + descUpdateTimeMs_.Update(frameData.profile.descWriteTime * 1000.0); + descUpdateTimeMs_.Format(line, sizeof(line)); + str << line; for (int i = 0; i < numQueries - 1; i++) { uint64_t diff = (queryResults[i + 1] - queryResults[i]) & timestampDiffMask; double milliseconds = (double)diff * timestampConversionFactor; @@ -1462,6 +1468,11 @@ void VulkanRenderManager::Run(VKRRenderThreadTask &task) { } frameData.SubmitPending(vulkan_, FrameSubmitType::Pending, frameDataShared_); + // Flush descriptors. + double descStart = time_now_d(); + FlushDescriptors(task.frame); + frameData.profile.descWriteTime = time_now_d() - descStart; + if (!frameData.hasMainCommands) { // Effectively resets both main and present command buffers, since they both live in this pool. // We always record main commands first, so we don't need to reset the present command buffer separately. @@ -1483,11 +1494,11 @@ void VulkanRenderManager::Run(VKRRenderThreadTask &task) { int passes = GetVRPassesCount(); for (int i = 0; i < passes; i++) { PreVRFrameRender(i); - queueRunner_.RunSteps(task.steps, frameData, frameDataShared_, i < passes - 1); + queueRunner_.RunSteps(task.steps, task.frame, frameData, frameDataShared_, i < passes - 1); PostVRFrameRender(); } } else { - queueRunner_.RunSteps(task.steps, frameData, frameDataShared_); + queueRunner_.RunSteps(task.steps, task.frame, frameData, frameDataShared_); } switch (task.runType) { @@ -1562,16 +1573,16 @@ void VulkanRenderManager::ResetStats() { renderCPUTimeMs_.Reset(); } -VKRPipelineLayout *VulkanRenderManager::CreatePipelineLayout(BindingType *bindingTypes, size_t bindingCount, bool geoShadersEnabled, const char *tag) { +VKRPipelineLayout *VulkanRenderManager::CreatePipelineLayout(BindingType *bindingTypes, size_t bindingTypesCount, bool geoShadersEnabled, const char *tag) { VKRPipelineLayout *layout = new VKRPipelineLayout(); layout->tag = tag; - layout->bindingCount = (uint32_t)bindingCount; + layout->bindingTypesCount = (uint32_t)bindingTypesCount; - _dbg_assert_(bindingCount <= ARRAY_SIZE(layout->bindingTypes)); - memcpy(layout->bindingTypes, bindingTypes, sizeof(BindingType) * bindingCount); + _dbg_assert_(bindingTypesCount <= ARRAY_SIZE(layout->bindingTypes)); + memcpy(layout->bindingTypes, bindingTypes, sizeof(BindingType) * bindingTypesCount); VkDescriptorSetLayoutBinding bindings[VKRPipelineLayout::MAX_DESC_SET_BINDINGS]; - for (int i = 0; i < bindingCount; i++) { + for (int i = 0; i < bindingTypesCount; i++) { bindings[i].binding = i; bindings[i].descriptorCount = 1; bindings[i].pImmutableSamplers = nullptr; @@ -1611,7 +1622,7 @@ VKRPipelineLayout *VulkanRenderManager::CreatePipelineLayout(BindingType *bindin } VkDescriptorSetLayoutCreateInfo dsl = { VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO }; - dsl.bindingCount = (uint32_t)bindingCount; + dsl.bindingCount = (uint32_t)bindingTypesCount; dsl.pBindings = bindings; VkResult res = vkCreateDescriptorSetLayout(vulkan_->GetDevice(), &dsl, nullptr, &layout->descriptorSetLayout); _assert_(VK_SUCCESS == res && layout->descriptorSetLayout); @@ -1625,10 +1636,135 @@ VKRPipelineLayout *VulkanRenderManager::CreatePipelineLayout(BindingType *bindin vulkan_->SetDebugName(layout->descriptorSetLayout, VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT, tag); vulkan_->SetDebugName(layout->pipelineLayout, VK_OBJECT_TYPE_PIPELINE_LAYOUT, tag); + + for (int i = 0; i < VulkanContext::MAX_INFLIGHT_FRAMES; i++) { + layout->descPools[i].Create(vulkan_, bindingTypes, (uint32_t)bindingTypesCount, 512); + layout->descPools[i].Setup([]() {}); + } + + pipelineLayouts_.push_back(layout); return layout; } void VulkanRenderManager::DestroyPipelineLayout(VKRPipelineLayout *layout) { + for (int i = 0; i < VulkanContext::MAX_INFLIGHT_FRAMES; i++) { + layout->descPools[i].Destroy(); + } + vulkan_->Delete().QueueDeletePipelineLayout(layout->pipelineLayout); vulkan_->Delete().QueueDeleteDescriptorSetLayout(layout->descriptorSetLayout); + for (auto iter = pipelineLayouts_.begin(); iter != pipelineLayouts_.end(); iter++) { + if (*iter == layout) { + pipelineLayouts_.erase(iter); + break; + } + } +} + +void VulkanRenderManager::FlushDescriptors(int frame) { + for (auto iter : pipelineLayouts_) { + iter->FlushDescSets(vulkan_, frame); + } +} + +void VulkanRenderManager::ResetDescriptorLists(int frame) { + for (auto iter : pipelineLayouts_) { + iter->flushedDescriptors_[frame] = 0; + iter->descSets_[frame].clear(); + iter->descData_[frame].clear(); + } +} + +void VKRPipelineLayout::FlushDescSets(VulkanContext *vulkan, int frame) { + _dbg_assert_(frame < VulkanContext::MAX_INFLIGHT_FRAMES); + + VulkanDescSetPool &pool = descPools[frame]; + FastVec &descData = descData_[frame]; + FastVec &descSets = descSets_[frame]; + + pool.Reset(); + + // This will write all descriptors. + // Initially, we won't do any de-duplication, so no hashmap lookups but also extra cost of writing additional descriptors. + // A short look-back might be enough? + + // Initially, let's do naive single desc set writes. + VkWriteDescriptorSet writes[MAX_DESC_SET_BINDINGS]; + VkDescriptorImageInfo imageInfo[MAX_DESC_SET_BINDINGS]; // just picked a practical number + VkDescriptorBufferInfo bufferInfo[MAX_DESC_SET_BINDINGS]; + + for (size_t index = flushedDescriptors_[frame]; index < descSets.size(); index++) { + auto &d = descSets[index]; + + // TODO: This is where to look up to see if we already have an identical descriptor previously in the array. + // We can do this with a simple custom hash map here that doesn't handle collisions, since false positives aren't too bad. + // Should probably check history, one or two items, then fall back to lookup. Or we should do the history lookup in BindDescriptors... + + // For now we just allocate unconditionally. + d.set = pool.Allocate(1, &descriptorSetLayout, nullptr); + + const PackedDescriptor *data = descData.begin() + d.offset; + int numWrites = 0; + int numBuffers = 0; + int numImages = 0; + for (int i = 0; i < d.count; i++) { + if (!data[i].image.view) { // This automatically also checks for an null buffer. + continue; + } + + switch (this->bindingTypes[i]) { + case BindingType::COMBINED_IMAGE_SAMPLER: + _dbg_assert_(data[i].image.sampler != VK_NULL_HANDLE); + imageInfo[numImages].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + imageInfo[numImages].imageView = data[i].image.view; + imageInfo[numImages].sampler = data[i].image.sampler; + writes[numWrites].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + writes[numWrites].pImageInfo = &imageInfo[numImages]; + writes[numWrites].pBufferInfo = nullptr; + numImages++; + break; + case BindingType::STORAGE_IMAGE_COMPUTE: + imageInfo[numImages].imageLayout = VK_IMAGE_LAYOUT_GENERAL; + imageInfo[numImages].imageView = data[i].image.view; + imageInfo[numImages].sampler = VK_NULL_HANDLE; + writes[numWrites].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + writes[numWrites].pImageInfo = &imageInfo[numImages]; + writes[numWrites].pBufferInfo = nullptr; + numImages++; + break; + case BindingType::STORAGE_BUFFER_VERTEX: + case BindingType::STORAGE_BUFFER_COMPUTE: + bufferInfo[numBuffers].buffer = data[i].buffer.buffer; + bufferInfo[numBuffers].offset = data[i].buffer.offset; + bufferInfo[numBuffers].range = data[i].buffer.range; + writes[numWrites].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + writes[numWrites].pBufferInfo = &bufferInfo[numBuffers]; + writes[numWrites].pImageInfo = nullptr; + numBuffers++; + break; + case BindingType::UNIFORM_BUFFER_DYNAMIC_ALL: + case BindingType::UNIFORM_BUFFER_DYNAMIC_VERTEX: + bufferInfo[numBuffers].buffer = data[i].buffer.buffer; + bufferInfo[numBuffers].offset = 0; + bufferInfo[numBuffers].range = data[i].buffer.range; + writes[numWrites].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC; + writes[numWrites].pBufferInfo = &bufferInfo[numBuffers]; + writes[numWrites].pImageInfo = nullptr; + numBuffers++; + break; + } + writes[numWrites].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + writes[numWrites].pNext = nullptr; + writes[numWrites].descriptorCount = 1; + writes[numWrites].dstArrayElement = 0; + writes[numWrites].dstBinding = i; + writes[numWrites].dstSet = d.set; + writes[numWrites].pTexelBufferView = nullptr; + numWrites++; + } + + vkUpdateDescriptorSets(vulkan->GetDevice(), numWrites, writes, 0, nullptr); + } + + flushedDescriptors_[frame] = (int)descSets.size(); } diff --git a/Common/GPU/Vulkan/VulkanRenderManager.h b/Common/GPU/Vulkan/VulkanRenderManager.h index 9a7ef0f79595..d4d7be5bf7a3 100644 --- a/Common/GPU/Vulkan/VulkanRenderManager.h +++ b/Common/GPU/Vulkan/VulkanRenderManager.h @@ -185,19 +185,55 @@ struct CompileQueueEntry { VkSampleCountFlagBits sampleCount; }; +// Pending descriptor sets. +// TODO: Sort these by VKRPipelineLayout to avoid storing it for each element. +struct PendingDescSet { + int offset; // probably enough with a u16. + u8 count; + VkDescriptorSet set; +}; + +struct PackedDescriptor { + union { + struct { + VkImageView view; + VkSampler sampler; + } image; + struct { + VkBuffer buffer; + uint32_t offset; + uint32_t range; + } buffer; + }; +}; + // Note that we only support a single descriptor set due to compatibility with some ancient devices. // We should probably eventually give that up. struct VKRPipelineLayout { + VKRPipelineLayout() {} ~VKRPipelineLayout() { _assert_(!pipelineLayout && !descriptorSetLayout); + _assert_(descPools[0].IsDestroyed()); } enum { MAX_DESC_SET_BINDINGS = 10 }; BindingType bindingTypes[MAX_DESC_SET_BINDINGS]; - uint32_t bindingCount; - VkPipelineLayout pipelineLayout; - VkDescriptorSetLayout descriptorSetLayout; // only support 1 for now. + + uint32_t bindingTypesCount = 0; + VkPipelineLayout pipelineLayout = VK_NULL_HANDLE; + VkDescriptorSetLayout descriptorSetLayout = VK_NULL_HANDLE; // only support 1 for now. int pushConstSize = 0; - const char *tag; + const char *tag = nullptr; + + // The pipeline layout owns the descriptor set pools. Don't go create excessive layouts. + VulkanDescSetPool descPools[VulkanContext::MAX_INFLIGHT_FRAMES]; + + // TODO: We should be able to get away with a single descData_/descSets_ and then send it along, + // but it's easier to just segregate by frame id. + FastVec descData_[VulkanContext::MAX_INFLIGHT_FRAMES]; + FastVec descSets_[VulkanContext::MAX_INFLIGHT_FRAMES]; + int flushedDescriptors_[VulkanContext::MAX_INFLIGHT_FRAMES]{}; + + void FlushDescSets(VulkanContext *vulkan, int frame); }; class VulkanRenderManager { @@ -283,6 +319,7 @@ class VulkanRenderManager { // DebugBreak(); // } curPipelineFlags_ |= flags; + curPipelineLayout_ = pipelineLayout; return true; } @@ -412,6 +449,46 @@ class VulkanRenderManager { curRenderStep_->render.stencilStore = VKRRenderPassStoreAction::DONT_CARE; } +private: + // Descriptors will match the current pipeline layout, set by the last call to BindPipeline. + // Count is the count of void*s. Two are needed for COMBINED_IMAGE_SAMPLER, everything else is a single one. + // The goal is to keep this function very small and fast, and do the expensive work on the render thread or + // another thread. + int BindDescriptors(const PackedDescriptor *desc, int count) { + _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == VKRStepType::RENDER); + + int curFrame = vulkan_->GetCurFrame(); + + size_t offset = curPipelineLayout_->descData_[curFrame].size(); + curPipelineLayout_->descData_[curFrame].extend(desc, count); + + int setIndex = (int)curPipelineLayout_->descSets_[curFrame].size(); + PendingDescSet &descSet = curPipelineLayout_->descSets_[curFrame].push_uninitialized(); + descSet.offset = (uint32_t)offset; + descSet.count = count; + descSet.set = VK_NULL_HANDLE; // to be filled in + return setIndex; + } + +public: + void Draw(const PackedDescriptor *desc, int descCount, int numUboOffsets, const uint32_t *uboOffsets, VkBuffer vbuffer, int voffset, int count, int offset = 0) { + _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == VKRStepType::RENDER && curStepHasViewport_ && curStepHasScissor_); + int setIndex = BindDescriptors(desc, descCount); + VkRenderData &data = curRenderStep_->commands.push_uninitialized(); + data.cmd = VKRRenderCommand::DRAW; + data.draw.count = count; + data.draw.offset = offset; + data.draw.ds = VK_NULL_HANDLE; + data.draw.descSetIndex = setIndex; + data.draw.vbuffer = vbuffer; + data.draw.voffset = voffset; + data.draw.numUboOffsets = numUboOffsets; + _dbg_assert_(numUboOffsets <= ARRAY_SIZE(data.draw.uboOffsets)); + for (int i = 0; i < numUboOffsets; i++) + data.draw.uboOffsets[i] = uboOffsets[i]; + curRenderStep_->render.numDraws++; + } + void Draw(VkDescriptorSet descSet, int numUboOffsets, const uint32_t *uboOffsets, VkBuffer vbuffer, int voffset, int count, int offset = 0) { _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == VKRStepType::RENDER && curStepHasViewport_ && curStepHasScissor_); VkRenderData &data = curRenderStep_->commands.push_uninitialized(); @@ -428,6 +505,26 @@ class VulkanRenderManager { curRenderStep_->render.numDraws++; } + void DrawIndexed(const PackedDescriptor *desc, int descCount, int numUboOffsets, const uint32_t *uboOffsets, VkBuffer vbuffer, int voffset, VkBuffer ibuffer, int ioffset, int count, int numInstances) { + _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == VKRStepType::RENDER && curStepHasViewport_ && curStepHasScissor_); + int setIndex = BindDescriptors(desc, descCount); + VkRenderData &data = curRenderStep_->commands.push_uninitialized(); + data.cmd = VKRRenderCommand::DRAW_INDEXED; + data.drawIndexed.count = count; + data.drawIndexed.instances = numInstances; + data.drawIndexed.ds = VK_NULL_HANDLE; + data.drawIndexed.descSetIndex = setIndex; + data.drawIndexed.vbuffer = vbuffer; + data.drawIndexed.voffset = voffset; + data.drawIndexed.ibuffer = ibuffer; + data.drawIndexed.ioffset = ioffset; + data.drawIndexed.numUboOffsets = numUboOffsets; + _dbg_assert_(numUboOffsets <= ARRAY_SIZE(data.drawIndexed.uboOffsets)); + for (int i = 0; i < numUboOffsets; i++) + data.drawIndexed.uboOffsets[i] = uboOffsets[i]; + curRenderStep_->render.numDraws++; + } + void DrawIndexed(VkDescriptorSet descSet, int numUboOffsets, const uint32_t *uboOffsets, VkBuffer vbuffer, int voffset, VkBuffer ibuffer, int ioffset, int count, int numInstances) { _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == VKRStepType::RENDER && curStepHasViewport_ && curStepHasScissor_); VkRenderData &data = curRenderStep_->commands.push_uninitialized(); @@ -505,6 +602,9 @@ class VulkanRenderManager { void PresentWaitThreadFunc(); void PollPresentTiming(); + void ResetDescriptorLists(int frame); + void FlushDescriptors(int frame); + FrameDataShared frameDataShared_; FrameData frameData_[VulkanContext::MAX_INFLIGHT_FRAMES]; @@ -572,9 +672,13 @@ class VulkanRenderManager { SimpleStat initTimeMs_; SimpleStat totalGPUTimeMs_; SimpleStat renderCPUTimeMs_; + SimpleStat descUpdateTimeMs_; std::function invalidationCallback_; uint64_t frameIdGen_ = FRAME_TIME_HISTORY_LENGTH; HistoryBuffer &frameTimeHistory_; + + VKRPipelineLayout *curPipelineLayout_ = nullptr; + std::vector pipelineLayouts_; }; diff --git a/Common/GPU/Vulkan/thin3d_vulkan.cpp b/Common/GPU/Vulkan/thin3d_vulkan.cpp index 118b38b8ce21..6e376617e9f2 100644 --- a/Common/GPU/Vulkan/thin3d_vulkan.cpp +++ b/Common/GPU/Vulkan/thin3d_vulkan.cpp @@ -27,19 +27,13 @@ #include "Common/Data/Convert/SmallDataConvert.h" #include "Common/GPU/thin3d.h" #include "Common/GPU/Vulkan/VulkanRenderManager.h" -#include "Common/GPU/Vulkan/VulkanDescSet.h" #include "Common/GPU/Vulkan/VulkanContext.h" #include "Common/GPU/Vulkan/VulkanImage.h" #include "Common/GPU/Vulkan/VulkanMemory.h" #include "Common/GPU/Vulkan/VulkanLoader.h" #include "Common/Thread/Promise.h" -// We support a frame-global descriptor set, which can be optionally used by other code, -// but is not directly used by thin3d. It has to be defined here though, be in set 0 -// and specified in every pipeline layout, otherwise it can't sit undisturbed when other -// descriptor sets are bound on top. - -// For descriptor set 1, we use a simple descriptor set for all thin3d rendering: 1 UBO binding point, 3 combined texture/samples. +// For descriptor set 0 (the only one), we use a simple descriptor set for all thin3d rendering: 1 UBO binding point, 3 combined texture/samples. // // binding 0 - uniform buffer // binding 1 - texture/sampler @@ -510,7 +504,7 @@ class VKContext : public DrawContext { } } - VkDescriptorSet GetOrCreateDescriptorSet(VkBuffer buffer); + void BindDescriptors(VkBuffer buffer, PackedDescriptor descriptors[4]); std::vector GetFeatureList() const override; std::vector GetExtensionList(bool device, bool enabledOnly) const override; @@ -562,19 +556,6 @@ class VKContext : public DrawContext { VulkanPushPool *push_ = nullptr; - struct FrameData { - FrameData() : descriptorPool("VKContext", false) { - descriptorPool.Setup([this] { descSets_.clear(); }); - } - // Per-frame descriptor set cache. As it's per frame and reset every frame, we don't need to - // worry about invalidating descriptors pointing to deleted textures. - // However! ARM is not a fan of doing it this way. - std::map descSets_; - VulkanDescSetPool descriptorPool; - }; - - FrameData frame_[VulkanContext::MAX_INFLIGHT_FRAMES]; - DeviceCaps caps_{}; uint8_t stencilRef_ = 0; @@ -1053,10 +1034,6 @@ VKContext::VKContext(VulkanContext *vulkan, bool useRenderThread) } pipelineLayout_ = renderManager_.CreatePipelineLayout(bindings, ARRAY_SIZE(bindings), caps_.geometryShaderSupported, "thin3d_layout"); - for (int i = 0; i < VulkanContext::MAX_INFLIGHT_FRAMES; i++) { - frame_[i].descriptorPool.Create(vulkan_, bindings, ARRAY_SIZE(bindings), 1024); - } - VkPipelineCacheCreateInfo pc{ VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO }; VkResult res = vkCreatePipelineCache(vulkan_->GetDevice(), &pc, nullptr, &pipelineCache_); _assert_(VK_SUCCESS == res); @@ -1066,10 +1043,6 @@ VKContext::~VKContext() { DestroyPresets(); delete nullTexture_; - // This also destroys all descriptor sets. - for (int i = 0; i < VulkanContext::MAX_INFLIGHT_FRAMES; i++) { - frame_[i].descriptorPool.Destroy(); - } push_->Destroy(); delete push_; renderManager_.DestroyPipelineLayout(pipelineLayout_); @@ -1078,12 +1051,7 @@ VKContext::~VKContext() { void VKContext::BeginFrame(DebugFlags debugFlags) { renderManager_.BeginFrame(debugFlags & DebugFlags::PROFILE_TIMESTAMPS, debugFlags & DebugFlags::PROFILE_SCOPES); - - FrameData &frame = frame_[vulkan_->GetCurFrame()]; - push_->BeginFrame(); - - frame.descriptorPool.Reset(); } void VKContext::EndFrame() { @@ -1121,81 +1089,30 @@ void VKContext::WipeQueue() { renderManager_.Wipe(); } -VkDescriptorSet VKContext::GetOrCreateDescriptorSet(VkBuffer buf) { - DescriptorSetKey key{}; - - FrameData *frame = &frame_[vulkan_->GetCurFrame()]; +void VKContext::BindDescriptors(VkBuffer buf, PackedDescriptor descriptors[4]) { + descriptors[0].buffer.buffer = buf; + descriptors[0].buffer.offset = 0; // dynamic + descriptors[0].buffer.range = curPipeline_->GetUBOSize(); + int numDescs = 1; for (int i = 0; i < MAX_BOUND_TEXTURES; ++i) { + VkImageView view; + VkSampler sampler; if (boundTextures_[i]) { - key.imageViews_[i] = (boundTextureFlags_[i] & TextureBindFlags::VULKAN_BIND_ARRAY) ? boundTextures_[i]->GetImageArrayView() : boundTextures_[i]->GetImageView(); + view = (boundTextureFlags_[i] & TextureBindFlags::VULKAN_BIND_ARRAY) ? boundTextures_[i]->GetImageArrayView() : boundTextures_[i]->GetImageView(); } else { - key.imageViews_[i] = boundImageView_[i]; + view = boundImageView_[i]; } - key.samplers_[i] = boundSamplers_[i]; - } - key.buffer_ = buf; - - auto iter = frame->descSets_.find(key); - if (iter != frame->descSets_.end()) { - return iter->second; - } - - VkDescriptorSet descSet = frame->descriptorPool.Allocate(1, &pipelineLayout_->descriptorSetLayout, "thin3d_descset"); - if (descSet == VK_NULL_HANDLE) { - ERROR_LOG(G3D, "GetOrCreateDescriptorSet failed"); - return VK_NULL_HANDLE; - } - - vulkan_->SetDebugName(descSet, VK_OBJECT_TYPE_DESCRIPTOR_SET, "(thin3d desc set)"); + sampler = boundSamplers_[i] ? boundSamplers_[i]->GetSampler() : VK_NULL_HANDLE; - VkDescriptorBufferInfo bufferDesc; - bufferDesc.buffer = buf; - bufferDesc.offset = 0; - bufferDesc.range = curPipeline_->GetUBOSize(); - - VkDescriptorImageInfo imageDesc[MAX_BOUND_TEXTURES]{}; - VkWriteDescriptorSet writes[1 + MAX_BOUND_TEXTURES]{}; - - // If handles are NULL for whatever buggy reason, it's best to leave the descriptors - // unwritten instead of trying to write a zero, which is not legal. - - int numWrites = 0; - if (buf) { - writes[numWrites].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - writes[numWrites].dstSet = descSet; - writes[numWrites].dstArrayElement = 0; - writes[numWrites].dstBinding = 0; - writes[numWrites].pBufferInfo = &bufferDesc; - writes[numWrites].pImageInfo = nullptr; - writes[numWrites].pTexelBufferView = nullptr; - writes[numWrites].descriptorCount = 1; - writes[numWrites].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC; - numWrites++; - } - - for (int i = 0; i < MAX_BOUND_TEXTURES; ++i) { - if (key.imageViews_[i] && key.samplers_[i] && key.samplers_[i]->GetSampler()) { - imageDesc[i].imageView = key.imageViews_[i]; - imageDesc[i].sampler = key.samplers_[i]->GetSampler(); - imageDesc[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - writes[numWrites].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - writes[numWrites].dstSet = descSet; - writes[numWrites].dstArrayElement = 0; - writes[numWrites].dstBinding = i + 1; - writes[numWrites].pBufferInfo = nullptr; - writes[numWrites].pImageInfo = &imageDesc[i]; - writes[numWrites].pTexelBufferView = nullptr; - writes[numWrites].descriptorCount = 1; - writes[numWrites].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - numWrites++; + if (view && sampler) { + descriptors[i + 1].image.view = view; + descriptors[i + 1].image.sampler = sampler; + } else { + descriptors[i + 1].image.view = VK_NULL_HANDLE; + descriptors[i + 1].image.sampler = VK_NULL_HANDLE; } } - - vkUpdateDescriptorSets(device_, numWrites, writes, 0, nullptr); - - frame->descSets_[key] = descSet; - return descSet; } Pipeline *VKContext::CreateGraphicsPipeline(const PipelineDesc &desc, const char *tag) { @@ -1445,7 +1362,7 @@ void VKContext::BindTextures(int start, int count, Texture **textures, TextureBi boundTextures_[i] = static_cast(textures[i - start]); boundTextureFlags_[i] = flags; if (boundTextures_[i]) { - // If a texture is bound, we set these up in GetOrCreateDescriptorSet too. + // If a texture is bound, we set these up in BindDescriptors too. // But we might need to set the view here anyway so it can be queried using GetNativeObject. if (flags & TextureBindFlags::VULKAN_BIND_ARRAY) { boundImageView_[i] = boundTextures_[i]->GetImageArrayView(); @@ -1498,15 +1415,11 @@ void VKContext::Draw(int vertexCount, int offset) { uint32_t ubo_offset = (uint32_t)curPipeline_->PushUBO(push_, vulkan_, &vulkanUBObuf); size_t vbBindOffset = push_->Push(vbuf->GetData(), vbuf->GetSize(), 4, &vulkanVbuf); - VkDescriptorSet descSet = GetOrCreateDescriptorSet(vulkanUBObuf); - if (descSet == VK_NULL_HANDLE) { - ERROR_LOG(G3D, "GetOrCreateDescriptorSet failed, skipping %s", __FUNCTION__); - return; - } - BindCurrentPipeline(); ApplyDynamicState(); - renderManager_.Draw(descSet, 1, &ubo_offset, vulkanVbuf, (int)vbBindOffset + curVBufferOffsets_[0], vertexCount, offset); + PackedDescriptor descriptors[4]; + BindDescriptors(vulkanUBObuf, descriptors); + renderManager_.Draw(descriptors, ARRAY_SIZE(descriptors), 1, &ubo_offset, vulkanVbuf, (int)vbBindOffset + curVBufferOffsets_[0], vertexCount, offset); } void VKContext::DrawIndexed(int vertexCount, int offset) { @@ -1518,15 +1431,11 @@ void VKContext::DrawIndexed(int vertexCount, int offset) { size_t vbBindOffset = push_->Push(vbuf->GetData(), vbuf->GetSize(), 4, &vulkanVbuf); size_t ibBindOffset = push_->Push(ibuf->GetData(), ibuf->GetSize(), 4, &vulkanIbuf); - VkDescriptorSet descSet = GetOrCreateDescriptorSet(vulkanUBObuf); - if (descSet == VK_NULL_HANDLE) { - ERROR_LOG(G3D, "GetOrCreateDescriptorSet failed, skipping %s", __FUNCTION__); - return; - } - BindCurrentPipeline(); ApplyDynamicState(); - renderManager_.DrawIndexed(descSet, 1, &ubo_offset, vulkanVbuf, (int)vbBindOffset + curVBufferOffsets_[0], vulkanIbuf, (int)ibBindOffset + offset * sizeof(uint32_t), vertexCount, 1); + PackedDescriptor descriptors[4]; + BindDescriptors(vulkanUBObuf, descriptors); + renderManager_.DrawIndexed(descriptors, ARRAY_SIZE(descriptors), 1, &ubo_offset, vulkanVbuf, (int)vbBindOffset + curVBufferOffsets_[0], vulkanIbuf, (int)ibBindOffset + offset * sizeof(uint32_t), vertexCount, 1); } void VKContext::DrawUP(const void *vdata, int vertexCount) { @@ -1544,15 +1453,11 @@ void VKContext::DrawUP(const void *vdata, int vertexCount) { uint32_t ubo_offset = (uint32_t)curPipeline_->PushUBO(push_, vulkan_, &vulkanUBObuf); - VkDescriptorSet descSet = GetOrCreateDescriptorSet(vulkanUBObuf); - if (descSet == VK_NULL_HANDLE) { - ERROR_LOG(G3D, "GetOrCreateDescriptorSet failed, skipping %s", __FUNCTION__); - return; - } - BindCurrentPipeline(); ApplyDynamicState(); - renderManager_.Draw(descSet, 1, &ubo_offset, vulkanVbuf, (int)vbBindOffset + curVBufferOffsets_[0], vertexCount); + PackedDescriptor descriptors[4]; + BindDescriptors(vulkanUBObuf, descriptors); + renderManager_.Draw(descriptors, ARRAY_SIZE(descriptors), 1, &ubo_offset, vulkanVbuf, (int)vbBindOffset + curVBufferOffsets_[0], vertexCount); } void VKContext::BindCurrentPipeline() { diff --git a/Common/UI/Context.cpp b/Common/UI/Context.cpp index 31b59419ed67..a660ece7658f 100644 --- a/Common/UI/Context.cpp +++ b/Common/UI/Context.cpp @@ -83,8 +83,6 @@ void UIContext::BeginPipeline(Draw::Pipeline *pipeline, Draw::SamplerState *samp // Also clear out any other textures bound. Draw::SamplerState *samplers[3]{ samplerState }; draw_->BindSamplerStates(0, 3, samplers); - Draw::Texture *textures[2]{}; - draw_->BindTextures(1, 2, textures); RebindTexture(); UIBegin(pipeline); } From af47ad035d29a9902229d5a41ae39d1f5ee8ceed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Mon, 9 Oct 2023 11:51:32 +0200 Subject: [PATCH 02/12] Also use the new descriptor mechanism for in-game --- GPU/Common/DrawEngineCommon.cpp | 4 +- GPU/Vulkan/DrawEngineVulkan.cpp | 103 +++++++++++++++++--------------- GPU/Vulkan/DrawEngineVulkan.h | 20 +------ 3 files changed, 57 insertions(+), 70 deletions(-) diff --git a/GPU/Common/DrawEngineCommon.cpp b/GPU/Common/DrawEngineCommon.cpp index 17015b336227..1a72aaf52f57 100644 --- a/GPU/Common/DrawEngineCommon.cpp +++ b/GPU/Common/DrawEngineCommon.cpp @@ -687,7 +687,7 @@ int DrawEngineCommon::ExtendNonIndexedPrim(const uint32_t *cmd, const uint32_t * DeferredVerts &dv = drawVerts_[prevDrawVerts]; int offset = dv.vertexCount; - _dbg_assert_(numDrawInds_ < MAX_DEFERRED_DRAW_INDS); + _dbg_assert_(numDrawInds_ <= MAX_DEFERRED_DRAW_INDS); // if it's equal, the check below will take care of it before any action is taken. _dbg_assert_(numDrawVerts_ > 0); while (cmd != stall) { @@ -713,8 +713,6 @@ int DrawEngineCommon::ExtendNonIndexedPrim(const uint32_t *cmd, const uint32_t * cmd++; } - _dbg_assert_(cmd != start); - int totalCount = offset - dv.vertexCount; dv.vertexCount = offset; dv.indexUpperBound = dv.vertexCount - 1; diff --git a/GPU/Vulkan/DrawEngineVulkan.cpp b/GPU/Vulkan/DrawEngineVulkan.cpp index a2e2128401a6..0c1a00d86651 100644 --- a/GPU/Vulkan/DrawEngineVulkan.cpp +++ b/GPU/Vulkan/DrawEngineVulkan.cpp @@ -98,17 +98,6 @@ void DrawEngineVulkan::InitDeviceObjects() { VulkanRenderManager *renderManager = (VulkanRenderManager *)draw_->GetNativeObject(Draw::NativeObject::RENDER_MANAGER); pipelineLayout_ = renderManager->CreatePipelineLayout(bindingTypes, ARRAY_SIZE(bindingTypes), draw_->GetDeviceCaps().geometryShaderSupported, "drawengine_layout"); - static constexpr int DEFAULT_DESC_POOL_SIZE = 512; - - // We are going to use one-shot descriptors in the initial implementation. Might look into caching them - // if creating and updating them turns out to be expensive. - for (int i = 0; i < VulkanContext::MAX_INFLIGHT_FRAMES; i++) { - frame_[i].descPool.Create(vulkan, bindingTypes, ARRAY_SIZE(bindingTypes), DEFAULT_DESC_POOL_SIZE); - - // Note that pushUBO_ is also used for tessellation data (search for SetPushBuffer), and to upload - // the null texture. This should be cleaned up... - } - pushUBO_ = (VulkanPushPool *)draw_->GetNativeObject(Draw::NativeObject::PUSH_POOL); pushVertex_ = new VulkanPushPool(vulkan, "pushVertex", 4 * 1024 * 1024, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT); pushIndex_ = new VulkanPushPool(vulkan, "pushIndex", 1 * 512 * 1024, VK_BUFFER_USAGE_INDEX_BUFFER_BIT); @@ -140,10 +129,6 @@ DrawEngineVulkan::~DrawEngineVulkan() { DestroyDeviceObjects(); } -void DrawEngineVulkan::FrameData::Destroy(VulkanContext *vulkan) { - descPool.Destroy(); -} - void DrawEngineVulkan::DestroyDeviceObjects() { if (!draw_) { // We've already done this from LostDevice. @@ -159,10 +144,6 @@ void DrawEngineVulkan::DestroyDeviceObjects() { tessDataTransfer = nullptr; tessDataTransferVulkan = nullptr; - for (int i = 0; i < VulkanContext::MAX_INFLIGHT_FRAMES; i++) { - frame_[i].Destroy(vulkan); - } - pushUBO_ = nullptr; if (pushVertex_) { @@ -220,8 +201,6 @@ void DrawEngineVulkan::BeginFrame() { DirtyAllUBOs(); - FrameData *frame = &GetCurFrame(); - // First reset all buffers, then begin. This is so that Reset can free memory and Begin can allocate it, // if growing the buffer is needed. Doing it this way will reduce fragmentation if more than one buffer // needs to grow in the same frame. The state where many buffers are reset can also be used to @@ -241,8 +220,6 @@ void DrawEngineVulkan::BeginFrame() { vertexCache_->BeginNoReset(); - frame->descPool.Reset(); - if (--decimationCounter_ <= 0) { decimationCounter_ = VERTEXCACHE_DECIMATION_INTERVAL; @@ -268,7 +245,6 @@ void DrawEngineVulkan::BeginFrame() { } void DrawEngineVulkan::EndFrame() { - FrameData *frame = &GetCurFrame(); stats_.pushVertexSpaceUsed = (int)pushVertex_->GetUsedThisFrame(); stats_.pushIndexSpaceUsed = (int)pushIndex_->GetUsedThisFrame(); vertexCache_->End(); @@ -296,6 +272,7 @@ void DrawEngineVulkan::DecodeVertsToPushPool(VulkanPushPool *push, uint32_t *bin DecodeVerts(dest); } +/* VkDescriptorSet DrawEngineVulkan::GetOrCreateDescriptorSet(VkImageView imageView, VkSampler sampler, VkBuffer base, VkBuffer light, VkBuffer bone, bool tess) { _dbg_assert_(base != VK_NULL_HANDLE); _dbg_assert_(light != VK_NULL_HANDLE); @@ -319,13 +296,6 @@ VkDescriptorSet DrawEngineVulkan::GetOrCreateDescriptorSet(VkImageView imageView } } - // Didn't find one in the frame descriptor set cache, let's make a new one. - // We wipe the cache on every frame. - VkDescriptorSet desc = frame.descPool.Allocate(1, &pipelineLayout_->descriptorSetLayout, "game_descset"); - - // Even in release mode, this is bad. - _assert_msg_(desc != VK_NULL_HANDLE, "Ran out of descriptor space in pool. sz=%d", (int)frame.descSets.size()); - // We just don't write to the slots we don't care about, which is fine. VkWriteDescriptorSet writes[DRAW_BINDING_COUNT]{}; // Main texture @@ -441,6 +411,7 @@ VkDescriptorSet DrawEngineVulkan::GetOrCreateDescriptorSet(VkImageView imageView frame.descSets.Insert(key, desc); return desc; } +*/ void DrawEngineVulkan::DirtyAllUBOs() { baseUBOOffset = 0; @@ -632,7 +603,6 @@ void DrawEngineVulkan::DoFlush() { VulkanRenderManager *renderManager = (VulkanRenderManager *)draw_->GetNativeObject(Draw::NativeObject::RENDER_MANAGER); PROFILE_THIS_SCOPE("Flush"); - FrameData &frameData = GetCurFrame(); bool tess = gstate_c.submitType == SubmitType::HW_BEZIER || gstate_c.submitType == SubmitType::HW_SPLINE; @@ -757,10 +727,34 @@ void DrawEngineVulkan::DoFlush() { lastPrim_ = prim; dirtyUniforms_ |= shaderManager_->UpdateUniforms(framebufferManager_->UseBufferedRendering()); - UpdateUBOs(&frameData); - - VkDescriptorSet ds = GetOrCreateDescriptorSet(imageView, sampler, baseBuf, lightBuf, boneBuf, tess); - + UpdateUBOs(); + + PackedDescriptor descriptors[9]{}; + int descCount = 6; + descriptors[0].image.view = imageView; + descriptors[0].image.sampler = sampler; + if (boundSecondary_) { + descriptors[1].image.view = boundSecondary_; + descriptors[1].image.sampler = samplerSecondaryNearest_; + } + if (boundDepal_) { + descriptors[2].image.view = boundDepal_; + descriptors[2].image.sampler = boundDepalSmoothed_ ? samplerSecondaryLinear_ : samplerSecondaryNearest_; + } + descriptors[3].buffer.buffer = baseBuf; + descriptors[3].buffer.range = sizeof(UB_VS_FS_Base); + descriptors[4].buffer.buffer = lightBuf; + descriptors[4].buffer.range = sizeof(UB_VS_Lights); + descriptors[5].buffer.buffer = boneBuf; + descriptors[5].buffer.range = sizeof(UB_VS_Bones); + if (tess) { + const VkDescriptorBufferInfo *bufInfo = tessDataTransferVulkan->GetBufferInfo(); + for (int j = 0; j < 3; j++) { + descriptors[j + 6].buffer.buffer = bufInfo[j].buffer; + descriptors[j + 6].buffer.offset = bufInfo[j].offset; + descriptors[j + 6].buffer.range = bufInfo[j].range; + } + } // TODO: Can we avoid binding all three when not needed? Same below for hardware transform. // Think this will require different descriptor set layouts. const uint32_t dynamicUBOOffsets[3] = { @@ -770,9 +764,9 @@ void DrawEngineVulkan::DoFlush() { if (!ibuf) { ibOffset = (uint32_t)pushIndex_->Push(decIndex_, sizeof(uint16_t) * indexGen.VertexCount(), 4, &ibuf); } - renderManager->DrawIndexed(ds, ARRAY_SIZE(dynamicUBOOffsets), dynamicUBOOffsets, vbuf, vbOffset, ibuf, ibOffset, vertexCount, 1); + renderManager->DrawIndexed(descriptors, descCount, ARRAY_SIZE(dynamicUBOOffsets), dynamicUBOOffsets, vbuf, vbOffset, ibuf, ibOffset, vertexCount, 1); } else { - renderManager->Draw(ds, ARRAY_SIZE(dynamicUBOOffsets), dynamicUBOOffsets, vbuf, vbOffset, vertexCount); + renderManager->Draw(descriptors, descCount, ARRAY_SIZE(dynamicUBOOffsets), dynamicUBOOffsets, vbuf, vbOffset, vertexCount); } } else { PROFILE_THIS_SCOPE("soft"); @@ -898,9 +892,27 @@ void DrawEngineVulkan::DoFlush() { dirtyUniforms_ |= shaderManager_->UpdateUniforms(framebufferManager_->UseBufferedRendering()); // Even if the first draw is through-mode, make sure we at least have one copy of these uniforms buffered - UpdateUBOs(&frameData); + UpdateUBOs(); + + PackedDescriptor descriptors[9]{}; + int descCount = 6; + descriptors[0].image.view = imageView; + descriptors[0].image.sampler = sampler; + if (boundSecondary_) { + descriptors[1].image.view = boundSecondary_; + descriptors[1].image.sampler = samplerSecondaryNearest_; + } + if (boundDepal_) { + descriptors[2].image.view = boundDepal_; + descriptors[2].image.sampler = boundDepalSmoothed_ ? samplerSecondaryLinear_ : samplerSecondaryNearest_; + } + descriptors[3].buffer.buffer = baseBuf; + descriptors[3].buffer.range = sizeof(UB_VS_FS_Base); + descriptors[4].buffer.buffer = lightBuf; + descriptors[4].buffer.range = sizeof(UB_VS_Lights); + descriptors[5].buffer.buffer = boneBuf; + descriptors[5].buffer.range = sizeof(UB_VS_Bones); - VkDescriptorSet ds = GetOrCreateDescriptorSet(imageView, sampler, baseBuf, lightBuf, boneBuf, tess); const uint32_t dynamicUBOOffsets[3] = { baseUBOOffset, lightUBOOffset, boneUBOOffset, }; @@ -911,11 +923,11 @@ void DrawEngineVulkan::DoFlush() { VkBuffer vbuf, ibuf; vbOffset = (uint32_t)pushVertex_->Push(result.drawBuffer, maxIndex * sizeof(TransformedVertex), 4, &vbuf); ibOffset = (uint32_t)pushIndex_->Push(inds, sizeof(short) * result.drawNumTrans, 4, &ibuf); - renderManager->DrawIndexed(ds, ARRAY_SIZE(dynamicUBOOffsets), dynamicUBOOffsets, vbuf, vbOffset, ibuf, ibOffset, result.drawNumTrans, 1); + renderManager->DrawIndexed(descriptors, descCount, ARRAY_SIZE(dynamicUBOOffsets), dynamicUBOOffsets, vbuf, vbOffset, ibuf, ibOffset, result.drawNumTrans, 1); } else { VkBuffer vbuf; vbOffset = (uint32_t)pushVertex_->Push(result.drawBuffer, result.drawNumTrans * sizeof(TransformedVertex), 4, &vbuf); - renderManager->Draw(ds, ARRAY_SIZE(dynamicUBOOffsets), dynamicUBOOffsets, vbuf, vbOffset, result.drawNumTrans); + renderManager->Draw(descriptors, descCount, ARRAY_SIZE(dynamicUBOOffsets), dynamicUBOOffsets, vbuf, vbOffset, result.drawNumTrans); } } else if (result.action == SW_CLEAR) { // Note: we won't get here if the clear is alpha but not color, or color but not alpha. @@ -964,7 +976,7 @@ void DrawEngineVulkan::ResetAfterDraw() { gstate_c.vertexFullAlpha = true; } -void DrawEngineVulkan::UpdateUBOs(FrameData *frame) { +void DrawEngineVulkan::UpdateUBOs() { if ((dirtyUniforms_ & DIRTY_BASE_UNIFORMS) || baseBuf == VK_NULL_HANDLE) { baseUBOOffset = shaderManager_->PushBaseBuffer(pushUBO_, &baseBuf); dirtyUniforms_ &= ~DIRTY_BASE_UNIFORMS; @@ -979,11 +991,6 @@ void DrawEngineVulkan::UpdateUBOs(FrameData *frame) { } } -DrawEngineVulkan::FrameData &DrawEngineVulkan::GetCurFrame() { - VulkanContext *vulkan = (VulkanContext *)draw_->GetNativeObject(Draw::NativeObject::CONTEXT); - return frame_[vulkan->GetCurFrame()]; -} - void TessellationDataTransferVulkan::SendDataToShader(const SimpleVertex *const *points, int size_u, int size_v, u32 vertType, const Spline::Weight2D &weights) { // SSBOs that are not simply float1 or float2 need to be padded up to a float4 size. vec3 members // also need to be 16-byte aligned, hence the padding. diff --git a/GPU/Vulkan/DrawEngineVulkan.h b/GPU/Vulkan/DrawEngineVulkan.h index 261b70e08126..9e883b0121ff 100644 --- a/GPU/Vulkan/DrawEngineVulkan.h +++ b/GPU/Vulkan/DrawEngineVulkan.h @@ -234,13 +234,10 @@ class DrawEngineVulkan : public DrawEngineCommon { void DecodeVertsToPushBuffer(VulkanPushBuffer *push, uint32_t *bindOffset, VkBuffer *vkbuf); void DoFlush(); - void UpdateUBOs(FrameData *frame); - FrameData &GetCurFrame(); + void UpdateUBOs(); NO_INLINE void ResetAfterDraw(); - VkDescriptorSet GetOrCreateDescriptorSet(VkImageView imageView, VkSampler sampler, VkBuffer base, VkBuffer light, VkBuffer bone, bool tess); - Draw::DrawContext *draw_; // We use a shared descriptor set layouts for all PSP draws. @@ -269,22 +266,7 @@ class DrawEngineVulkan : public DrawEngineCommon { // for all draws in a frame, except when the buffer has to grow. }; - // We alternate between these. - struct FrameData { - FrameData() : descSets(512), descPool("DrawEngine", true) { - descPool.Setup([this] { descSets.Clear(); }); - } - - VulkanDescSetPool descPool; - - // We do rolling allocation and reset instead of caching across frames. That we might do later. - DenseHashMap descSets; - - void Destroy(VulkanContext *vulkan); - }; - GEPrimitiveType lastPrim_ = GE_PRIM_INVALID; - FrameData frame_[VulkanContext::MAX_INFLIGHT_FRAMES]; // This one's not accurately named, it's used for all kinds of stuff that's not vertices or indices. VulkanPushPool *pushUBO_ = nullptr; From 84f9c1694f269ad1fb4ed2f11cdc8c181ae2c723 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Mon, 9 Oct 2023 16:55:05 +0200 Subject: [PATCH 03/12] Better logging in descpool --- Common/GPU/Vulkan/VulkanDescSet.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Common/GPU/Vulkan/VulkanDescSet.cpp b/Common/GPU/Vulkan/VulkanDescSet.cpp index 444a3670dc7d..05ea9eadb65b 100644 --- a/Common/GPU/Vulkan/VulkanDescSet.cpp +++ b/Common/GPU/Vulkan/VulkanDescSet.cpp @@ -57,6 +57,7 @@ VkDescriptorSet VulkanDescSetPool::Allocate(int n, const VkDescriptorSetLayout * VkResult result = vkAllocateDescriptorSets(vulkan_->GetDevice(), &descAlloc, &desc); if (result == VK_ERROR_FRAGMENTED_POOL || result < 0) { + WARN_LOG(G3D, "Pool %s %s - recreating", tag_, result == VK_ERROR_FRAGMENTED_POOL ? "fragmented" : "full"); // There seems to have been a spec revision. Here we should apparently recreate the descriptor pool, // so let's do that. See https://www.khronos.org/registry/vulkan/specs/1.0/man/html/vkAllocateDescriptorSets.html // Fragmentation shouldn't really happen though since we wipe the pool every frame. @@ -112,7 +113,7 @@ VkResult VulkanDescSetPool::Recreate(bool grow) { // Delete the pool if it already exists. if (descPool_ != VK_NULL_HANDLE) { - DEBUG_LOG(G3D, "Reallocating %s desc pool from %d to %d", tag_, prevSize, info_.maxSets); + INFO_LOG(G3D, "Reallocating %s desc pool from %d to %d", tag_, prevSize, info_.maxSets); vulkan_->Delete().QueueDeleteDescriptorPool(descPool_); clear_(); usage_ = 0; From 018e4ee1d61d0a62714212cd9f5c0051e3bf8e6e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Mon, 9 Oct 2023 17:28:56 +0200 Subject: [PATCH 04/12] Show the desc set write time even in the limited GPU profiler. --- Common/GPU/Vulkan/VulkanRenderManager.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Common/GPU/Vulkan/VulkanRenderManager.cpp b/Common/GPU/Vulkan/VulkanRenderManager.cpp index b8c3937f5cc5..de8b2e348ecd 100644 --- a/Common/GPU/Vulkan/VulkanRenderManager.cpp +++ b/Common/GPU/Vulkan/VulkanRenderManager.cpp @@ -710,6 +710,9 @@ void VulkanRenderManager::BeginFrame(bool enableProfiling, bool enableLogProfile renderCPUTimeMs_.Update((frameData.profile.cpuEndTime - frameData.profile.cpuStartTime) * 1000.0); renderCPUTimeMs_.Format(line, sizeof(line)); str << line; + descUpdateTimeMs_.Update(frameData.profile.descWriteTime * 1000.0); + descUpdateTimeMs_.Format(line, sizeof(line)); + str << line; frameData.profile.profileSummary = str.str(); } } From 8a4d84d82b914440eacc6457ca719007477ae44d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Mon, 9 Oct 2023 21:36:27 +0200 Subject: [PATCH 05/12] Simplest possible de-duplication of descriptor set writes --- Common/Data/Collections/FastVec.h | 1 + Common/GPU/Vulkan/VulkanFrameData.h | 1 + Common/GPU/Vulkan/VulkanRenderManager.cpp | 26 ++++++++++++++++++++--- Common/GPU/Vulkan/VulkanRenderManager.h | 2 +- 4 files changed, 26 insertions(+), 4 deletions(-) diff --git a/Common/Data/Collections/FastVec.h b/Common/Data/Collections/FastVec.h index b78cab6f7771..b428682786ff 100644 --- a/Common/Data/Collections/FastVec.h +++ b/Common/Data/Collections/FastVec.h @@ -69,6 +69,7 @@ class FastVec { void clear() { size_ = 0; } bool empty() const { return size_ == 0; } + const T *data() { return data_; } T *begin() { return data_; } T *end() { return data_ + size_; } const T *begin() const { return data_; } diff --git a/Common/GPU/Vulkan/VulkanFrameData.h b/Common/GPU/Vulkan/VulkanFrameData.h index 9723c779bffe..0085c3ed9660 100644 --- a/Common/GPU/Vulkan/VulkanFrameData.h +++ b/Common/GPU/Vulkan/VulkanFrameData.h @@ -28,6 +28,7 @@ struct QueueProfileContext { double cpuStartTime; double cpuEndTime; double descWriteTime; + int descriptorsWritten; }; class VKRFramebuffer; diff --git a/Common/GPU/Vulkan/VulkanRenderManager.cpp b/Common/GPU/Vulkan/VulkanRenderManager.cpp index de8b2e348ecd..49e8c18c11b9 100644 --- a/Common/GPU/Vulkan/VulkanRenderManager.cpp +++ b/Common/GPU/Vulkan/VulkanRenderManager.cpp @@ -685,6 +685,8 @@ void VulkanRenderManager::BeginFrame(bool enableProfiling, bool enableLogProfile descUpdateTimeMs_.Update(frameData.profile.descWriteTime * 1000.0); descUpdateTimeMs_.Format(line, sizeof(line)); str << line; + snprintf(line, sizeof(line), "Descriptors written: %d\n", frameData.profile.descriptorsWritten); + str << line; for (int i = 0; i < numQueries - 1; i++) { uint64_t diff = (queryResults[i + 1] - queryResults[i]) & timestampDiffMask; double milliseconds = (double)diff * timestampConversionFactor; @@ -713,10 +715,14 @@ void VulkanRenderManager::BeginFrame(bool enableProfiling, bool enableLogProfile descUpdateTimeMs_.Update(frameData.profile.descWriteTime * 1000.0); descUpdateTimeMs_.Format(line, sizeof(line)); str << line; + snprintf(line, sizeof(line), "Descriptors written: %d\n", frameData.profile.descriptorsWritten); + str << line; frameData.profile.profileSummary = str.str(); } } + frameData.profile.descriptorsWritten = 0; + // Must be after the fence - this performs deletes. VLOG("PUSH: BeginFrame %d", curFrame); @@ -1666,7 +1672,7 @@ void VulkanRenderManager::DestroyPipelineLayout(VKRPipelineLayout *layout) { void VulkanRenderManager::FlushDescriptors(int frame) { for (auto iter : pipelineLayouts_) { - iter->FlushDescSets(vulkan_, frame); + iter->FlushDescSets(vulkan_, frame, &frameData_[frame].profile); } } @@ -1678,7 +1684,7 @@ void VulkanRenderManager::ResetDescriptorLists(int frame) { } } -void VKRPipelineLayout::FlushDescSets(VulkanContext *vulkan, int frame) { +void VKRPipelineLayout::FlushDescSets(VulkanContext *vulkan, int frame, QueueProfileContext *profile) { _dbg_assert_(frame < VulkanContext::MAX_INFLIGHT_FRAMES); VulkanDescSetPool &pool = descPools[frame]; @@ -1696,12 +1702,23 @@ void VKRPipelineLayout::FlushDescSets(VulkanContext *vulkan, int frame) { VkDescriptorImageInfo imageInfo[MAX_DESC_SET_BINDINGS]; // just picked a practical number VkDescriptorBufferInfo bufferInfo[MAX_DESC_SET_BINDINGS]; - for (size_t index = flushedDescriptors_[frame]; index < descSets.size(); index++) { + size_t start = flushedDescriptors_[frame]; + int writeCount = 0; + + for (size_t index = start; index < descSets.size(); index++) { auto &d = descSets[index]; // TODO: This is where to look up to see if we already have an identical descriptor previously in the array. // We can do this with a simple custom hash map here that doesn't handle collisions, since false positives aren't too bad. // Should probably check history, one or two items, then fall back to lookup. Or we should do the history lookup in BindDescriptors... + if (index > start + 1) { + if (descSets[index - 1].count == d.count) { + if (!memcmp(descData.data() + d.offset, descData.data() + descSets[index - 1].offset, d.count * sizeof(PackedDescriptor))) { + d.set = descSets[index - 1].set; + continue; + } + } + } // For now we just allocate unconditionally. d.set = pool.Allocate(1, &descriptorSetLayout, nullptr); @@ -1767,7 +1784,10 @@ void VKRPipelineLayout::FlushDescSets(VulkanContext *vulkan, int frame) { } vkUpdateDescriptorSets(vulkan->GetDevice(), numWrites, writes, 0, nullptr); + + writeCount++; } flushedDescriptors_[frame] = (int)descSets.size(); + profile->descriptorsWritten += writeCount; } diff --git a/Common/GPU/Vulkan/VulkanRenderManager.h b/Common/GPU/Vulkan/VulkanRenderManager.h index d4d7be5bf7a3..32472682d567 100644 --- a/Common/GPU/Vulkan/VulkanRenderManager.h +++ b/Common/GPU/Vulkan/VulkanRenderManager.h @@ -233,7 +233,7 @@ struct VKRPipelineLayout { FastVec descSets_[VulkanContext::MAX_INFLIGHT_FRAMES]; int flushedDescriptors_[VulkanContext::MAX_INFLIGHT_FRAMES]{}; - void FlushDescSets(VulkanContext *vulkan, int frame); + void FlushDescSets(VulkanContext *vulkan, int frame, QueueProfileContext *profile); }; class VulkanRenderManager { From ba4d1668ce0cc01b8ba2107360e9bf9445dd481d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Mon, 9 Oct 2023 21:48:00 +0200 Subject: [PATCH 06/12] Don't forget to update descCount in tess mode --- GPU/Vulkan/DrawEngineVulkan.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/GPU/Vulkan/DrawEngineVulkan.cpp b/GPU/Vulkan/DrawEngineVulkan.cpp index 0c1a00d86651..8d674d3e3e6c 100644 --- a/GPU/Vulkan/DrawEngineVulkan.cpp +++ b/GPU/Vulkan/DrawEngineVulkan.cpp @@ -754,6 +754,7 @@ void DrawEngineVulkan::DoFlush() { descriptors[j + 6].buffer.offset = bufInfo[j].offset; descriptors[j + 6].buffer.range = bufInfo[j].range; } + descCount = 9; } // TODO: Can we avoid binding all three when not needed? Same below for hardware transform. // Think this will require different descriptor set layouts. From 9c1c09ff5c40685a2090d0a8f8973ed6bf731b27 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Mon, 9 Oct 2023 21:48:22 +0200 Subject: [PATCH 07/12] Remove commented out code --- GPU/Vulkan/DrawEngineVulkan.cpp | 141 -------------------------------- 1 file changed, 141 deletions(-) diff --git a/GPU/Vulkan/DrawEngineVulkan.cpp b/GPU/Vulkan/DrawEngineVulkan.cpp index 8d674d3e3e6c..2fd336155e34 100644 --- a/GPU/Vulkan/DrawEngineVulkan.cpp +++ b/GPU/Vulkan/DrawEngineVulkan.cpp @@ -272,147 +272,6 @@ void DrawEngineVulkan::DecodeVertsToPushPool(VulkanPushPool *push, uint32_t *bin DecodeVerts(dest); } -/* -VkDescriptorSet DrawEngineVulkan::GetOrCreateDescriptorSet(VkImageView imageView, VkSampler sampler, VkBuffer base, VkBuffer light, VkBuffer bone, bool tess) { - _dbg_assert_(base != VK_NULL_HANDLE); - _dbg_assert_(light != VK_NULL_HANDLE); - _dbg_assert_(bone != VK_NULL_HANDLE); - - DescriptorSetKey key{}; - key.imageView_ = imageView; - key.sampler_ = sampler; - key.secondaryImageView_ = boundSecondary_; - key.depalImageView_ = boundDepal_; - key.base_ = base; - key.light_ = light; - key.bone_ = bone; - - FrameData &frame = GetCurFrame(); - // See if we already have this descriptor set cached. - if (!tess) { // Don't cache descriptors for HW tessellation. - VkDescriptorSet d; - if (frame.descSets.Get(key, &d)) { - return d; - } - } - - // We just don't write to the slots we don't care about, which is fine. - VkWriteDescriptorSet writes[DRAW_BINDING_COUNT]{}; - // Main texture - int n = 0; - VkDescriptorImageInfo tex[3]{}; - if (imageView) { - _dbg_assert_(sampler != VK_NULL_HANDLE); - - tex[0].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - tex[0].imageView = imageView; - tex[0].sampler = sampler; - writes[n].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - writes[n].pNext = nullptr; - writes[n].dstBinding = DRAW_BINDING_TEXTURE; - writes[n].pImageInfo = &tex[0]; - writes[n].descriptorCount = 1; - writes[n].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - writes[n].dstSet = desc; - n++; - } - - if (boundSecondary_) { - tex[1].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - tex[1].imageView = boundSecondary_; - tex[1].sampler = samplerSecondaryNearest_; - writes[n].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - writes[n].pNext = nullptr; - writes[n].dstBinding = DRAW_BINDING_2ND_TEXTURE; - writes[n].pImageInfo = &tex[1]; - writes[n].descriptorCount = 1; - writes[n].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - writes[n].dstSet = desc; - n++; - } - - if (boundDepal_) { - tex[2].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - tex[2].imageView = boundDepal_; - tex[2].sampler = boundDepalSmoothed_ ? samplerSecondaryLinear_ : samplerSecondaryNearest_; - writes[n].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - writes[n].pNext = nullptr; - writes[n].dstBinding = DRAW_BINDING_DEPAL_TEXTURE; - writes[n].pImageInfo = &tex[2]; - writes[n].descriptorCount = 1; - writes[n].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - writes[n].dstSet = desc; - n++; - } - - // Uniform buffer objects - VkDescriptorBufferInfo buf[3]{}; - int count = 0; - buf[count].buffer = base; - buf[count].offset = 0; - buf[count].range = sizeof(UB_VS_FS_Base); - count++; - buf[count].buffer = light; - buf[count].offset = 0; - buf[count].range = sizeof(UB_VS_Lights); - count++; - buf[count].buffer = bone; - buf[count].offset = 0; - buf[count].range = sizeof(UB_VS_Bones); - count++; - for (int i = 0; i < count; i++) { - writes[n].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - writes[n].pNext = nullptr; - writes[n].dstBinding = DRAW_BINDING_DYNUBO_BASE + i; - writes[n].dstArrayElement = 0; - writes[n].pBufferInfo = &buf[i]; - writes[n].dstSet = desc; - writes[n].descriptorCount = 1; - writes[n].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC; - n++; - } - - // Tessellation data buffer. - if (tess) { - const VkDescriptorBufferInfo *bufInfo = tessDataTransferVulkan->GetBufferInfo(); - // Control Points - writes[n].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - writes[n].pNext = nullptr; - writes[n].dstBinding = DRAW_BINDING_TESS_STORAGE_BUF; - writes[n].pBufferInfo = &bufInfo[0]; - writes[n].descriptorCount = 1; - writes[n].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - writes[n].dstSet = desc; - n++; - // Weights U - writes[n].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - writes[n].pNext = nullptr; - writes[n].dstBinding = DRAW_BINDING_TESS_STORAGE_BUF_WU; - writes[n].pBufferInfo = &bufInfo[1]; - writes[n].descriptorCount = 1; - writes[n].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - writes[n].dstSet = desc; - n++; - // Weights V - writes[n].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - writes[n].pNext = nullptr; - writes[n].dstBinding = DRAW_BINDING_TESS_STORAGE_BUF_WV; - writes[n].pBufferInfo = &bufInfo[2]; - writes[n].descriptorCount = 1; - writes[n].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - writes[n].dstSet = desc; - n++; - } - - VulkanContext *vulkan = (VulkanContext *)draw_->GetNativeObject(Draw::NativeObject::CONTEXT); - vkUpdateDescriptorSets(vulkan->GetDevice(), n, writes, 0, nullptr); - - if (!tess) // Again, avoid caching when HW tessellation. - frame.descSets.Insert(key, desc); - return desc; -} -*/ - void DrawEngineVulkan::DirtyAllUBOs() { baseUBOOffset = 0; lightUBOOffset = 0; From 397745ce1485f7612c97029efb1bbb10095424c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Mon, 9 Oct 2023 21:48:33 +0200 Subject: [PATCH 08/12] Remove unused code --- Common/GPU/Vulkan/VulkanQueueRunner.cpp | 12 ++------ Common/GPU/Vulkan/VulkanQueueRunner.h | 2 -- Common/GPU/Vulkan/VulkanRenderManager.h | 38 +------------------------ 3 files changed, 3 insertions(+), 49 deletions(-) diff --git a/Common/GPU/Vulkan/VulkanQueueRunner.cpp b/Common/GPU/Vulkan/VulkanQueueRunner.cpp index 0b774b25690f..34f70ec545db 100644 --- a/Common/GPU/Vulkan/VulkanQueueRunner.cpp +++ b/Common/GPU/Vulkan/VulkanQueueRunner.cpp @@ -1338,11 +1338,7 @@ void VulkanQueueRunner::PerformRenderPass(const VKRStep &step, VkCommandBuffer c case VKRRenderCommand::DRAW_INDEXED: if (pipelineOK) { - VkDescriptorSet set; - if (c.drawIndexed.ds) - set = c.drawIndexed.ds; - else - set = vkrPipelineLayout->descSets_[curFrame][c.drawIndexed.descSetIndex].set; + VkDescriptorSet set = vkrPipelineLayout->descSets_[curFrame][c.drawIndexed.descSetIndex].set; vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout, 0, 1, &set, c.drawIndexed.numUboOffsets, c.drawIndexed.uboOffsets); vkCmdBindIndexBuffer(cmd, c.drawIndexed.ibuffer, c.drawIndexed.ioffset, VK_INDEX_TYPE_UINT16); VkDeviceSize voffset = c.drawIndexed.voffset; @@ -1353,11 +1349,7 @@ void VulkanQueueRunner::PerformRenderPass(const VKRStep &step, VkCommandBuffer c case VKRRenderCommand::DRAW: if (pipelineOK) { - VkDescriptorSet set; - if (c.drawIndexed.ds) - set = c.drawIndexed.ds; - else - set = vkrPipelineLayout->descSets_[curFrame][c.drawIndexed.descSetIndex].set; + VkDescriptorSet set = vkrPipelineLayout->descSets_[curFrame][c.drawIndexed.descSetIndex].set; _dbg_assert_(set != VK_NULL_HANDLE); vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout, 0, 1, &set, c.draw.numUboOffsets, c.draw.uboOffsets); if (c.draw.vbuffer) { diff --git a/Common/GPU/Vulkan/VulkanQueueRunner.h b/Common/GPU/Vulkan/VulkanQueueRunner.h index e26b33cf4ea5..2cbdd3277a3e 100644 --- a/Common/GPU/Vulkan/VulkanQueueRunner.h +++ b/Common/GPU/Vulkan/VulkanQueueRunner.h @@ -70,7 +70,6 @@ struct VkRenderData { VKRPipelineLayout *pipelineLayout; } compute_pipeline; struct { - VkDescriptorSet ds; uint32_t descSetIndex; int numUboOffsets; uint32_t uboOffsets[3]; @@ -80,7 +79,6 @@ struct VkRenderData { uint32_t offset; } draw; struct { - VkDescriptorSet ds; uint32_t descSetIndex; uint32_t uboOffsets[3]; uint16_t numUboOffsets; diff --git a/Common/GPU/Vulkan/VulkanRenderManager.h b/Common/GPU/Vulkan/VulkanRenderManager.h index 32472682d567..56fa19111efb 100644 --- a/Common/GPU/Vulkan/VulkanRenderManager.h +++ b/Common/GPU/Vulkan/VulkanRenderManager.h @@ -466,7 +466,7 @@ class VulkanRenderManager { PendingDescSet &descSet = curPipelineLayout_->descSets_[curFrame].push_uninitialized(); descSet.offset = (uint32_t)offset; descSet.count = count; - descSet.set = VK_NULL_HANDLE; // to be filled in + // descSet.set = VK_NULL_HANDLE; // to be filled in return setIndex; } @@ -478,7 +478,6 @@ class VulkanRenderManager { data.cmd = VKRRenderCommand::DRAW; data.draw.count = count; data.draw.offset = offset; - data.draw.ds = VK_NULL_HANDLE; data.draw.descSetIndex = setIndex; data.draw.vbuffer = vbuffer; data.draw.voffset = voffset; @@ -489,22 +488,6 @@ class VulkanRenderManager { curRenderStep_->render.numDraws++; } - void Draw(VkDescriptorSet descSet, int numUboOffsets, const uint32_t *uboOffsets, VkBuffer vbuffer, int voffset, int count, int offset = 0) { - _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == VKRStepType::RENDER && curStepHasViewport_ && curStepHasScissor_); - VkRenderData &data = curRenderStep_->commands.push_uninitialized(); - data.cmd = VKRRenderCommand::DRAW; - data.draw.count = count; - data.draw.offset = offset; - data.draw.ds = descSet; - data.draw.vbuffer = vbuffer; - data.draw.voffset = voffset; - data.draw.numUboOffsets = numUboOffsets; - _dbg_assert_(numUboOffsets <= ARRAY_SIZE(data.draw.uboOffsets)); - for (int i = 0; i < numUboOffsets; i++) - data.draw.uboOffsets[i] = uboOffsets[i]; - curRenderStep_->render.numDraws++; - } - void DrawIndexed(const PackedDescriptor *desc, int descCount, int numUboOffsets, const uint32_t *uboOffsets, VkBuffer vbuffer, int voffset, VkBuffer ibuffer, int ioffset, int count, int numInstances) { _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == VKRStepType::RENDER && curStepHasViewport_ && curStepHasScissor_); int setIndex = BindDescriptors(desc, descCount); @@ -512,7 +495,6 @@ class VulkanRenderManager { data.cmd = VKRRenderCommand::DRAW_INDEXED; data.drawIndexed.count = count; data.drawIndexed.instances = numInstances; - data.drawIndexed.ds = VK_NULL_HANDLE; data.drawIndexed.descSetIndex = setIndex; data.drawIndexed.vbuffer = vbuffer; data.drawIndexed.voffset = voffset; @@ -525,24 +507,6 @@ class VulkanRenderManager { curRenderStep_->render.numDraws++; } - void DrawIndexed(VkDescriptorSet descSet, int numUboOffsets, const uint32_t *uboOffsets, VkBuffer vbuffer, int voffset, VkBuffer ibuffer, int ioffset, int count, int numInstances) { - _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == VKRStepType::RENDER && curStepHasViewport_ && curStepHasScissor_); - VkRenderData &data = curRenderStep_->commands.push_uninitialized(); - data.cmd = VKRRenderCommand::DRAW_INDEXED; - data.drawIndexed.count = count; - data.drawIndexed.instances = numInstances; - data.drawIndexed.ds = descSet; - data.drawIndexed.vbuffer = vbuffer; - data.drawIndexed.voffset = voffset; - data.drawIndexed.ibuffer = ibuffer; - data.drawIndexed.ioffset = ioffset; - data.drawIndexed.numUboOffsets = numUboOffsets; - _dbg_assert_(numUboOffsets <= ARRAY_SIZE(data.drawIndexed.uboOffsets)); - for (int i = 0; i < numUboOffsets; i++) - data.drawIndexed.uboOffsets[i] = uboOffsets[i]; - curRenderStep_->render.numDraws++; - } - // These can be useful both when inspecting in RenderDoc, and when manually inspecting recorded commands // in the debugger. void DebugAnnotate(const char *annotation) { From 2b0192d8187de29fe76094ebb82ef6c284db4512 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Mon, 9 Oct 2023 23:08:19 +0200 Subject: [PATCH 09/12] Have FrameData structs for each pipeline layout, instead of multiple arrays --- Common/GPU/Vulkan/VulkanQueueRunner.cpp | 10 ++++---- Common/GPU/Vulkan/VulkanRenderManager.cpp | 26 ++++++++++++--------- Common/GPU/Vulkan/VulkanRenderManager.h | 28 +++++++++++++---------- 3 files changed, 36 insertions(+), 28 deletions(-) diff --git a/Common/GPU/Vulkan/VulkanQueueRunner.cpp b/Common/GPU/Vulkan/VulkanQueueRunner.cpp index 34f70ec545db..2c5b5d55ef21 100644 --- a/Common/GPU/Vulkan/VulkanQueueRunner.cpp +++ b/Common/GPU/Vulkan/VulkanQueueRunner.cpp @@ -1199,7 +1199,7 @@ void VulkanQueueRunner::PerformRenderPass(const VKRStep &step, VkCommandBuffer c // The stencil ones are very commonly mostly redundant so let's eliminate them where possible. // Might also want to consider scissor and viewport. VkPipeline lastPipeline = VK_NULL_HANDLE; - VKRPipelineLayout *vkrPipelineLayout = nullptr; + FastVec *descSets = nullptr; VkPipelineLayout pipelineLayout = VK_NULL_HANDLE; bool pipelineOK = false; @@ -1242,8 +1242,8 @@ void VulkanQueueRunner::PerformRenderPass(const VKRStep &step, VkCommandBuffer c if (pipeline != VK_NULL_HANDLE) { vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); - vkrPipelineLayout = c.pipeline.pipelineLayout; - pipelineLayout = vkrPipelineLayout->pipelineLayout; + descSets = &c.pipeline.pipelineLayout->frameData[curFrame].descSets_; + pipelineLayout = c.pipeline.pipelineLayout->pipelineLayout; lastGraphicsPipeline = graphicsPipeline; pipelineOK = true; } else { @@ -1338,7 +1338,7 @@ void VulkanQueueRunner::PerformRenderPass(const VKRStep &step, VkCommandBuffer c case VKRRenderCommand::DRAW_INDEXED: if (pipelineOK) { - VkDescriptorSet set = vkrPipelineLayout->descSets_[curFrame][c.drawIndexed.descSetIndex].set; + VkDescriptorSet set = (*descSets)[c.drawIndexed.descSetIndex].set; vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout, 0, 1, &set, c.drawIndexed.numUboOffsets, c.drawIndexed.uboOffsets); vkCmdBindIndexBuffer(cmd, c.drawIndexed.ibuffer, c.drawIndexed.ioffset, VK_INDEX_TYPE_UINT16); VkDeviceSize voffset = c.drawIndexed.voffset; @@ -1349,7 +1349,7 @@ void VulkanQueueRunner::PerformRenderPass(const VKRStep &step, VkCommandBuffer c case VKRRenderCommand::DRAW: if (pipelineOK) { - VkDescriptorSet set = vkrPipelineLayout->descSets_[curFrame][c.drawIndexed.descSetIndex].set; + VkDescriptorSet set = (*descSets)[c.drawIndexed.descSetIndex].set; _dbg_assert_(set != VK_NULL_HANDLE); vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout, 0, 1, &set, c.draw.numUboOffsets, c.draw.uboOffsets); if (c.draw.vbuffer) { diff --git a/Common/GPU/Vulkan/VulkanRenderManager.cpp b/Common/GPU/Vulkan/VulkanRenderManager.cpp index 49e8c18c11b9..569e15b95459 100644 --- a/Common/GPU/Vulkan/VulkanRenderManager.cpp +++ b/Common/GPU/Vulkan/VulkanRenderManager.cpp @@ -1647,8 +1647,8 @@ VKRPipelineLayout *VulkanRenderManager::CreatePipelineLayout(BindingType *bindin vulkan_->SetDebugName(layout->pipelineLayout, VK_OBJECT_TYPE_PIPELINE_LAYOUT, tag); for (int i = 0; i < VulkanContext::MAX_INFLIGHT_FRAMES; i++) { - layout->descPools[i].Create(vulkan_, bindingTypes, (uint32_t)bindingTypesCount, 512); - layout->descPools[i].Setup([]() {}); + layout->frameData[i].pool.Create(vulkan_, bindingTypes, (uint32_t)bindingTypesCount, 512); + layout->frameData[i].pool.Setup([]() {}); } pipelineLayouts_.push_back(layout); @@ -1657,7 +1657,7 @@ VKRPipelineLayout *VulkanRenderManager::CreatePipelineLayout(BindingType *bindin void VulkanRenderManager::DestroyPipelineLayout(VKRPipelineLayout *layout) { for (int i = 0; i < VulkanContext::MAX_INFLIGHT_FRAMES; i++) { - layout->descPools[i].Destroy(); + layout->frameData[i].pool.Destroy(); } vulkan_->Delete().QueueDeletePipelineLayout(layout->pipelineLayout); @@ -1678,18 +1678,22 @@ void VulkanRenderManager::FlushDescriptors(int frame) { void VulkanRenderManager::ResetDescriptorLists(int frame) { for (auto iter : pipelineLayouts_) { - iter->flushedDescriptors_[frame] = 0; - iter->descSets_[frame].clear(); - iter->descData_[frame].clear(); + VKRPipelineLayout::FrameData &data = iter->frameData[frame]; + + data.flushedDescriptors_ = 0; + data.descSets_.clear(); + data.descData_.clear(); } } void VKRPipelineLayout::FlushDescSets(VulkanContext *vulkan, int frame, QueueProfileContext *profile) { _dbg_assert_(frame < VulkanContext::MAX_INFLIGHT_FRAMES); - VulkanDescSetPool &pool = descPools[frame]; - FastVec &descData = descData_[frame]; - FastVec &descSets = descSets_[frame]; + FrameData &data = frameData[frame]; + + VulkanDescSetPool &pool = data.pool; + FastVec &descData = data.descData_; + FastVec &descSets = data.descSets_; pool.Reset(); @@ -1702,7 +1706,7 @@ void VKRPipelineLayout::FlushDescSets(VulkanContext *vulkan, int frame, QueuePro VkDescriptorImageInfo imageInfo[MAX_DESC_SET_BINDINGS]; // just picked a practical number VkDescriptorBufferInfo bufferInfo[MAX_DESC_SET_BINDINGS]; - size_t start = flushedDescriptors_[frame]; + size_t start = data.flushedDescriptors_; int writeCount = 0; for (size_t index = start; index < descSets.size(); index++) { @@ -1788,6 +1792,6 @@ void VKRPipelineLayout::FlushDescSets(VulkanContext *vulkan, int frame, QueuePro writeCount++; } - flushedDescriptors_[frame] = (int)descSets.size(); + data.flushedDescriptors_ = (int)descSets.size(); profile->descriptorsWritten += writeCount; } diff --git a/Common/GPU/Vulkan/VulkanRenderManager.h b/Common/GPU/Vulkan/VulkanRenderManager.h index 56fa19111efb..9be8c4ca90ac 100644 --- a/Common/GPU/Vulkan/VulkanRenderManager.h +++ b/Common/GPU/Vulkan/VulkanRenderManager.h @@ -213,7 +213,7 @@ struct VKRPipelineLayout { VKRPipelineLayout() {} ~VKRPipelineLayout() { _assert_(!pipelineLayout && !descriptorSetLayout); - _assert_(descPools[0].IsDestroyed()); + _assert_(frameData[0].pool.IsDestroyed()); } enum { MAX_DESC_SET_BINDINGS = 10 }; BindingType bindingTypes[MAX_DESC_SET_BINDINGS]; @@ -224,14 +224,16 @@ struct VKRPipelineLayout { int pushConstSize = 0; const char *tag = nullptr; - // The pipeline layout owns the descriptor set pools. Don't go create excessive layouts. - VulkanDescSetPool descPools[VulkanContext::MAX_INFLIGHT_FRAMES]; + struct FrameData { + VulkanDescSetPool pool; + FastVec descData_; + FastVec descSets_; + // TODO: We should be able to get away with a single descData_/descSets_ and then send it along, + // but it's easier to just segregate by frame id. + int flushedDescriptors_ = 0; + }; - // TODO: We should be able to get away with a single descData_/descSets_ and then send it along, - // but it's easier to just segregate by frame id. - FastVec descData_[VulkanContext::MAX_INFLIGHT_FRAMES]; - FastVec descSets_[VulkanContext::MAX_INFLIGHT_FRAMES]; - int flushedDescriptors_[VulkanContext::MAX_INFLIGHT_FRAMES]{}; + FrameData frameData[VulkanContext::MAX_INFLIGHT_FRAMES]; void FlushDescSets(VulkanContext *vulkan, int frame, QueueProfileContext *profile); }; @@ -459,11 +461,13 @@ class VulkanRenderManager { int curFrame = vulkan_->GetCurFrame(); - size_t offset = curPipelineLayout_->descData_[curFrame].size(); - curPipelineLayout_->descData_[curFrame].extend(desc, count); + VKRPipelineLayout::FrameData &data = curPipelineLayout_->frameData[curFrame]; + + size_t offset = data.descData_.size(); + data.descData_.extend(desc, count); - int setIndex = (int)curPipelineLayout_->descSets_[curFrame].size(); - PendingDescSet &descSet = curPipelineLayout_->descSets_[curFrame].push_uninitialized(); + int setIndex = (int)data.descSets_.size(); + PendingDescSet &descSet = data.descSets_.push_uninitialized(); descSet.offset = (uint32_t)offset; descSet.count = count; // descSet.set = VK_NULL_HANDLE; // to be filled in From b38b46df0f34fd8209752fc36e8f937f3891fab8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Mon, 9 Oct 2023 23:20:27 +0200 Subject: [PATCH 10/12] Update comments --- Common/GPU/Vulkan/VulkanRenderManager.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Common/GPU/Vulkan/VulkanRenderManager.cpp b/Common/GPU/Vulkan/VulkanRenderManager.cpp index 569e15b95459..6ec9ca4a9e47 100644 --- a/Common/GPU/Vulkan/VulkanRenderManager.cpp +++ b/Common/GPU/Vulkan/VulkanRenderManager.cpp @@ -1698,8 +1698,7 @@ void VKRPipelineLayout::FlushDescSets(VulkanContext *vulkan, int frame, QueuePro pool.Reset(); // This will write all descriptors. - // Initially, we won't do any de-duplication, so no hashmap lookups but also extra cost of writing additional descriptors. - // A short look-back might be enough? + // Initially, we just do a simple look-back comparing to the previous descriptor to avoid sequential dupes. // Initially, let's do naive single desc set writes. VkWriteDescriptorSet writes[MAX_DESC_SET_BINDINGS]; @@ -1712,9 +1711,9 @@ void VKRPipelineLayout::FlushDescSets(VulkanContext *vulkan, int frame, QueuePro for (size_t index = start; index < descSets.size(); index++) { auto &d = descSets[index]; - // TODO: This is where to look up to see if we already have an identical descriptor previously in the array. - // We can do this with a simple custom hash map here that doesn't handle collisions, since false positives aren't too bad. - // Should probably check history, one or two items, then fall back to lookup. Or we should do the history lookup in BindDescriptors... + // This is where we look up to see if we already have an identical descriptor previously in the array. + // We could do a simple custom hash map here that doesn't handle collisions, since false positives aren't too bad. + // Instead, for now we just check history one item backwards. Good enough, it seems. if (index > start + 1) { if (descSets[index - 1].count == d.count) { if (!memcmp(descData.data() + d.offset, descData.data() + descSets[index - 1].offset, d.count * sizeof(PackedDescriptor))) { @@ -1724,9 +1723,10 @@ void VKRPipelineLayout::FlushDescSets(VulkanContext *vulkan, int frame, QueuePro } } - // For now we just allocate unconditionally. + // TODO: Allocate in batches. d.set = pool.Allocate(1, &descriptorSetLayout, nullptr); + // TODO: Build up bigger batches of writes. const PackedDescriptor *data = descData.begin() + d.offset; int numWrites = 0; int numBuffers = 0; From 9fdc7e237299755f2dd20934c39b07add6b9ee00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Tue, 10 Oct 2023 08:59:47 +0200 Subject: [PATCH 11/12] Address feedback --- Common/GPU/Vulkan/VulkanRenderManager.cpp | 5 +++-- Common/UI/Context.cpp | 2 ++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/Common/GPU/Vulkan/VulkanRenderManager.cpp b/Common/GPU/Vulkan/VulkanRenderManager.cpp index 6ec9ca4a9e47..f6af92c35c20 100644 --- a/Common/GPU/Vulkan/VulkanRenderManager.cpp +++ b/Common/GPU/Vulkan/VulkanRenderManager.cpp @@ -1647,7 +1647,8 @@ VKRPipelineLayout *VulkanRenderManager::CreatePipelineLayout(BindingType *bindin vulkan_->SetDebugName(layout->pipelineLayout, VK_OBJECT_TYPE_PIPELINE_LAYOUT, tag); for (int i = 0; i < VulkanContext::MAX_INFLIGHT_FRAMES; i++) { - layout->frameData[i].pool.Create(vulkan_, bindingTypes, (uint32_t)bindingTypesCount, 512); + // Some games go beyond 1024 and end up having to resize like GTA, but most stay below so we start there. + layout->frameData[i].pool.Create(vulkan_, bindingTypes, (uint32_t)bindingTypesCount, 1024); layout->frameData[i].pool.Setup([]() {}); } @@ -1712,7 +1713,7 @@ void VKRPipelineLayout::FlushDescSets(VulkanContext *vulkan, int frame, QueuePro auto &d = descSets[index]; // This is where we look up to see if we already have an identical descriptor previously in the array. - // We could do a simple custom hash map here that doesn't handle collisions, since false positives aren't too bad. + // We could do a simple custom hash map here that doesn't handle collisions, since those won't matter. // Instead, for now we just check history one item backwards. Good enough, it seems. if (index > start + 1) { if (descSets[index - 1].count == d.count) { diff --git a/Common/UI/Context.cpp b/Common/UI/Context.cpp index a660ece7658f..31b59419ed67 100644 --- a/Common/UI/Context.cpp +++ b/Common/UI/Context.cpp @@ -83,6 +83,8 @@ void UIContext::BeginPipeline(Draw::Pipeline *pipeline, Draw::SamplerState *samp // Also clear out any other textures bound. Draw::SamplerState *samplers[3]{ samplerState }; draw_->BindSamplerStates(0, 3, samplers); + Draw::Texture *textures[2]{}; + draw_->BindTextures(1, 2, textures); RebindTexture(); UIBegin(pipeline); } From 3d949b080dd4a9f7f84a76332b3ec012046ae322 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Tue, 10 Oct 2023 09:14:10 +0200 Subject: [PATCH 12/12] Prepare VulkanDescSetPool for block allocation --- Common/GPU/Vulkan/VulkanDescSet.cpp | 21 ++++++++------------- Common/GPU/Vulkan/VulkanDescSet.h | 2 +- Common/GPU/Vulkan/VulkanRenderManager.cpp | 2 +- GPU/Vulkan/VulkanUtil.cpp | 3 ++- 4 files changed, 12 insertions(+), 16 deletions(-) diff --git a/Common/GPU/Vulkan/VulkanDescSet.cpp b/Common/GPU/Vulkan/VulkanDescSet.cpp index 05ea9eadb65b..cb85dd8425ec 100644 --- a/Common/GPU/Vulkan/VulkanDescSet.cpp +++ b/Common/GPU/Vulkan/VulkanDescSet.cpp @@ -42,19 +42,18 @@ void VulkanDescSetPool::Create(VulkanContext *vulkan, const BindingType *binding _assert_msg_(res == VK_SUCCESS, "Could not create VulkanDescSetPool %s", tag_); } -VkDescriptorSet VulkanDescSetPool::Allocate(int n, const VkDescriptorSetLayout *layouts, const char *tag) { - if (descPool_ == VK_NULL_HANDLE || usage_ + n >= info_.maxSets) { +bool VulkanDescSetPool::Allocate(VkDescriptorSet *descriptorSets, int count, const VkDescriptorSetLayout *layouts) { + if (descPool_ == VK_NULL_HANDLE || usage_ + count >= info_.maxSets) { // Missing or out of space, need to recreate. VkResult res = Recreate(grow_); _assert_msg_(res == VK_SUCCESS, "Could not grow VulkanDescSetPool %s on usage %d", tag_, (int)usage_); } - VkDescriptorSet desc; VkDescriptorSetAllocateInfo descAlloc{ VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO }; descAlloc.descriptorPool = descPool_; - descAlloc.descriptorSetCount = n; + descAlloc.descriptorSetCount = count; descAlloc.pSetLayouts = layouts; - VkResult result = vkAllocateDescriptorSets(vulkan_->GetDevice(), &descAlloc, &desc); + VkResult result = vkAllocateDescriptorSets(vulkan_->GetDevice(), &descAlloc, descriptorSets); if (result == VK_ERROR_FRAGMENTED_POOL || result < 0) { WARN_LOG(G3D, "Pool %s %s - recreating", tag_, result == VK_ERROR_FRAGMENTED_POOL ? "fragmented" : "full"); @@ -66,20 +65,16 @@ VkDescriptorSet VulkanDescSetPool::Allocate(int n, const VkDescriptorSetLayout * // Need to update this pointer since we have allocated a new one. descAlloc.descriptorPool = descPool_; - result = vkAllocateDescriptorSets(vulkan_->GetDevice(), &descAlloc, &desc); + result = vkAllocateDescriptorSets(vulkan_->GetDevice(), &descAlloc, descriptorSets); _assert_msg_(result == VK_SUCCESS, "Ran out of descriptor space (frag?) and failed to allocate after recreating a descriptor pool. res=%d", (int)result); } if (result != VK_SUCCESS) { - return VK_NULL_HANDLE; + return false; } - usage_++; - - if (tag) { - vulkan_->SetDebugName(desc, VK_OBJECT_TYPE_DESCRIPTOR_SET, tag); - } - return desc; + usage_ += count; + return true; } void VulkanDescSetPool::Reset() { diff --git a/Common/GPU/Vulkan/VulkanDescSet.h b/Common/GPU/Vulkan/VulkanDescSet.h index 82007962a36d..3ed4673240a7 100644 --- a/Common/GPU/Vulkan/VulkanDescSet.h +++ b/Common/GPU/Vulkan/VulkanDescSet.h @@ -28,7 +28,7 @@ class VulkanDescSetPool { void Create(VulkanContext *vulkan, const BindingType *bindingTypes, uint32_t bindingTypesCount, uint32_t descriptorCount); // Allocate a new set, which may resize and empty the current sets. // Use only for the current frame, unless in a cache cleared by clear_. - VkDescriptorSet Allocate(int n, const VkDescriptorSetLayout *layouts, const char *tag); + bool Allocate(VkDescriptorSet *descriptorSets, int count, const VkDescriptorSetLayout *layouts); void Reset(); void Destroy(); diff --git a/Common/GPU/Vulkan/VulkanRenderManager.cpp b/Common/GPU/Vulkan/VulkanRenderManager.cpp index f6af92c35c20..e4dc0a20a66e 100644 --- a/Common/GPU/Vulkan/VulkanRenderManager.cpp +++ b/Common/GPU/Vulkan/VulkanRenderManager.cpp @@ -1725,7 +1725,7 @@ void VKRPipelineLayout::FlushDescSets(VulkanContext *vulkan, int frame, QueuePro } // TODO: Allocate in batches. - d.set = pool.Allocate(1, &descriptorSetLayout, nullptr); + pool.Allocate(&d.set, 1, &descriptorSetLayout); // TODO: Build up bigger batches of writes. const PackedDescriptor *data = descData.begin() + d.offset; diff --git a/GPU/Vulkan/VulkanUtil.cpp b/GPU/Vulkan/VulkanUtil.cpp index 8fc286f97c21..f100ad24db5d 100644 --- a/GPU/Vulkan/VulkanUtil.cpp +++ b/GPU/Vulkan/VulkanUtil.cpp @@ -133,7 +133,8 @@ VkDescriptorSet VulkanComputeShaderManager::GetDescriptorSet(VkImageView image, int curFrame = vulkan_->GetCurFrame(); FrameData &frameData = frameData_[curFrame]; frameData.descPoolUsed = true; - VkDescriptorSet desc = frameData.descPool.Allocate(1, &descriptorSetLayout_, "compute_descset"); + VkDescriptorSet desc; + frameData.descPool.Allocate(&desc, 1, &descriptorSetLayout_); _assert_(desc != VK_NULL_HANDLE); VkWriteDescriptorSet writes[3]{};