Skip to content

Commit

Permalink
Merge pull request #18642 from hrydgard/vulkan-barrier-optimization
Browse files Browse the repository at this point in the history
Vulkan barrier optimizations
  • Loading branch information
hrydgard committed Dec 30, 2023
2 parents a023dc9 + de4c04e commit b0c0a0f
Show file tree
Hide file tree
Showing 12 changed files with 284 additions and 171 deletions.
94 changes: 94 additions & 0 deletions Common/GPU/Vulkan/VulkanBarrier.cpp
Expand Up @@ -11,3 +11,97 @@ void VulkanBarrier::Flush(VkCommandBuffer cmd) {
dstStageMask_ = 0;
dependencyFlags_ = 0;
}

// Queues an image layout transition using caller-supplied access and stage masks.
// Nothing is recorded into a command buffer here: the barrier is appended to the
// pending list and the stage masks are OR-ed into the batch-wide masks.
// The barrier always starts at array layer 0 and covers numLayers layers and
// numMipLevels mip levels starting at baseMip.
void VulkanBarrier::TransitionImage(
	VkImage image, int baseMip, int numMipLevels, int numLayers, VkImageAspectFlags aspectMask,
	VkImageLayout oldImageLayout, VkImageLayout newImageLayout,
	VkAccessFlags srcAccessMask, VkAccessFlags dstAccessMask,
	VkPipelineStageFlags srcStageMask, VkPipelineStageFlags dstStageMask
) {
	_dbg_assert_(image != VK_NULL_HANDLE);

	// Judging by its name, push_uninitialized() returns raw storage, so every field is written below.
	VkImageMemoryBarrier &barrier = imageBarriers_.push_uninitialized();
	barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
	barrier.pNext = nullptr;

	// What is being transitioned, and between which layouts.
	barrier.image = image;
	barrier.oldLayout = oldImageLayout;
	barrier.newLayout = newImageLayout;
	barrier.srcAccessMask = srcAccessMask;
	barrier.dstAccessMask = dstAccessMask;

	// No queue family ownership transfer.
	barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
	barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;

	// NOTE: We could usually use VK_REMAINING_ARRAY_LAYERS/VK_REMAINING_MIP_LEVELS, but really old Mali drivers have problems with those.
	auto &range = barrier.subresourceRange;
	range.aspectMask = aspectMask;
	range.baseMipLevel = baseMip;
	range.levelCount = numMipLevels;
	range.baseArrayLayer = 0;
	range.layerCount = numLayers;

	// Fold this barrier's requirements into the masks shared by the whole batch.
	srcStageMask_ |= srcStageMask;
	dstStageMask_ |= dstStageMask;
	dependencyFlags_ |= VK_DEPENDENCY_BY_REGION_BIT;
}

// Queues an image layout transition, deriving the access masks and pipeline stages
// from the old and new layouts instead of taking them from the caller.
//
// Handled old layouts: COLOR_ATTACHMENT_OPTIMAL, DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
// UNDEFINED, TRANSFER_DST_OPTIMAL and TRANSFER_SRC_OPTIMAL.
// Handled new layouts: SHADER_READ_ONLY_OPTIMAL only.
// Any other layout triggers _assert_msg_.
//
// Like TransitionImage, this only appends to the pending barrier list and accumulates
// stage masks; unlike TransitionImage it does not touch dependencyFlags_.
// The barrier always starts at array layer 0 and covers numLayers layers and
// numMipLevels mip levels starting at baseMip.
void VulkanBarrier::TransitionImageAuto(
	VkImage image, int baseMip, int numMipLevels, int numLayers, VkImageAspectFlags aspectMask,
	VkImageLayout oldImageLayout, VkImageLayout newImageLayout) {
	_dbg_assert_(image != VK_NULL_HANDLE);

	VkAccessFlags srcAccessMask = 0;
	VkAccessFlags dstAccessMask = 0;
	switch (oldImageLayout) {
	case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
		// Assert aspect here?
		srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT;
		srcStageMask_ |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
		break;
	case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
		// Assert aspect here?
		srcAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT;
		srcStageMask_ |= VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
		break;
	case VK_IMAGE_LAYOUT_UNDEFINED:
		// Actually this seems wrong?
		if (aspectMask == VK_IMAGE_ASPECT_COLOR_BIT) {
			srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT;
			srcStageMask_ |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
		} else {
			// There is nothing to wait for, but a source stage mask of 0 is not valid for
			// vkCmdPipelineBarrier in core Vulkan. If this is the only barrier in the batch,
			// the mask would otherwise stay 0. TOP_OF_PIPE in the source scope waits on nothing,
			// so it is also harmless when other barriers have already added real stages.
			srcStageMask_ |= VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
		}
		break;
	case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
		srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
		srcStageMask_ |= VK_PIPELINE_STAGE_TRANSFER_BIT;
		break;
	case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
		srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
		srcStageMask_ |= VK_PIPELINE_STAGE_TRANSFER_BIT;
		break;
	default:
		_assert_msg_(false, "Unexpected oldLayout: %d", (int)oldImageLayout);
		break;
	}

	switch (newImageLayout) {
	case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
		// The only supported destination: the image is about to be read in a fragment shader.
		dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
		dstStageMask_ |= VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
		break;
	default:
		_assert_msg_(false, "Unexpected newLayout: %d", (int)newImageLayout);
		break;
	}

	// Judging by its name, push_uninitialized() returns raw storage, so every field is written below.
	VkImageMemoryBarrier &imageBarrier = imageBarriers_.push_uninitialized();
	imageBarrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
	imageBarrier.pNext = nullptr;
	imageBarrier.srcAccessMask = srcAccessMask;
	imageBarrier.dstAccessMask = dstAccessMask;
	imageBarrier.oldLayout = oldImageLayout;
	imageBarrier.newLayout = newImageLayout;
	imageBarrier.image = image;
	imageBarrier.subresourceRange.aspectMask = aspectMask;
	imageBarrier.subresourceRange.baseMipLevel = baseMip;
	imageBarrier.subresourceRange.levelCount = numMipLevels;
	imageBarrier.subresourceRange.layerCount = numLayers;  // NOTE: We could usually use VK_REMAINING_ARRAY_LAYERS/VK_REMAINING_MIP_LEVELS, but really old Mali drivers have problems with those.
	imageBarrier.subresourceRange.baseArrayLayer = 0;
	// No queue family ownership transfer.
	imageBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
	imageBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
}
137 changes: 50 additions & 87 deletions Common/GPU/Vulkan/VulkanBarrier.h
Expand Up @@ -6,111 +6,74 @@
#include "Common/Log.h"
#include "Common/GPU/Vulkan/VulkanLoader.h"
#include "Common/Data/Collections/FastVec.h"
#include "Common/Data/Collections/TinySet.h"

class VulkanContext;

class VulkanBarrierBatch {
public:
~VulkanBarrierBatch() {
_dbg_assert_(imageBarriers_.empty());
}

VkImageMemoryBarrier *Add(VkImage image, VkPipelineStageFlags srcStageMask, VkPipelineStageFlags dstStageMask, VkDependencyFlags dependencyFlags) {
srcStageMask_ |= srcStageMask;
dstStageMask_ |= dstStageMask;
dependencyFlags_ |= dependencyFlags;
VkImageMemoryBarrier &barrier = imageBarriers_.push_uninitialized();
// Initialize good defaults for the usual things.
barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
barrier.pNext = nullptr;
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
barrier.subresourceRange.baseArrayLayer = 0;
barrier.subresourceRange.baseMipLevel = 0;
barrier.subresourceRange.layerCount = 1;
barrier.subresourceRange.levelCount = 1;
barrier.image = image;
return &barrier;
}

void Flush(VkCommandBuffer cmd) {
if (!imageBarriers_.empty()) {
vkCmdPipelineBarrier(cmd, srcStageMask_, dstStageMask_, dependencyFlags_, 0, nullptr, 0, nullptr, (uint32_t)imageBarriers_.size(), imageBarriers_.data());
imageBarriers_.clear();
srcStageMask_ = 0;
dstStageMask_ = 0;
dependencyFlags_ = 0;
}
}

bool empty() const { return imageBarriers_.empty(); }

private:
FastVec<VkImageMemoryBarrier> imageBarriers_;
VkPipelineStageFlags srcStageMask_ = 0;
VkPipelineStageFlags dstStageMask_ = 0;
VkDependencyFlags dependencyFlags_ = 0;
};

// Collects multiple barriers into one, then flushes it.
// Reusable after a flush, in case you want to reuse the allocation made by the vector.
// However, not thread safe in any way!
class VulkanBarrier {
public:
VulkanBarrier() : imageBarriers_(4) {}

bool empty() const { return imageBarriers_.empty(); }

void TransitionImage(
VkImage image, int baseMip, int numMipLevels, int numLayers, VkImageAspectFlags aspectMask,
VkImageLayout oldImageLayout, VkImageLayout newImageLayout,
VkAccessFlags srcAccessMask, VkAccessFlags dstAccessMask,
VkPipelineStageFlags srcStageMask, VkPipelineStageFlags dstStageMask
) {
_dbg_assert_(image != VK_NULL_HANDLE);

srcStageMask_ |= srcStageMask;
dstStageMask_ |= dstStageMask;
dependencyFlags_ |= VK_DEPENDENCY_BY_REGION_BIT;

VkImageMemoryBarrier &imageBarrier = imageBarriers_.push_uninitialized();
imageBarrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
imageBarrier.pNext = nullptr;
imageBarrier.srcAccessMask = srcAccessMask;
imageBarrier.dstAccessMask = dstAccessMask;
imageBarrier.oldLayout = oldImageLayout;
imageBarrier.newLayout = newImageLayout;
imageBarrier.image = image;
imageBarrier.subresourceRange.aspectMask = aspectMask;
imageBarrier.subresourceRange.baseMipLevel = baseMip;
imageBarrier.subresourceRange.levelCount = numMipLevels;
imageBarrier.subresourceRange.layerCount = numLayers; // NOTE: We could usually use VK_REMAINING_ARRAY_LAYERS/VK_REMAINING_MIP_LEVELS, but really old Mali drivers have problems with those.
imageBarrier.subresourceRange.baseArrayLayer = 0;
imageBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
imageBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
}
);

// Automatically determines access and stage masks from layouts.
// Not universally usable, but works for PPSSPP's use.
void TransitionImageAuto(
VkImage image, int baseMip, int numMipLevels, int numLayers, VkImageAspectFlags aspectMask, VkImageLayout oldImageLayout, VkImageLayout newImageLayout
) {
_dbg_assert_(image != VK_NULL_HANDLE);

VkAccessFlags srcAccessMask = 0;
VkAccessFlags dstAccessMask = 0;
switch (oldImageLayout) {
case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
// Assert aspect here?
srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT;
srcStageMask_ |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
break;
case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
// Assert aspect here?
srcAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT;
srcStageMask_ |= VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
break;
case VK_IMAGE_LAYOUT_UNDEFINED:
// Actually this seems wrong?
if (aspectMask == VK_IMAGE_ASPECT_COLOR_BIT) {
srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT;
srcStageMask_ |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
}
break;
case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
srcStageMask_ |= VK_PIPELINE_STAGE_TRANSFER_BIT;
break;
case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
srcStageMask_ |= VK_PIPELINE_STAGE_TRANSFER_BIT;
break;
default:
_assert_msg_(false, "Unexpected oldLayout: %d", (int)oldImageLayout);
break;
}

switch (newImageLayout) {
case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
dstStageMask_ |= VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
break;
default:
_assert_msg_(false, "Unexpected newLayout: %d", (int)newImageLayout);
break;
}

VkImageMemoryBarrier &imageBarrier = imageBarriers_.push_uninitialized();
imageBarrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
imageBarrier.pNext = nullptr;
imageBarrier.srcAccessMask = srcAccessMask;
imageBarrier.dstAccessMask = dstAccessMask;
imageBarrier.oldLayout = oldImageLayout;
imageBarrier.newLayout = newImageLayout;
imageBarrier.image = image;
imageBarrier.subresourceRange.aspectMask = aspectMask;
imageBarrier.subresourceRange.baseMipLevel = baseMip;
imageBarrier.subresourceRange.levelCount = numMipLevels;
imageBarrier.subresourceRange.layerCount = numLayers; // NOTE: We could usually use VK_REMAINING_ARRAY_LAYERS/VK_REMAINING_MIP_LEVELS, but really old Mali drivers have problems with those.
imageBarrier.subresourceRange.baseArrayLayer = 0;
imageBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
imageBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
}
void TransitionImageAuto(VkImage image, int baseMip, int numMipLevels, int numLayers, VkImageAspectFlags aspectMask,
VkImageLayout oldImageLayout, VkImageLayout newImageLayout);

void Flush(VkCommandBuffer cmd);

Expand Down
9 changes: 9 additions & 0 deletions Common/GPU/Vulkan/VulkanDebug.cpp
Expand Up @@ -87,6 +87,15 @@ VKAPI_ATTR VkBool32 VKAPI_CALL VulkanDebugUtilsCallback(
// False positive
// https://github.com/KhronosGroup/Vulkan-ValidationLayers/issues/3615
return false;

case 1835555994: // [AMD] [NVIDIA] Performance warning : Pipeline VkPipeline 0xa808d50000000033[global_texcolor] was bound twice in the frame.
// Benign perf warnings.
return false;

case 1810669668:
// Clear value but no LOAD_OP_CLEAR. Not worth fixing right now.
return false;

default:
break;
}
Expand Down
24 changes: 14 additions & 10 deletions Common/GPU/Vulkan/VulkanFramebuffer.cpp
Expand Up @@ -56,23 +56,23 @@ void VKRImage::Delete(VulkanContext *vulkan) {
}
}

VKRFramebuffer::VKRFramebuffer(VulkanContext *vk, VkCommandBuffer initCmd, VKRRenderPass *compatibleRenderPass, int _width, int _height, int _numLayers, int _multiSampleLevel, bool createDepthStencilBuffer, const char *tag)
VKRFramebuffer::VKRFramebuffer(VulkanContext *vk, VulkanBarrierBatch *barriers, VkCommandBuffer initCmd, VKRRenderPass *compatibleRenderPass, int _width, int _height, int _numLayers, int _multiSampleLevel, bool createDepthStencilBuffer, const char *tag)
: vulkan_(vk), tag_(tag), width(_width), height(_height), numLayers(_numLayers) {

_dbg_assert_(tag);

CreateImage(vulkan_, initCmd, color, width, height, numLayers, VK_SAMPLE_COUNT_1_BIT, VK_FORMAT_R8G8B8A8_UNORM, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, true, tag);
CreateImage(vulkan_, barriers, initCmd, color, width, height, numLayers, VK_SAMPLE_COUNT_1_BIT, VK_FORMAT_R8G8B8A8_UNORM, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, true, tag);
if (createDepthStencilBuffer) {
CreateImage(vulkan_, initCmd, depth, width, height, numLayers, VK_SAMPLE_COUNT_1_BIT, vulkan_->GetDeviceInfo().preferredDepthStencilFormat, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, false, tag);
CreateImage(vulkan_, barriers, initCmd, depth, width, height, numLayers, VK_SAMPLE_COUNT_1_BIT, vulkan_->GetDeviceInfo().preferredDepthStencilFormat, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, false, tag);
}

if (_multiSampleLevel > 0) {
sampleCount = MultiSampleLevelToFlagBits(_multiSampleLevel);

// TODO: Create a different tag for these?
CreateImage(vulkan_, initCmd, msaaColor, width, height, numLayers, sampleCount, VK_FORMAT_R8G8B8A8_UNORM, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, true, tag);
CreateImage(vulkan_, barriers, initCmd, msaaColor, width, height, numLayers, sampleCount, VK_FORMAT_R8G8B8A8_UNORM, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, true, tag);
if (createDepthStencilBuffer) {
CreateImage(vulkan_, initCmd, msaaDepth, width, height, numLayers, sampleCount, vulkan_->GetDeviceInfo().preferredDepthStencilFormat, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, false, tag);
CreateImage(vulkan_, barriers, initCmd, msaaDepth, width, height, numLayers, sampleCount, vulkan_->GetDeviceInfo().preferredDepthStencilFormat, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, false, tag);
}
} else {
sampleCount = VK_SAMPLE_COUNT_1_BIT;
Expand Down Expand Up @@ -161,7 +161,7 @@ VKRFramebuffer::~VKRFramebuffer() {

// NOTE: If numLayers > 1, it will create an array texture, rather than a normal 2D texture.
// This requires a different sampling path!
void VKRFramebuffer::CreateImage(VulkanContext *vulkan, VkCommandBuffer cmd, VKRImage &img, int width, int height, int numLayers, VkSampleCountFlagBits sampleCount, VkFormat format, VkImageLayout initialLayout, bool color, const char *tag) {
void VKRFramebuffer::CreateImage(VulkanContext *vulkan, VulkanBarrierBatch *barriers, VkCommandBuffer cmd, VKRImage &img, int width, int height, int numLayers, VkSampleCountFlagBits sampleCount, VkFormat format, VkImageLayout initialLayout, bool color, const char *tag) {
// We don't support more exotic layer setups for now. Mono or stereo.
_dbg_assert_(numLayers == 1 || numLayers == 2);

Expand Down Expand Up @@ -251,10 +251,14 @@ void VKRFramebuffer::CreateImage(VulkanContext *vulkan, VkCommandBuffer cmd, VKR
return;
}

TransitionImageLayout2(cmd, img.image, 0, 1, numLayers, aspects,
VK_IMAGE_LAYOUT_UNDEFINED, initialLayout,
VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, dstStage,
0, dstAccessMask);
VkImageMemoryBarrier *barrier = barriers->Add(img.image, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, dstStage, 0);
barrier->subresourceRange.layerCount = numLayers;
barrier->subresourceRange.aspectMask = aspects;
barrier->oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
barrier->newLayout = initialLayout;
barrier->srcAccessMask = 0;
barrier->dstAccessMask = dstAccessMask;

img.layout = initialLayout;
img.format = format;
img.sampleCount = sampleCount;
Expand Down
5 changes: 3 additions & 2 deletions Common/GPU/Vulkan/VulkanFramebuffer.h
Expand Up @@ -4,6 +4,7 @@
#include "Common/GPU/Vulkan/VulkanContext.h"

class VKRRenderPass;
class VulkanBarrierBatch;

// Pipelines need to be created for the right type of render pass.
// TODO: Rename to RenderPassFlags?
Expand Down Expand Up @@ -57,7 +58,7 @@ struct VKRImage {

class VKRFramebuffer {
public:
VKRFramebuffer(VulkanContext *vk, VkCommandBuffer initCmd, VKRRenderPass *compatibleRenderPass, int _width, int _height, int _numLayers, int _multiSampleLevel, bool createDepthStencilBuffer, const char *tag);
VKRFramebuffer(VulkanContext *vk, VulkanBarrierBatch *barriers, VkCommandBuffer initCmd, VKRRenderPass *compatibleRenderPass, int _width, int _height, int _numLayers, int _multiSampleLevel, bool createDepthStencilBuffer, const char *tag);
~VKRFramebuffer();

VkFramebuffer Get(VKRRenderPass *compatibleRenderPass, RenderPassType rpType);
Expand Down Expand Up @@ -94,7 +95,7 @@ class VKRFramebuffer {

VulkanContext *Vulkan() const { return vulkan_; }
private:
static void CreateImage(VulkanContext *vulkan, VkCommandBuffer cmd, VKRImage &img, int width, int height, int numLayers, VkSampleCountFlagBits sampleCount, VkFormat format, VkImageLayout initialLayout, bool color, const char *tag);
static void CreateImage(VulkanContext *vulkan, VulkanBarrierBatch *barriers, VkCommandBuffer cmd, VKRImage &img, int width, int height, int numLayers, VkSampleCountFlagBits sampleCount, VkFormat format, VkImageLayout initialLayout, bool color, const char *tag);

VkFramebuffer framebuf[(size_t)RenderPassType::TYPE_COUNT]{};

Expand Down

0 comments on commit b0c0a0f

Please sign in to comment.