Skip to content

Commit

Permalink
Merge pull request #15944 from hrydgard/vulkan-shader-blending
Browse files Browse the repository at this point in the history
Shader blending in Vulkan through input attachments
  • Loading branch information
hrydgard committed Sep 16, 2022
2 parents 7c2b4b6 + 262a306 commit 143be81
Show file tree
Hide file tree
Showing 31 changed files with 289 additions and 107 deletions.
2 changes: 1 addition & 1 deletion Common/GPU/Vulkan/VulkanBarrier.cpp
Expand Up @@ -4,7 +4,7 @@

void VulkanBarrier::Flush(VkCommandBuffer cmd) {
if (!imageBarriers_.empty()) {
vkCmdPipelineBarrier(cmd, srcStageMask_, dstStageMask_, 0, 0, nullptr, 0, nullptr, (uint32_t)imageBarriers_.size(), imageBarriers_.data());
vkCmdPipelineBarrier(cmd, srcStageMask_, dstStageMask_, dependencyFlags_, 0, nullptr, 0, nullptr, (uint32_t)imageBarriers_.size(), imageBarriers_.data());
}
imageBarriers_.clear();
srcStageMask_ = 0;
Expand Down
2 changes: 2 additions & 0 deletions Common/GPU/Vulkan/VulkanBarrier.h
Expand Up @@ -21,6 +21,7 @@ class VulkanBarrier {
) {
srcStageMask_ |= srcStageMask;
dstStageMask_ |= dstStageMask;
dependencyFlags_ |= VK_DEPENDENCY_BY_REGION_BIT;

VkImageMemoryBarrier imageBarrier;
imageBarrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
Expand Down Expand Up @@ -112,4 +113,5 @@ class VulkanBarrier {
VkPipelineStageFlags srcStageMask_ = 0;
VkPipelineStageFlags dstStageMask_ = 0;
std::vector<VkImageMemoryBarrier> imageBarriers_;
VkDependencyFlags dependencyFlags_ = 0;
};
3 changes: 3 additions & 0 deletions Common/GPU/Vulkan/VulkanContext.cpp
Expand Up @@ -667,7 +667,10 @@ VkResult VulkanContext::CreateDevice() {
extensionsLookup_.KHR_create_renderpass2 = true;
extensionsLookup_.KHR_depth_stencil_resolve = EnableDeviceExtension(VK_KHR_DEPTH_STENCIL_RESOLVE_EXTENSION_NAME);
}

extensionsLookup_.EXT_shader_stencil_export = EnableDeviceExtension(VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME);
extensionsLookup_.EXT_fragment_shader_interlock = EnableDeviceExtension(VK_EXT_FRAGMENT_SHADER_INTERLOCK_EXTENSION_NAME);
extensionsLookup_.ARM_rasterization_order_attachment_access = EnableDeviceExtension(VK_ARM_RASTERIZATION_ORDER_ATTACHMENT_ACCESS_EXTENSION_NAME);

VkDeviceCreateInfo device_info{ VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO };
device_info.queueCreateInfoCount = 1;
Expand Down
2 changes: 1 addition & 1 deletion Common/GPU/Vulkan/VulkanDebug.cpp
Expand Up @@ -86,11 +86,11 @@ VKAPI_ATTR VkBool32 VKAPI_CALL VulkanDebugUtilsCallback(
} else {
WARN_LOG(G3D, "VKDEBUG: %s", msg.c_str());
}

// false indicates that layer should not bail out of an
// API call that had validation failures. This may mean that the
// app dies inside the driver due to invalid parameter(s).
// That's what would happen without validation layers, so we'll
// keep that behavior here.
return false;
}

2 changes: 2 additions & 0 deletions Common/GPU/Vulkan/VulkanLoader.h
Expand Up @@ -241,6 +241,8 @@ struct VulkanExtensions {
bool KHR_depth_stencil_resolve;
bool EXT_shader_stencil_export;
bool EXT_swapchain_colorspace;
bool ARM_rasterization_order_attachment_access;
bool EXT_fragment_shader_interlock;
// bool EXT_depth_range_unrestricted; // Allows depth outside [0.0, 1.0] in 32-bit float depth buffers.
};

Expand Down
92 changes: 80 additions & 12 deletions Common/GPU/Vulkan/VulkanQueueRunner.cpp
Expand Up @@ -34,8 +34,11 @@ RenderPassType MergeRPTypes(RenderPassType a, RenderPassType b) {
if (a == b) {
// Trivial merging case.
return a;
} else if (a == RP_TYPE_COLOR_DEPTH && b == RP_TYPE_COLOR_DEPTH_INPUT) {
return RP_TYPE_COLOR_DEPTH_INPUT;
} else if (a == RP_TYPE_COLOR_DEPTH_INPUT && b == RP_TYPE_COLOR_DEPTH) {
return RP_TYPE_COLOR_DEPTH_INPUT;
}
// More cases to be added later.
return a;
}

Expand Down Expand Up @@ -155,7 +158,12 @@ static VkAttachmentStoreOp ConvertStoreAction(VKRRenderPassStoreAction action) {
return VK_ATTACHMENT_STORE_OP_DONT_CARE; // avoid compiler warning
}

// Self-dependency: https://github.com/gpuweb/gpuweb/issues/442#issuecomment-547604827
// Also see https://www.khronos.org/registry/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-pipeline-barriers-subpass-self-dependencies

VkRenderPass CreateRP(VulkanContext *vulkan, const RPKey &key, RenderPassType rpType) {
bool selfDependency = rpType == RP_TYPE_COLOR_DEPTH_INPUT;

VkAttachmentDescription attachments[2] = {};
attachments[0].format = rpType == RP_TYPE_BACKBUFFER ? vulkan->GetSwapchainFormat() : VK_FORMAT_R8G8B8A8_UNORM;
attachments[0].samples = VK_SAMPLE_COUNT_1_BIT;
Expand All @@ -179,7 +187,7 @@ VkRenderPass CreateRP(VulkanContext *vulkan, const RPKey &key, RenderPassType rp

VkAttachmentReference color_reference{};
color_reference.attachment = 0;
color_reference.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
color_reference.layout = selfDependency ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;

VkAttachmentReference depth_reference{};
depth_reference.attachment = 1;
Expand All @@ -188,8 +196,13 @@ VkRenderPass CreateRP(VulkanContext *vulkan, const RPKey &key, RenderPassType rp
VkSubpassDescription subpass{};
subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
subpass.flags = 0;
subpass.inputAttachmentCount = 0;
subpass.pInputAttachments = nullptr;
if (selfDependency) {
subpass.inputAttachmentCount = 1;
subpass.pInputAttachments = &color_reference;
} else {
subpass.inputAttachmentCount = 0;
subpass.pInputAttachments = nullptr;
}
subpass.colorAttachmentCount = 1;
subpass.pColorAttachments = &color_reference;
subpass.pResolveAttachments = nullptr;
Expand All @@ -198,22 +211,40 @@ VkRenderPass CreateRP(VulkanContext *vulkan, const RPKey &key, RenderPassType rp
subpass.pPreserveAttachments = nullptr;

// Not sure if this is really necessary.
VkSubpassDependency dep{};
dep.srcSubpass = VK_SUBPASS_EXTERNAL;
dep.dstSubpass = 0;
dep.srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
dep.dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
dep.srcAccessMask = 0;
dep.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
VkSubpassDependency deps[2]{};
size_t numDeps = 0;

VkRenderPassCreateInfo rp{ VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO };
rp.attachmentCount = 2;
rp.pAttachments = attachments;
rp.subpassCount = 1;
rp.pSubpasses = &subpass;

if (rpType == RP_TYPE_BACKBUFFER) {
deps[numDeps].srcSubpass = VK_SUBPASS_EXTERNAL;
deps[numDeps].dstSubpass = 0;
deps[numDeps].srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
deps[numDeps].dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
deps[numDeps].srcAccessMask = 0;
deps[numDeps].dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
numDeps++;
rp.dependencyCount = 1;
rp.pDependencies = &dep;
}

if (selfDependency) {
deps[numDeps].dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT;
deps[numDeps].srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
deps[numDeps].dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT;
deps[numDeps].srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
deps[numDeps].dstStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
deps[numDeps].srcSubpass = 0;
deps[numDeps].dstSubpass = 0;
numDeps++;
}

if (numDeps > 0) {
rp.dependencyCount = (u32)numDeps;
rp.pDependencies = deps;
}

VkRenderPass pass;
Expand Down Expand Up @@ -246,6 +277,30 @@ VKRRenderPass *VulkanQueueRunner::GetRenderPass(const RPKey &key) {
return pass;
}

// Records a barrier on the given image that orders color-attachment writes before
// input-attachment reads in the fragment shader stage. The access and stage masks used
// here must stay in sync with the subpass self-dependency that CreateRP declares for
// RP_TYPE_COLOR_DEPTH_INPUT render passes.
//
// img:            image to record the barrier for.
// aspect:         image aspect(s); only the color aspect is handled, anything else asserts.
// recordBarrier:  barrier batch that the transition is appended to (the caller flushes it).
void VulkanQueueRunner::SelfDependencyBarrier(VKRImage &img, VkImageAspectFlags aspect, VulkanBarrier *recordBarrier) {
	// Guard: nothing but color is implemented so far.
	if ((aspect & VK_IMAGE_ASPECT_COLOR_BIT) == 0) {
		_assert_msg_(false, "Depth self-dependencies not yet supported");
		return;
	}

	// The old and new layouts are both GENERAL, so no layout change is requested.
	recordBarrier->TransitionImage(
		img.image,
		0,
		1,
		aspect,
		VK_IMAGE_LAYOUT_GENERAL,
		VK_IMAGE_LAYOUT_GENERAL,
		VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
		VK_ACCESS_INPUT_ATTACHMENT_READ_BIT,
		VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
		VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT
	);
}

void VulkanQueueRunner::PreprocessSteps(std::vector<VKRStep *> &steps) {
// Optimizes renderpasses, then sequences them.
// Planned optimizations:
Expand Down Expand Up @@ -628,6 +683,7 @@ std::string VulkanQueueRunner::StepToString(const VKRStep &step) const {
switch (step.render.renderPassType) {
case RP_TYPE_BACKBUFFER: renderCmd = "BACKBUF"; break;
case RP_TYPE_COLOR_DEPTH: renderCmd = "RENDER"; break;
case RP_TYPE_COLOR_DEPTH_INPUT: renderCmd = "RENDER_INPUT"; break;
default: renderCmd = "N/A";
}
snprintf(buffer, sizeof(buffer), "%s %s (draws: %d, %dx%d/%dx%d, fb: %p, )", renderCmd, step.tag, step.render.numDraws, actual_w, actual_h, w, h, step.render.framebuffer);
Expand Down Expand Up @@ -817,6 +873,9 @@ void VulkanQueueRunner::LogRenderPass(const VKRStep &pass, bool verbose) {
case VKRRenderCommand::REMOVED:
INFO_LOG(G3D, " (Removed)");
break;
case VKRRenderCommand::SELF_DEPENDENCY_BARRIER:
INFO_LOG(G3D, " SelfBarrier()");
break;
case VKRRenderCommand::BIND_GRAPHICS_PIPELINE:
INFO_LOG(G3D, " BindGraphicsPipeline(%x)", (int)(intptr_t)cmd.graphics_pipeline.pipeline);
break;
Expand Down Expand Up @@ -1235,6 +1294,15 @@ void VulkanQueueRunner::PerformRenderPass(const VKRStep &step, VkCommandBuffer c
break;
}

case VKRRenderCommand::SELF_DEPENDENCY_BARRIER:
{
_assert_(step.render.pipelineFlags & PipelineFlags::USES_INPUT_ATTACHMENT);
VulkanBarrier barrier;
SelfDependencyBarrier(step.render.framebuffer->color, VK_IMAGE_ASPECT_COLOR_BIT, &barrier);
barrier.Flush(cmd);
break;
}

case VKRRenderCommand::PUSH_CONSTANTS:
vkCmdPushConstants(cmd, pipelineLayout, c.push.stages, c.push.offset, c.push.size, c.push.data);
break;
Expand Down
20 changes: 12 additions & 8 deletions Common/GPU/Vulkan/VulkanQueueRunner.h
Expand Up @@ -20,7 +20,6 @@ struct VKRImage;
enum {
QUEUE_HACK_MGS2_ACID = 1,
QUEUE_HACK_SONIC = 2,
// Killzone PR = 4.
QUEUE_HACK_RENDERPASS_MERGE = 8,
};

Expand All @@ -36,20 +35,24 @@ enum class VKRRenderCommand : uint8_t {
DRAW,
DRAW_INDEXED,
PUSH_CONSTANTS,
SELF_DEPENDENCY_BARRIER,
NUM_RENDER_COMMANDS,
};

enum PipelineFlags {
PIPELINE_FLAG_NONE = 0,
PIPELINE_FLAG_USES_LINES = (1 << 2),
PIPELINE_FLAG_USES_BLEND_CONSTANT = (1 << 3),
PIPELINE_FLAG_USES_DEPTH_STENCIL = (1 << 4), // Reads or writes the depth buffer.
enum class PipelineFlags {
NONE = 0,
USES_LINES = (1 << 2),
USES_BLEND_CONSTANT = (1 << 3),
USES_DEPTH_STENCIL = (1 << 4), // Reads or writes the depth buffer.
USES_INPUT_ATTACHMENT = (1 << 5),
};
ENUM_CLASS_BITOPS(PipelineFlags);

// Kinds of render pass. A pipeline has to be created for the kind of render pass
// it will be used with.
enum RenderPassType {
	// Used for render steps that have no framebuffer bound.
	RP_TYPE_BACKBUFFER = 0,
	// Regular color + depth render pass.
	RP_TYPE_COLOR_DEPTH = 1,
	// Color + depth, with the color attachment additionally bound as an input attachment
	// (CreateRP sets up a subpass self-dependency for this type).
	RP_TYPE_COLOR_DEPTH_INPUT = 2,
	// Pure-color render passes are planned as a later addition.
	// Number of render pass types; keep this entry last.
	RP_TYPE_COUNT = 3,
};
Expand Down Expand Up @@ -168,7 +171,6 @@ struct VKRStep {
union {
struct {
VKRFramebuffer *framebuffer;
// TODO: Look these up through renderPass?
VKRRenderPassLoadAction colorLoad;
VKRRenderPassLoadAction depthLoad;
VKRRenderPassLoadAction stencilLoad;
Expand All @@ -183,7 +185,7 @@ struct VKRStep {
int numReads;
VkImageLayout finalColorLayout;
VkImageLayout finalDepthStencilLayout;
u32 pipelineFlags;
PipelineFlags pipelineFlags; // contains the self dependency flag, in the form of USES_INPUT_ATTACHMENT
VkRect2D renderArea;
// Render pass type. Deduced after finishing recording the pass, from the used pipelines.
// NOTE: Storing the render pass here doesn't do much good, we change the compatible parameters (load/store ops) during step optimization.
Expand Down Expand Up @@ -324,6 +326,8 @@ class VulkanQueueRunner {
static void SetupTransitionToTransferSrc(VKRImage &img, VkImageAspectFlags aspect, VulkanBarrier *recordBarrier);
static void SetupTransitionToTransferDst(VKRImage &img, VkImageAspectFlags aspect, VulkanBarrier *recordBarrier);

static void SelfDependencyBarrier(VKRImage &img, VkImageAspectFlags aspect, VulkanBarrier *recordBarrier);

VulkanContext *vulkan_;

VkFramebuffer backbuffer_ = VK_NULL_HANDLE;
Expand Down
16 changes: 13 additions & 3 deletions Common/GPU/Vulkan/VulkanRenderManager.cpp
Expand Up @@ -223,7 +223,7 @@ void CreateImage(VulkanContext *vulkan, VkCommandBuffer cmd, VKRImage &img, int
// Strictly speaking we don't yet need VK_IMAGE_USAGE_SAMPLED_BIT for depth buffers since we do not yet sample depth buffers.
ici.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
if (color) {
ici.usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
ici.usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT;
} else {
ici.usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
}
Expand Down Expand Up @@ -534,7 +534,9 @@ void VulkanRenderManager::CompileThreadFunc() {
break;
}

INFO_LOG(G3D, "Compilation thread has %d pipelines to create", (int)toCompile.size());
if (!toCompile.empty()) {
INFO_LOG(G3D, "Compilation thread has %d pipelines to create", (int)toCompile.size());
}

// TODO: Here we can sort the pending pipelines by vertex and fragment shaders,
// and split up further.
Expand Down Expand Up @@ -774,6 +776,9 @@ void VulkanRenderManager::EndCurRenderStep() {
curRenderStep_->render.pipelineFlags = curPipelineFlags_;
if (!curRenderStep_->render.framebuffer) {
rpType = RP_TYPE_BACKBUFFER;
} else if (curPipelineFlags_ & PipelineFlags::USES_INPUT_ATTACHMENT) {
// Not allowed on backbuffers.
rpType = RP_TYPE_COLOR_DEPTH_INPUT;
}

VKRRenderPass *renderPass = queueRunner_.GetRenderPass(key);
Expand Down Expand Up @@ -806,7 +811,12 @@ void VulkanRenderManager::EndCurRenderStep() {

// We no longer have a current render step.
curRenderStep_ = nullptr;
curPipelineFlags_ = 0;
curPipelineFlags_ = (PipelineFlags)0;
}

// Appends a SELF_DEPENDENCY_BARRIER command to the render step that is currently being recorded.
// A render step must be active when this is called (checked with a debug assert).
// NOTE(review): aspectBits is not read in this body; the queue runner issues the barrier
// with VK_IMAGE_ASPECT_COLOR_BIT when it executes the command.
void VulkanRenderManager::BindCurrentFramebufferAsInputAttachment0(VkImageAspectFlags aspectBits) {
	_dbg_assert_(curRenderStep_);
	VkRenderData barrierCmd{ VKRRenderCommand::SELF_DEPENDENCY_BARRIER };
	curRenderStep_->commands.push_back(barrierCmd);
}

void VulkanRenderManager::BindFramebufferAsRenderTarget(VKRFramebuffer *fb, VKRRenderPassLoadAction color, VKRRenderPassLoadAction depth, VKRRenderPassLoadAction stencil, uint32_t clearColor, float clearDepth, uint8_t clearStencil, const char *tag) {
Expand Down
4 changes: 3 additions & 1 deletion Common/GPU/Vulkan/VulkanRenderManager.h
Expand Up @@ -236,6 +236,8 @@ class VulkanRenderManager {
// as the other backends, even though there's no actual binding happening here.
VkImageView BindFramebufferAsTexture(VKRFramebuffer *fb, int binding, VkImageAspectFlags aspectBits, int attachment);

void BindCurrentFramebufferAsInputAttachment0(VkImageAspectFlags aspectBits);

bool CopyFramebufferToMemorySync(VKRFramebuffer *src, VkImageAspectFlags aspectBits, int x, int y, int w, int h, Draw::DataFormat destFormat, uint8_t *pixels, int pixelStride, const char *tag);
void CopyImageToMemorySync(VkImage image, int mipLevel, int x, int y, int w, int h, Draw::DataFormat destFormat, uint8_t *pixels, int pixelStride, const char *tag);

Expand Down Expand Up @@ -544,7 +546,7 @@ class VulkanRenderManager {
VKRStep *curRenderStep_ = nullptr;
bool curStepHasViewport_ = false;
bool curStepHasScissor_ = false;
u32 curPipelineFlags_ = 0;
PipelineFlags curPipelineFlags_{};
BoundingRect curRenderArea_;

std::vector<VKRStep *> steps_;
Expand Down

0 comments on commit 143be81

Please sign in to comment.