53 changes: 36 additions & 17 deletions Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp
Expand Up @@ -105,9 +105,9 @@ void SHADER::SetProgramVariables()
if (VSBlock_id != -1)
glUniformBlockBinding(glprogid, VSBlock_id, 2);
if (GSBlock_id != -1)
glUniformBlockBinding(glprogid, GSBlock_id, 3);
glUniformBlockBinding(glprogid, GSBlock_id, 4);
if (UBERBlock_id != -1)
glUniformBlockBinding(glprogid, UBERBlock_id, 4);
glUniformBlockBinding(glprogid, UBERBlock_id, 5);

// Bind Texture Samplers
for (int a = 0; a < 8; ++a)
Expand Down Expand Up @@ -232,35 +232,54 @@ void ProgramShaderCache::UploadConstants()
auto& pixel_shader_manager = system.GetPixelShaderManager();
auto& vertex_shader_manager = system.GetVertexShaderManager();
auto& geometry_shader_manager = system.GetGeometryShaderManager();
if (pixel_shader_manager.dirty || vertex_shader_manager.dirty || geometry_shader_manager.dirty)
if (pixel_shader_manager.dirty || vertex_shader_manager.dirty || geometry_shader_manager.dirty ||
pixel_shader_manager.custom_constants_dirty)
{
auto buffer = s_buffer->Map(s_ubo_buffer_size, s_ubo_align);
const u32 custom_constants_size = static_cast<u32>(
Common::AlignUp(pixel_shader_manager.custom_constants.size(), s_ubo_align));
auto buffer = s_buffer->Map(s_ubo_buffer_size + custom_constants_size, s_ubo_align);

memcpy(buffer.first, &pixel_shader_manager.constants, sizeof(PixelShaderConstants));

memcpy(buffer.first + Common::AlignUp(sizeof(PixelShaderConstants), s_ubo_align),
&vertex_shader_manager.constants, sizeof(VertexShaderConstants));
u64 size = Common::AlignUp(sizeof(PixelShaderConstants), s_ubo_align);

memcpy(buffer.first + Common::AlignUp(sizeof(PixelShaderConstants), s_ubo_align) +
Common::AlignUp(sizeof(VertexShaderConstants), s_ubo_align),
&geometry_shader_manager.constants, sizeof(GeometryShaderConstants));
memcpy(buffer.first + size, &vertex_shader_manager.constants, sizeof(VertexShaderConstants));
size += Common::AlignUp(sizeof(VertexShaderConstants), s_ubo_align);

// Custom pixel shader constants are optional: they are copied only when the span is non-empty.
if (!pixel_shader_manager.custom_constants.empty())
{
// Only the span's real byte count is copied; the padding up to the aligned size is left as-is.
memcpy(buffer.first + size, pixel_shader_manager.custom_constants.data(),
pixel_shader_manager.custom_constants.size());
// Advance by the size rounded up to s_ubo_align (custom_constants_size) so that the data
// copied after this block starts at an aligned offset.
size += custom_constants_size;
}

memcpy(buffer.first + size, &geometry_shader_manager.constants,
sizeof(GeometryShaderConstants));

s_buffer->Unmap(s_ubo_buffer_size + custom_constants_size);

s_buffer->Unmap(s_ubo_buffer_size);
glBindBufferRange(GL_UNIFORM_BUFFER, 1, s_buffer->m_buffer, buffer.second,
sizeof(PixelShaderConstants));
glBindBufferRange(GL_UNIFORM_BUFFER, 2, s_buffer->m_buffer,
buffer.second + Common::AlignUp(sizeof(PixelShaderConstants), s_ubo_align),
size = Common::AlignUp(sizeof(PixelShaderConstants), s_ubo_align);
glBindBufferRange(GL_UNIFORM_BUFFER, 2, s_buffer->m_buffer, buffer.second + size,
sizeof(VertexShaderConstants));
glBindBufferRange(GL_UNIFORM_BUFFER, 3, s_buffer->m_buffer,
buffer.second + Common::AlignUp(sizeof(PixelShaderConstants), s_ubo_align) +
Common::AlignUp(sizeof(VertexShaderConstants), s_ubo_align),
size += Common::AlignUp(sizeof(VertexShaderConstants), s_ubo_align);

// Bind the custom pixel shader constants to uniform buffer binding point 3, but only when
// there are any. The bound range is the span's real size, not the padded size.
// NOTE(review): when the span is empty, binding point 3 is not touched here, so whatever was
// bound there earlier stays bound — confirm that is acceptable.
if (!pixel_shader_manager.custom_constants.empty())
{
glBindBufferRange(GL_UNIFORM_BUFFER, 3, s_buffer->m_buffer, buffer.second + size,
pixel_shader_manager.custom_constants.size());
// Step past the aligned size so the offset used by the next bind matches the upload layout.
size += Common::AlignUp(pixel_shader_manager.custom_constants.size(), s_ubo_align);
}
glBindBufferRange(GL_UNIFORM_BUFFER, 4, s_buffer->m_buffer, buffer.second + size,
sizeof(GeometryShaderConstants));

pixel_shader_manager.dirty = false;
vertex_shader_manager.dirty = false;
geometry_shader_manager.dirty = false;
pixel_shader_manager.custom_constants_dirty = false;

ADDSTAT(g_stats.this_frame.bytes_uniform_streamed, s_ubo_buffer_size);
ADDSTAT(g_stats.this_frame.bytes_uniform_streamed, s_ubo_buffer_size + custom_constants_size);
}
}

Expand All @@ -273,7 +292,7 @@ void ProgramShaderCache::UploadConstants(const void* data, u32 data_size)
s_buffer->Unmap(alloc_size);

// bind the same sub-buffer to all stages
for (u32 index = 1; index <= 3; index++)
for (u32 index = 1; index <= 4; index++)
glBindBufferRange(GL_UNIFORM_BUFFER, index, s_buffer->m_buffer, buffer.second, data_size);

ADDSTAT(g_stats.this_frame.bytes_uniform_streamed, data_size);
Expand Down
3 changes: 2 additions & 1 deletion Source/Core/VideoBackends/Vulkan/Constants.h
Expand Up @@ -38,7 +38,7 @@ enum DESCRIPTOR_SET_LAYOUT

// We use four pipeline layouts:
// - Standard
// - Per-stage UBO (VS/GS/PS, VS constants accessible from PS) [set=0, binding=0-2]
// - Per-stage UBO (VS/GS/PS, VS constants accessible from PS) [set=0, binding=0-3]
// - 8 combined image samplers (accessible from PS) [set=1, binding=0-7]
// - 1 SSBO accessible from PS if supported [set=2, binding=0]
// - Uber
Expand Down Expand Up @@ -70,6 +70,7 @@ enum UNIFORM_BUFFER_DESCRIPTOR_SET_BINDING
{
// Enumerators are implicitly numbered from 0 in declaration order, so inserting an entry
// ahead of another one changes the numeric value of everything after it.
// Pixel shader constants.
UBO_DESCRIPTOR_SET_BINDING_PS,
// Vertex shader constants.
UBO_DESCRIPTOR_SET_BINDING_VS,
// Constants for custom pixel shaders.
UBO_DESCRIPTOR_SET_BINDING_PS_CUST,
// Geometry shader constants.
// NOTE(review): the descriptor set layout code states the geometry shader buffer must be the
// last binding — add new entries above this one.
UBO_DESCRIPTOR_SET_BINDING_GS,
// Count of the bindings above; not a binding itself.
NUM_UBO_DESCRIPTOR_SET_BINDINGS
};
Expand Down
6 changes: 4 additions & 2 deletions Source/Core/VideoBackends/Vulkan/ObjectCache.cpp
Expand Up @@ -110,11 +110,13 @@ bool ObjectCache::CreateDescriptorSetLayouts()
{
// The geometry shader buffer must be last in this binding set, as we don't include it
// if geometry shaders are not supported by the device. See the decrement below.
static const std::array<VkDescriptorSetLayoutBinding, 3> standard_ubo_bindings{{
static const std::array<VkDescriptorSetLayoutBinding, 4> standard_ubo_bindings{{
{UBO_DESCRIPTOR_SET_BINDING_PS, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 1,
VK_SHADER_STAGE_FRAGMENT_BIT},
{UBO_DESCRIPTOR_SET_BINDING_VS, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 1,
VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT},
{UBO_DESCRIPTOR_SET_BINDING_PS_CUST, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 1,
VK_SHADER_STAGE_FRAGMENT_BIT},
{UBO_DESCRIPTOR_SET_BINDING_GS, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 1,
VK_SHADER_STAGE_GEOMETRY_BIT},
}};
Expand Down Expand Up @@ -170,7 +172,7 @@ bool ObjectCache::CreateDescriptorSetLayouts()
{18, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1, VK_SHADER_STAGE_COMPUTE_BIT},
}};

std::array<VkDescriptorSetLayoutBinding, 3> ubo_bindings = standard_ubo_bindings;
std::array<VkDescriptorSetLayoutBinding, 4> ubo_bindings = standard_ubo_bindings;

std::array<VkDescriptorSetLayoutCreateInfo, NUM_DESCRIPTOR_SET_LAYOUTS> create_infos{{
{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, nullptr, 0,
Expand Down
6 changes: 6 additions & 0 deletions Source/Core/VideoBackends/Vulkan/StateTracker.cpp
Expand Up @@ -494,6 +494,12 @@ void StateTracker::UpdateGXDescriptorSet()
continue;
}

// If custom pixel shaders haven't been used, their buffer range is 0.
// Skip the binding in that case so that no descriptor write is queued for it below.
if (i == UBO_DESCRIPTOR_SET_BINDING_PS_CUST && m_bindings.gx_ubo_bindings[i].range == 0)
{
continue;
}

writes[num_writes++] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
nullptr,
m_gx_descriptor_sets[0],
Expand Down
66 changes: 53 additions & 13 deletions Source/Core/VideoBackends/Vulkan/VKVertexManager.cpp
Expand Up @@ -243,22 +243,43 @@ void VertexManager::UpdatePixelShaderConstants()
auto& system = Core::System::GetInstance();
auto& pixel_shader_manager = system.GetPixelShaderManager();

if (!pixel_shader_manager.dirty || !ReserveConstantStorage())
if (!ReserveConstantStorage())
return;

StateTracker::GetInstance()->SetGXUniformBuffer(
UBO_DESCRIPTOR_SET_BINDING_PS, m_uniform_stream_buffer->GetBuffer(),
m_uniform_stream_buffer->GetCurrentOffset(), sizeof(PixelShaderConstants));
std::memcpy(m_uniform_stream_buffer->GetCurrentHostPointer(), &pixel_shader_manager.constants,
sizeof(PixelShaderConstants));
m_uniform_stream_buffer->CommitMemory(sizeof(PixelShaderConstants));
ADDSTAT(g_stats.this_frame.bytes_uniform_streamed, sizeof(PixelShaderConstants));
pixel_shader_manager.dirty = false;
if (pixel_shader_manager.dirty)
{
StateTracker::GetInstance()->SetGXUniformBuffer(
UBO_DESCRIPTOR_SET_BINDING_PS, m_uniform_stream_buffer->GetBuffer(),
m_uniform_stream_buffer->GetCurrentOffset(), sizeof(PixelShaderConstants));
std::memcpy(m_uniform_stream_buffer->GetCurrentHostPointer(), &pixel_shader_manager.constants,
sizeof(PixelShaderConstants));
m_uniform_stream_buffer->CommitMemory(sizeof(PixelShaderConstants));
ADDSTAT(g_stats.this_frame.bytes_uniform_streamed, sizeof(PixelShaderConstants));
pixel_shader_manager.dirty = false;
}

// Stream the custom pixel shader constants, tracked by their own dirty flag.
// NOTE(review): storage is reserved before this point even when neither dirty flag is set —
// confirm the extra reservation per call is intended.
if (pixel_shader_manager.custom_constants_dirty)
{
// Point the custom-constants binding at the stream buffer's current offset.
// NOTE(review): if the regular pixel constants were committed just before this, the current
// offset has already advanced by sizeof(PixelShaderConstants); nothing here re-aligns it, so
// confirm it still satisfies the device's uniform buffer offset alignment.
StateTracker::GetInstance()->SetGXUniformBuffer(
UBO_DESCRIPTOR_SET_BINDING_PS_CUST, m_uniform_stream_buffer->GetBuffer(),
m_uniform_stream_buffer->GetCurrentOffset(),
static_cast<u32>(pixel_shader_manager.custom_constants.size()));
// Copy the raw bytes of the span to the same position that was just bound.
std::memcpy(m_uniform_stream_buffer->GetCurrentHostPointer(),
pixel_shader_manager.custom_constants.data(),
pixel_shader_manager.custom_constants.size());
m_uniform_stream_buffer->CommitMemory(
static_cast<u32>(pixel_shader_manager.custom_constants.size()));
// NOTE(review): the regular pixel constants path adds its byte count to
// g_stats.this_frame.bytes_uniform_streamed; this path does not — confirm whether it should.
pixel_shader_manager.custom_constants_dirty = false;
}
}

bool VertexManager::ReserveConstantStorage()
{
if (m_uniform_stream_buffer->ReserveMemory(m_uniform_buffer_reserve_size,
auto& system = Core::System::GetInstance();
auto& pixel_shader_manager = system.GetPixelShaderManager();
const u32 custom_constants_size = static_cast<u32>(pixel_shader_manager.custom_constants.size());

if (m_uniform_stream_buffer->ReserveMemory(m_uniform_buffer_reserve_size + custom_constants_size,
g_vulkan_context->GetUniformBufferAlignment()))
{
return true;
Expand All @@ -276,14 +297,21 @@ bool VertexManager::ReserveConstantStorage()

void VertexManager::UploadAllConstants()
{
auto& system = Core::System::GetInstance();
auto& pixel_shader_manager = system.GetPixelShaderManager();

const u32 custom_constants_size = static_cast<u32>(pixel_shader_manager.custom_constants.size());

// We are free to re-use parts of the buffer now since we're uploading all constants.
const u32 ub_alignment = static_cast<u32>(g_vulkan_context->GetUniformBufferAlignment());
const u32 pixel_constants_offset = 0;
const u32 vertex_constants_offset =
Common::AlignUp(pixel_constants_offset + sizeof(PixelShaderConstants), ub_alignment);
const u32 geometry_constants_offset =
Common::AlignUp(vertex_constants_offset + sizeof(VertexShaderConstants), ub_alignment);
const u32 allocation_size = geometry_constants_offset + sizeof(GeometryShaderConstants);
const u32 custom_pixel_constants_offset =
Common::AlignUp(geometry_constants_offset + sizeof(GeometryShaderConstants), ub_alignment);
const u32 allocation_size = custom_pixel_constants_offset + custom_constants_size;

// Allocate everything at once.
// We should only be here if the buffer was full and a command buffer was submitted anyway.
Expand All @@ -293,8 +321,6 @@ void VertexManager::UploadAllConstants()
return;
}

auto& system = Core::System::GetInstance();
auto& pixel_shader_manager = system.GetPixelShaderManager();
auto& vertex_shader_manager = system.GetVertexShaderManager();
auto& geometry_shader_manager = system.GetGeometryShaderManager();

Expand All @@ -307,6 +333,14 @@ void VertexManager::UploadAllConstants()
UBO_DESCRIPTOR_SET_BINDING_VS, m_uniform_stream_buffer->GetBuffer(),
m_uniform_stream_buffer->GetCurrentOffset() + vertex_constants_offset,
sizeof(VertexShaderConstants));

if (!pixel_shader_manager.custom_constants.empty())
{
StateTracker::GetInstance()->SetGXUniformBuffer(
UBO_DESCRIPTOR_SET_BINDING_PS_CUST, m_uniform_stream_buffer->GetBuffer(),
m_uniform_stream_buffer->GetCurrentOffset() + custom_pixel_constants_offset,
custom_constants_size);
}
StateTracker::GetInstance()->SetGXUniformBuffer(
UBO_DESCRIPTOR_SET_BINDING_GS, m_uniform_stream_buffer->GetBuffer(),
m_uniform_stream_buffer->GetCurrentOffset() + geometry_constants_offset,
Expand All @@ -319,6 +353,12 @@ void VertexManager::UploadAllConstants()
&vertex_shader_manager.constants, sizeof(VertexShaderConstants));
std::memcpy(m_uniform_stream_buffer->GetCurrentHostPointer() + geometry_constants_offset,
&geometry_shader_manager.constants, sizeof(GeometryShaderConstants));
if (!pixel_shader_manager.custom_constants.empty())
{
std::memcpy(m_uniform_stream_buffer->GetCurrentHostPointer() + custom_pixel_constants_offset,
pixel_shader_manager.custom_constants.data(),
pixel_shader_manager.custom_constants.size());
}

// Finally, flush buffer memory after copying
m_uniform_stream_buffer->CommitMemory(allocation_size);
Expand Down
2 changes: 1 addition & 1 deletion Source/Core/VideoCommon/GXPipelineTypes.h
Expand Up @@ -19,7 +19,7 @@ namespace VideoCommon
// As pipelines encompass both shader UIDs and render states, changes to either of these should
// also increment the pipeline UID version. Incrementing the UID version will cause all UID
// caches to be invalidated.
constexpr u32 GX_PIPELINE_UID_VERSION = 7; // Last changed in PR 11859
constexpr u32 GX_PIPELINE_UID_VERSION = 8; // Last changed in PR 12185

struct GXPipelineUid
{
Expand Down
2 changes: 1 addition & 1 deletion Source/Core/VideoCommon/GeometryShaderGen.cpp
Expand Up @@ -93,7 +93,7 @@ ShaderCode GenerateGeometryShaderCode(APIType api_type, const ShaderHostConfig&

// uniforms
if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
out.Write("UBO_BINDING(std140, 3) uniform GSBlock {{\n");
out.Write("UBO_BINDING(std140, 4) uniform GSBlock {{\n");
else
out.Write("cbuffer GSBlock {{\n");

Expand Down
6 changes: 6 additions & 0 deletions Source/Core/VideoCommon/PixelShaderManager.h
Expand Up @@ -3,6 +3,8 @@

#pragma once

#include <span>

#include "Common/CommonTypes.h"
#include "VideoCommon/ConstantManager.h"

Expand Down Expand Up @@ -52,6 +54,10 @@ class PixelShaderManager final
PixelShaderConstants constants{};
bool dirty = false;

// Constants for custom shaders
// std::span is a non-owning view, so this member does not keep the bytes alive.
// NOTE(review): whoever assigns it must ensure the backing storage outlives the upload that
// reads it — verify against the code that fills it in.
std::span<u8> custom_constants;
// Set when custom_constants needs to be re-uploaded; the backends clear it after uploading.
bool custom_constants_dirty = false;

private:
bool m_fog_range_adjusted_changed = false;
bool m_viewport_changed = false;
Expand Down
4 changes: 2 additions & 2 deletions Source/Core/VideoCommon/UberShaderVertex.cpp
Expand Up @@ -52,7 +52,7 @@ ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config

if (vertex_loader)
{
out.Write("UBO_BINDING(std140, 3) uniform GSBlock {{\n");
out.Write("UBO_BINDING(std140, 4) uniform GSBlock {{\n");
out.Write("{}", s_geometry_shader_uniforms);
out.Write("}};\n");
}
Expand Down Expand Up @@ -84,7 +84,7 @@ SSBO_BINDING(1) readonly restrict buffer Vertices {{
// D3D12 uses a root constant for this uniform, since it changes with every draw.
// D3D11 doesn't currently support dynamic vertex loader, and we'll have to figure something
// out for it if we want to support it in the future.
out.Write("UBO_BINDING(std140, 4) uniform DX_Constants {{\n"
out.Write("UBO_BINDING(std140, 5) uniform DX_Constants {{\n"
" uint base_vertex;\n"
"}};\n\n"
"uint GetVertexBaseOffset(uint vertex_id) {{\n"
Expand Down
10 changes: 9 additions & 1 deletion Source/Core/VideoCommon/VertexManagerBase.cpp
Expand Up @@ -595,6 +595,7 @@ void VertexManagerBase::Flush()
CustomPixelShaderContents custom_pixel_shader_contents;
std::optional<CustomPixelShader> custom_pixel_shader;
std::vector<std::string> custom_pixel_texture_names;
std::span<u8> custom_pixel_shader_uniforms;
for (int i = 0; i < texture_names.size(); i++)
{
const std::string& texture_name = texture_names[i];
Expand Down Expand Up @@ -644,6 +645,12 @@ void VertexManagerBase::Flush()
// Now we can upload uniforms, as nothing else will override them.
geometry_shader_manager.SetConstants(m_current_primitive_type);
pixel_shader_manager.SetConstants();
// Mark the custom constants dirty when this draw has custom uniforms and they live at a
// different address than the span stored by the previous draw.
// NOTE(review): this compares data pointers only. New contents written to the same address,
// or a size change at the same address, will not set the flag — confirm that cannot happen.
// NOTE(review): switching back to an empty span also leaves the flag unset, so a backend that
// only reacts to the flag keeps its previous binding — confirm that is intended.
if (!custom_pixel_shader_uniforms.empty() &&
pixel_shader_manager.custom_constants.data() != custom_pixel_shader_uniforms.data())
{
pixel_shader_manager.custom_constants_dirty = true;
}
// Always store the current span (possibly empty) so the backend upload sees this draw's data.
pixel_shader_manager.custom_constants = custom_pixel_shader_uniforms;
UploadUniforms();

// Update the pipeline, or compile one if needed.
Expand Down Expand Up @@ -1052,7 +1059,8 @@ void VertexManagerBase::OnEndFrame()
return;

// In order to reduce CPU readback latency, we want to kick a command buffer roughly halfway
// between the draw counters that invoked the readback, or every 250 draws, whichever is smaller.
// between the draw counters that invoked the readback, or every 250 draws, whichever is
// smaller.
if (g_ActiveConfig.iCommandBufferExecuteInterval > 0)
{
u32 last_draw_counter = 0;
Expand Down
4 changes: 2 additions & 2 deletions Source/Core/VideoCommon/VertexShaderGen.cpp
Expand Up @@ -96,14 +96,14 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho

if (uid_data->vs_expand != VSExpand::None)
{
out.Write("UBO_BINDING(std140, 3) uniform GSBlock {{\n");
out.Write("UBO_BINDING(std140, 4) uniform GSBlock {{\n");
out.Write("{}", s_geometry_shader_uniforms);
out.Write("}};\n");

if (api_type == APIType::D3D)
{
// D3D doesn't include the base vertex in SV_VertexID
out.Write("UBO_BINDING(std140, 4) uniform DX_Constants {{\n"
out.Write("UBO_BINDING(std140, 5) uniform DX_Constants {{\n"
" uint base_vertex;\n"
"}};\n\n");
}
Expand Down