@@ -608,7 +608,7 @@ static NSRange RangeOfBits(u32 value)
BeginRenderPass(MTLLoadActionLoad);
id<MTLRenderCommandEncoder> enc = m_current_render_encoder;
const Pipeline* pipe = m_state.render_pipeline;
bool is_gx = pipe->Usage() == AbstractPipelineUsage::GX;
bool is_gx = pipe->Usage() != AbstractPipelineUsage::Utility;
NSString* label = is_gx ? LABEL_GX : LABEL_UTIL;
if (m_flags.should_apply_label && m_current.label != label)
{
@@ -74,6 +74,7 @@
config->backend_info.bSupportsSettingObjectNames = true;
// Metal requires multisample resolve to be done on a render pass
config->backend_info.bSupportsPartialMultisampleResolve = false;
config->backend_info.bSupportsDynamicVertexLoader = true;
}

void Metal::Util::PopulateBackendInfoAdapters(VideoConfig* config,
@@ -426,6 +427,7 @@ fragment float4 fbfetch_test(float4 in [[color(0), raster_order_group(0)]]) {
static const spirv_cross::MSLResourceBinding resource_bindings[] = {
MakeResourceBinding(spv::ExecutionModelVertex, 0, 0, 1, 0, 0), // vs/ubo
MakeResourceBinding(spv::ExecutionModelVertex, 0, 1, 1, 0, 0), // vs/ubo
MakeResourceBinding(spv::ExecutionModelVertex, 2, 1, 0, 0, 0), // vs/ssbo
MakeResourceBinding(spv::ExecutionModelFragment, 0, 0, 0, 0, 0), // vs/ubo
MakeResourceBinding(spv::ExecutionModelFragment, 0, 1, 1, 0, 0), // vs/ubo
MakeResourceBinding(spv::ExecutionModelFragment, 1, 0, 0, 0, 0), // ps/samp0
@@ -125,7 +125,7 @@ static void SetAttribute(MTLVertexDescriptor* desc, u32 attribute, const Attribu

template <size_t N>
static void SetAttributes(MTLVertexDescriptor* desc, u32 attribute,
const AttributeFormat (&format)[N])
const std::array<AttributeFormat, N>& format)
{
for (size_t i = 0; i < N; ++i)
SetAttribute(desc, attribute + i, format[i]);
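
A note on this signature change: a reference-to-C-array parameter cannot bind to a std::array, so once the PortableVertexDeclaration members become std::array (see the VideoCommon hunk further down), the template has to follow suit. A minimal sketch of why callers keep working, with the types reduced to essentials and the names hypothetical:

#include <array>
#include <cstddef>

struct AttributeFormat
{
};

template <size_t N>
static void SetAttributes(const std::array<AttributeFormat, N>& format)
{
  (void)format;  // per-attribute setup elided; N is still deduced from the argument
}

int main()
{
  std::array<AttributeFormat, 8> texcoords{};  // like PortableVertexDeclaration::texcoords
  SetAttributes(texcoords);  // OK; the old C-array reference parameter could not bind here
}
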
@@ -60,6 +60,7 @@ void VideoBackend::InitBackendInfo()
g_Config.backend_info.bSupportsLodBiasInSampler = false;
g_Config.backend_info.bSupportsSettingObjectNames = false;
g_Config.backend_info.bSupportsPartialMultisampleResolve = true;
g_Config.backend_info.bSupportsDynamicVertexLoader = false;

// aamodes: We only support 1 sample, so no MSAA
g_Config.backend_info.Adapters.clear();
@@ -94,6 +94,8 @@ void VideoBackend::InitBackendInfo()
g_Config.backend_info.bSupportsPipelineCacheData = false;
g_Config.backend_info.bSupportsLodBiasInSampler = true;
g_Config.backend_info.bSupportsPartialMultisampleResolve = true;
// Unnecessary since OGL doesn't use pipelines
g_Config.backend_info.bSupportsDynamicVertexLoader = false;

// TODO: There is a bug here: if texel buffers or SSBOs/atomics are not supported, the graphics
// options will show the option even though it is not supported. The only way around this would be
@@ -89,6 +89,7 @@ void VideoSoftware::InitBackendInfo()
g_Config.backend_info.bSupportsLodBiasInSampler = false;
g_Config.backend_info.bSupportsSettingObjectNames = false;
g_Config.backend_info.bSupportsPartialMultisampleResolve = true;
g_Config.backend_info.bSupportsDynamicVertexLoader = false;

// aamodes
g_Config.backend_info.AAModes = {1};
@@ -39,6 +39,8 @@ enum DESCRIPTOR_SET_LAYOUT
// - Per-stage UBO (VS/GS/PS, VS constants accessible from PS) [set=0, binding=0-2]
// - 8 combined image samplers (accessible from PS) [set=1, binding=0-7]
// - 1 SSBO accessible from PS if supported [set=2, binding=0]
// - Uber
// - Like standard, plus 1 SSBO accessible from VS if supported [set=2, binding=1]
// - Utility
// - 1 combined UBO, accessible from VS/GS/PS [set=0, binding=0]
// - 8 combined image samplers (accessible from PS) [set=1, binding=0-7]
@@ -55,6 +57,7 @@ enum DESCRIPTOR_SET_LAYOUT
enum PIPELINE_LAYOUT
{
PIPELINE_LAYOUT_STANDARD,
PIPELINE_LAYOUT_UBER,
PIPELINE_LAYOUT_UTILITY,
PIPELINE_LAYOUT_COMPUTE,
NUM_PIPELINE_LAYOUTS
@@ -123,8 +123,10 @@ bool ObjectCache::CreateDescriptorSetLayouts()
VK_SHADER_STAGE_FRAGMENT_BIT},
}};

static const std::array<VkDescriptorSetLayoutBinding, 1> standard_ssbo_bindings{{
// The dynamic vertex loader's vertex buffer must be last here, for similar reasons
static const std::array<VkDescriptorSetLayoutBinding, 2> standard_ssbo_bindings{{
{0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_FRAGMENT_BIT},
{1, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_VERTEX_BIT},
}};

static const std::array<VkDescriptorSetLayoutBinding, 1> utility_ubo_bindings{{
@@ -173,6 +175,10 @@ bool ObjectCache::CreateDescriptorSetLayouts()
if (!g_ActiveConfig.backend_info.bSupportsGeometryShaders)
create_infos[DESCRIPTOR_SET_LAYOUT_STANDARD_UNIFORM_BUFFERS].bindingCount--;

// Remove the dynamic vertex loader's buffer if it'll never be needed
if (!g_ActiveConfig.backend_info.bSupportsDynamicVertexLoader)
create_infos[DESCRIPTOR_SET_LAYOUT_STANDARD_SHADER_STORAGE_BUFFERS].bindingCount--;

for (size_t i = 0; i < create_infos.size(); i++)
{
VkResult res = vkCreateDescriptorSetLayout(g_vulkan_context->GetDevice(), &create_infos[i],
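
The bindingCount-- above (and the matching setLayoutCount-- in CreatePipelineLayouts below) relies on the optional binding being declared last: shrinking the count drops entries from the end of the array. A minimal self-contained sketch of the trick, with the support flag passed in as a hypothetical parameter:

#include <vulkan/vulkan.h>

VkDescriptorSetLayoutCreateInfo MakeSsboSetInfo(const VkDescriptorSetLayoutBinding* bindings,
                                                bool supports_dynamic_vertex_loader)
{
  VkDescriptorSetLayoutCreateInfo info = {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
                                          nullptr, 0, 2, bindings};
  if (!supports_dynamic_vertex_loader)
    info.bindingCount--;  // drops binding 1 (VS vertex SSBO); binding 0 (bbox SSBO) remains
  return info;
}
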
@@ -206,6 +212,11 @@ bool ObjectCache::CreatePipelineLayouts()
m_descriptor_set_layouts[DESCRIPTOR_SET_LAYOUT_STANDARD_SAMPLERS],
m_descriptor_set_layouts[DESCRIPTOR_SET_LAYOUT_STANDARD_SHADER_STORAGE_BUFFERS],
};
const std::array<VkDescriptorSetLayout, 3> uber_sets{
m_descriptor_set_layouts[DESCRIPTOR_SET_LAYOUT_STANDARD_UNIFORM_BUFFERS],
m_descriptor_set_layouts[DESCRIPTOR_SET_LAYOUT_STANDARD_SAMPLERS],
m_descriptor_set_layouts[DESCRIPTOR_SET_LAYOUT_STANDARD_SHADER_STORAGE_BUFFERS],
};
const std::array<VkDescriptorSetLayout, 2> utility_sets{
m_descriptor_set_layouts[DESCRIPTOR_SET_LAYOUT_UTILITY_UNIFORM_BUFFER],
m_descriptor_set_layouts[DESCRIPTOR_SET_LAYOUT_UTILITY_SAMPLERS],
@@ -220,6 +231,10 @@ bool ObjectCache::CreatePipelineLayouts()
{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, nullptr, 0,
static_cast<u32>(standard_sets.size()), standard_sets.data(), 0, nullptr},

// Uber
{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, nullptr, 0,
static_cast<u32>(uber_sets.size()), uber_sets.data(), 0, nullptr},

// Utility
{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, nullptr, 0,
static_cast<u32>(utility_sets.size()), utility_sets.data(), 0, nullptr},
@@ -232,6 +247,10 @@ bool ObjectCache::CreatePipelineLayouts()
// If bounding box is unsupported, don't bother with the SSBO descriptor set.
if (!g_ActiveConfig.backend_info.bSupportsBBox)
pipeline_layout_info[PIPELINE_LAYOUT_STANDARD].setLayoutCount--;
// If neither SSBO-using feature is supported, skip in ubershaders too
if (!g_ActiveConfig.backend_info.bSupportsBBox &&
!g_ActiveConfig.backend_info.bSupportsDynamicVertexLoader)
pipeline_layout_info[PIPELINE_LAYOUT_UBER].setLayoutCount--;

for (size_t i = 0; i < pipeline_layout_info.size(); i++)
{
@@ -77,14 +77,23 @@ bool StateTracker::Initialize()
return true;
}

void StateTracker::SetVertexBuffer(VkBuffer buffer, VkDeviceSize offset)
void StateTracker::SetVertexBuffer(VkBuffer buffer, VkDeviceSize offset, u32 size)
{
if (m_vertex_buffer == buffer && m_vertex_buffer_offset == offset)
return;

m_vertex_buffer = buffer;
m_vertex_buffer_offset = offset;
m_dirty_flags |= DIRTY_FLAG_VERTEX_BUFFER;
if (m_vertex_buffer != buffer || m_vertex_buffer_offset != offset)
{
m_vertex_buffer = buffer;
m_vertex_buffer_offset = offset;
m_dirty_flags |= DIRTY_FLAG_VERTEX_BUFFER;
}
if (m_bindings.gx_uber_vertex_ssbo.buffer != buffer ||
m_bindings.gx_uber_vertex_ssbo.offset != offset ||
m_bindings.gx_uber_vertex_ssbo.range != size)
{
m_bindings.gx_uber_vertex_ssbo.buffer = buffer;
m_bindings.gx_uber_vertex_ssbo.offset = offset;
m_bindings.gx_uber_vertex_ssbo.range = size;
m_dirty_flags |= DIRTY_FLAG_GX_SSBO;
}
}

void StateTracker::SetIndexBuffer(VkBuffer buffer, VkDeviceSize offset, VkIndexType type)
@@ -366,8 +375,13 @@ bool StateTracker::Bind()

// Re-bind parts of the pipeline
const VkCommandBuffer command_buffer = g_command_buffer_mgr->GetCurrentCommandBuffer();
if (m_dirty_flags & DIRTY_FLAG_VERTEX_BUFFER)
const bool needs_vertex_buffer = !g_ActiveConfig.backend_info.bSupportsDynamicVertexLoader ||
m_pipeline->GetUsage() != AbstractPipelineUsage::GXUber;
if (needs_vertex_buffer && (m_dirty_flags & DIRTY_FLAG_VERTEX_BUFFER))
{
vkCmdBindVertexBuffers(command_buffer, 0, 1, &m_vertex_buffer, &m_vertex_buffer_offset);
m_dirty_flags &= ~DIRTY_FLAG_VERTEX_BUFFER;
}

if (m_dirty_flags & DIRTY_FLAG_INDEX_BUFFER)
vkCmdBindIndexBuffer(command_buffer, m_index_buffer, m_index_buffer_offset, m_index_type);
@@ -381,8 +395,8 @@ bool StateTracker::Bind()
if (m_dirty_flags & DIRTY_FLAG_SCISSOR)
vkCmdSetScissor(command_buffer, 0, 1, &m_scissor);

m_dirty_flags &= ~(DIRTY_FLAG_VERTEX_BUFFER | DIRTY_FLAG_INDEX_BUFFER | DIRTY_FLAG_PIPELINE |
DIRTY_FLAG_VIEWPORT | DIRTY_FLAG_SCISSOR);
m_dirty_flags &=
~(DIRTY_FLAG_INDEX_BUFFER | DIRTY_FLAG_PIPELINE | DIRTY_FLAG_VIEWPORT | DIRTY_FLAG_SCISSOR);
return true;
}
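
Note that DIRTY_FLAG_VERTEX_BUFFER is deliberately dropped from the blanket clear above: a GXUber draw with the dynamic vertex loader skips the vkCmdBindVertexBuffers call, so the flag must survive until a pipeline that actually consumes the vertex buffer is bound. A toy model of the sequence this protects (not Dolphin code):

#include <cassert>
#include <cstdint>

enum : uint32_t
{
  DIRTY_FLAG_VERTEX_BUFFER = 1u << 0
};

struct ToyTracker
{
  uint32_t dirty = DIRTY_FLAG_VERTEX_BUFFER;
  bool Bind(bool needs_vertex_buffer)  // returns whether the buffer was bound
  {
    if (!needs_vertex_buffer || !(dirty & DIRTY_FLAG_VERTEX_BUFFER))
      return false;
    dirty &= ~DIRTY_FLAG_VERTEX_BUFFER;  // cleared only when actually bound
    return true;
  }
};

int main()
{
  ToyTracker t;
  const bool uber_bound = t.Bind(false);  // GXUber draw: vertices come from the SSBO
  const bool gx_bound = t.Bind(true);     // next GX draw: the flag survived, bind happens
  assert(!uber_bound && gx_bound);
}
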

@@ -452,7 +466,7 @@ void StateTracker::EndClearRenderPass()

bool StateTracker::UpdateDescriptorSet()
{
if (m_pipeline->GetUsage() == AbstractPipelineUsage::GX)
if (m_pipeline->GetUsage() != AbstractPipelineUsage::Utility)
return UpdateGXDescriptorSet();
else
return UpdateUtilityDescriptorSet();
@@ -462,7 +476,7 @@ bool StateTracker::UpdateGXDescriptorSet()
{
const size_t MAX_DESCRIPTOR_WRITES = NUM_UBO_DESCRIPTOR_SET_BINDINGS + // UBO
1 + // Samplers
1; // SSBO
2; // SSBOs (bbox + vertex)
std::array<VkWriteDescriptorSet, MAX_DESCRIPTOR_WRITES> writes;
u32 num_writes = 0;

@@ -516,7 +530,12 @@ bool StateTracker::UpdateGXDescriptorSet()
m_dirty_flags = (m_dirty_flags & ~DIRTY_FLAG_GX_SAMPLERS) | DIRTY_FLAG_DESCRIPTOR_SETS;
}

if (g_ActiveConfig.backend_info.bSupportsBBox &&
const bool needs_bbox_ssbo = g_ActiveConfig.backend_info.bSupportsBBox;
const bool needs_vertex_ssbo = g_ActiveConfig.backend_info.bSupportsDynamicVertexLoader &&
m_pipeline->GetUsage() == AbstractPipelineUsage::GXUber;
const bool needs_ssbo = needs_bbox_ssbo || needs_vertex_ssbo;

if (needs_ssbo &&
(m_dirty_flags & DIRTY_FLAG_GX_SSBO || m_gx_descriptor_sets[2] == VK_NULL_HANDLE))
{
m_gx_descriptor_sets[2] =
@@ -528,6 +547,21 @@ bool StateTracker::UpdateGXDescriptorSet()
writes[num_writes++] = {
VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, nullptr, m_gx_descriptor_sets[2], 0, 0, 1,
VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, nullptr, &m_bindings.ssbo, nullptr};

if (g_ActiveConfig.backend_info.bSupportsDynamicVertexLoader)
{
writes[num_writes++] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
nullptr,
m_gx_descriptor_sets[2],
1,
0,
1,
VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
nullptr,
&m_bindings.gx_uber_vertex_ssbo,
nullptr};
}

m_dirty_flags = (m_dirty_flags & ~DIRTY_FLAG_GX_SSBO) | DIRTY_FLAG_DESCRIPTOR_SETS;
}

@@ -538,9 +572,7 @@ bool StateTracker::UpdateGXDescriptorSet()
{
vkCmdBindDescriptorSets(g_command_buffer_mgr->GetCurrentCommandBuffer(),
VK_PIPELINE_BIND_POINT_GRAPHICS, m_pipeline->GetVkPipelineLayout(), 0,
g_ActiveConfig.backend_info.bSupportsBBox ?
NUM_GX_DESCRIPTOR_SETS :
(NUM_GX_DESCRIPTOR_SETS - 1),
needs_ssbo ? NUM_GX_DESCRIPTOR_SETS : (NUM_GX_DESCRIPTOR_SETS - 1),
m_gx_descriptor_sets.data(),
g_ActiveConfig.backend_info.bSupportsGeometryShaders ?
NUM_UBO_DESCRIPTOR_SET_BINDINGS :
@@ -32,7 +32,7 @@ class StateTracker

VKFramebuffer* GetFramebuffer() const { return m_framebuffer; }
const VKPipeline* GetPipeline() const { return m_pipeline; }
void SetVertexBuffer(VkBuffer buffer, VkDeviceSize offset);
void SetVertexBuffer(VkBuffer buffer, VkDeviceSize offset, u32 size);
void SetIndexBuffer(VkBuffer buffer, VkDeviceSize offset, VkIndexType type);
void SetFramebuffer(VKFramebuffer* framebuffer);
void SetPipeline(const VKPipeline* pipeline);
@@ -145,6 +145,7 @@ class StateTracker
std::array<VkDescriptorImageInfo, NUM_PIXEL_SHADER_SAMPLERS> samplers;
std::array<VkBufferView, NUM_COMPUTE_TEXEL_BUFFERS> texel_buffers;
VkDescriptorBufferInfo ssbo;
VkDescriptorBufferInfo gx_uber_vertex_ssbo;
VkDescriptorImageInfo image_texture;
} m_bindings = {};
std::array<VkDescriptorSet, NUM_GX_DESCRIPTOR_SETS> m_gx_descriptor_sets = {};
@@ -251,6 +251,9 @@ std::unique_ptr<VKPipeline> VKPipeline::Create(const AbstractPipelineConfig& con
case AbstractPipelineUsage::GX:
pipeline_layout = g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_STANDARD);
break;
case AbstractPipelineUsage::GXUber:
pipeline_layout = g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_UBER);
break;
case AbstractPipelineUsage::Utility:
pipeline_layout = g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_UTILITY);
break;
@@ -62,7 +62,8 @@ bool VertexManager::Initialize()
return false;

m_vertex_stream_buffer =
StreamBuffer::Create(VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, VERTEX_STREAM_BUFFER_SIZE);
StreamBuffer::Create(VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
VERTEX_STREAM_BUFFER_SIZE);
m_index_stream_buffer =
StreamBuffer::Create(VK_BUFFER_USAGE_INDEX_BUFFER_BIT, INDEX_STREAM_BUFFER_SIZE);
m_uniform_stream_buffer =
@@ -186,7 +187,8 @@ void VertexManager::CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_in
ADDSTAT(g_stats.this_frame.bytes_vertex_streamed, static_cast<int>(vertex_data_size));
ADDSTAT(g_stats.this_frame.bytes_index_streamed, static_cast<int>(index_data_size));

StateTracker::GetInstance()->SetVertexBuffer(m_vertex_stream_buffer->GetBuffer(), 0);
StateTracker::GetInstance()->SetVertexBuffer(m_vertex_stream_buffer->GetBuffer(), 0,
VERTEX_STREAM_BUFFER_SIZE);
StateTracker::GetInstance()->SetIndexBuffer(m_index_stream_buffer->GetBuffer(), 0,
VK_INDEX_TYPE_UINT16);
}
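
Since the vertex stream buffer is now created with VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, the same VkBuffer backs both the classic vertex-buffer bind and the ubershader's SSBO view, and CommitBuffer always passes offset 0 with the full buffer size. The gx_uber_vertex_ssbo descriptor therefore covers the whole buffer and only changes when the buffer itself is recreated. A sketch of that whole-buffer view (a hypothetical helper, not Dolphin code):

#include <vulkan/vulkan.h>

VkDescriptorBufferInfo MakeUberVertexSsboInfo(VkBuffer vertex_stream_buffer, VkDeviceSize size)
{
  VkDescriptorBufferInfo info;
  info.buffer = vertex_stream_buffer;  // created with VERTEX_BUFFER | STORAGE_BUFFER usage
  info.offset = 0;                     // whole-buffer view; draws index into it by vertex
  info.range = size;                   // VERTEX_STREAM_BUFFER_SIZE, as passed above
  return info;
}
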
@@ -295,6 +295,7 @@ void VulkanContext::PopulateBackendInfo(VideoConfig* config)
config->backend_info.bSupportsLodBiasInSampler = false; // Dependent on OS.
config->backend_info.bSupportsSettingObjectNames = false; // Dependent on features.
config->backend_info.bSupportsPartialMultisampleResolve = true; // Assumed support.
config->backend_info.bSupportsDynamicVertexLoader = true; // Assumed support.
}

void VulkanContext::PopulateBackendInfoAdapters(VideoConfig* config, const GPUList& gpu_list)
@@ -20,6 +20,8 @@ class NativeVertexFormat;
// - Per-stage UBO (VS/GS/PS, VS constants accessible from PS)
// - 8 combined image samplers (accessible from PS)
// - 1 SSBO, accessible from PS if bounding box is enabled
// - GX Uber
// - Same as GX, plus one VS SSBO for vertices if dynamic vertex loading is enabled
// - Utility
// - Single UBO, accessible from all stages [set=0, binding=1]
// - 8 combined image samplers (accessible from PS) [set=1, binding=0-7]
@@ -32,6 +34,7 @@ class NativeVertexFormat;
enum class AbstractPipelineUsage
{
GX,
GXUber,
Utility
};
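
This new usage is why the earlier checks flipped from == AbstractPipelineUsage::GX to != AbstractPipelineUsage::Utility: with three values, "not utility" is the test that covers both GX variants. A sketch of the invariant (the helper is illustrative; it does not exist in the source):

enum class AbstractPipelineUsage
{
  GX,
  GXUber,
  Utility
};

static bool IsGXPipeline(AbstractPipelineUsage usage)
{
  return usage != AbstractPipelineUsage::Utility;  // true for both GX and GXUber
}
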

@@ -93,6 +93,13 @@ struct VertexShaderConstants

float4 cached_tangent;
float4 cached_binormal;
// For UberShader vertex loader
u32 vertex_stride;
std::array<u32, 3> vertex_offset_normals;
u32 vertex_offset_position;
u32 vertex_offset_posmtx;
std::array<u32, 2> vertex_offset_colors;
std::array<u32, 8> vertex_offset_texcoords;
};
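
These constants describe the raw vertex layout to the vertex ubershader so it can fetch attributes from the SSBO itself. A CPU-side sketch of that fetch, assuming the packing UpdateOffset() produces later in the diff (dword offset in the low 16 bits, component count in the high 16) and float-typed attributes:

#include <cstdint>
#include <cstring>
#include <vector>

// vertex_ssbo is the raw vertex stream viewed as dwords; stride is in dwords too,
// matching vertex_stride = format.stride / 4 below.
static float FetchComponent(const std::vector<uint32_t>& vertex_ssbo, uint32_t stride,
                            uint32_t vertex_index, uint32_t packed_offset, uint32_t component)
{
  const uint32_t offset = packed_offset & 0xFFFFu;  // dword offset within one vertex
  const uint32_t count = packed_offset >> 16;       // component count, when included
  float value = 0.0f;                               // out-of-range components default
  if (component < count)
    std::memcpy(&value, &vertex_ssbo[vertex_index * stride + offset + component], sizeof(value));
  return value;
}
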

struct GeometryShaderConstants
@@ -58,9 +58,9 @@ struct PortableVertexDeclaration
int stride;

AttributeFormat position;
AttributeFormat normals[3];
AttributeFormat colors[2];
AttributeFormat texcoords[8];
std::array<AttributeFormat, 3> normals;
std::array<AttributeFormat, 2> colors;
std::array<AttributeFormat, 8> texcoords;
AttributeFormat posmtx;

inline bool operator<(const PortableVertexDeclaration& b) const
@@ -588,10 +588,10 @@ AbstractPipelineConfig ShaderCache::GetGXPipelineConfig(
const NativeVertexFormat* vertex_format, const AbstractShader* vertex_shader,
const AbstractShader* geometry_shader, const AbstractShader* pixel_shader,
const RasterizationState& rasterization_state, const DepthState& depth_state,
const BlendingState& blending_state)
const BlendingState& blending_state, AbstractPipelineUsage usage)
{
AbstractPipelineConfig config = {};
config.usage = AbstractPipelineUsage::GX;
config.usage = usage;
config.vertex_format = vertex_format;
config.vertex_shader = vertex_shader;
config.geometry_shader = geometry_shader;
@@ -735,14 +735,16 @@ ShaderCache::GetGXPipelineConfig(const GXPipelineUid& config_in)
}

return GetGXPipelineConfig(config.vertex_format, vs, gs, ps, config.rasterization_state,
config.depth_state, config.blending_state);
config.depth_state, config.blending_state, AbstractPipelineUsage::GX);
}

/// Edits the UID based on driver bugs and other special configurations
static GXUberPipelineUid ApplyDriverBugs(const GXUberPipelineUid& in)
{
GXUberPipelineUid out;
memcpy(&out, &in, sizeof(out)); // Copy padding
if (g_ActiveConfig.backend_info.bSupportsDynamicVertexLoader)
out.vertex_format = nullptr;
if (g_ActiveConfig.backend_info.bSupportsFramebufferFetch)
{
// Always blend in shader
@@ -798,7 +800,8 @@ ShaderCache::GetGXPipelineConfig(const GXUberPipelineUid& config_in)
}

return GetGXPipelineConfig(config.vertex_format, vs, gs, ps, config.rasterization_state,
config.depth_state, config.blending_state);
config.depth_state, config.blending_state,
AbstractPipelineUsage::GXUber);
}

const AbstractPipeline* ShaderCache::InsertGXPipeline(const GXPipelineUid& config,
@@ -1233,32 +1236,32 @@ void ShaderCache::QueueUberShaderPipelines()
dummy_vertex_decl.stride = sizeof(float) * 4;
NativeVertexFormat* dummy_vertex_format =
VertexLoaderManager::GetUberVertexFormat(dummy_vertex_decl);
auto QueueDummyPipeline = [&](const UberShader::VertexShaderUid& vs_uid,
const GeometryShaderUid& gs_uid,
const UberShader::PixelShaderUid& ps_uid) {
GXUberPipelineUid config;
config.vertex_format = dummy_vertex_format;
config.vs_uid = vs_uid;
config.gs_uid = gs_uid;
config.ps_uid = ps_uid;
config.rasterization_state = RenderState::GetCullBackFaceRasterizationState(
static_cast<PrimitiveType>(gs_uid.GetUidData()->primitive_type));
config.depth_state = RenderState::GetNoDepthTestingDepthState();
config.blending_state = RenderState::GetNoBlendingBlendState();
if (ps_uid.GetUidData()->uint_output)
{
// uint_output is only ever enabled when logic ops are enabled.
config.blending_state.logicopenable = true;
config.blending_state.logicmode = LogicOp::And;
}
auto QueueDummyPipeline =
[&](const UberShader::VertexShaderUid& vs_uid, const GeometryShaderUid& gs_uid,
const UberShader::PixelShaderUid& ps_uid, const BlendingState& blend) {
GXUberPipelineUid config;
config.vertex_format = dummy_vertex_format;
config.vs_uid = vs_uid;
config.gs_uid = gs_uid;
config.ps_uid = ps_uid;
config.rasterization_state = RenderState::GetCullBackFaceRasterizationState(
static_cast<PrimitiveType>(gs_uid.GetUidData()->primitive_type));
config.depth_state = RenderState::GetNoDepthTestingDepthState();
config.blending_state = blend;
if (ps_uid.GetUidData()->uint_output)
{
// uint_output is only ever enabled when logic ops are enabled.
config.blending_state.logicopenable = true;
config.blending_state.logicmode = LogicOp::And;
}

auto iter = m_gx_uber_pipeline_cache.find(config);
if (iter != m_gx_uber_pipeline_cache.end())
return;
auto iter = m_gx_uber_pipeline_cache.find(config);
if (iter != m_gx_uber_pipeline_cache.end())
return;

auto& entry = m_gx_uber_pipeline_cache[config];
entry.second = false;
};
auto& entry = m_gx_uber_pipeline_cache[config];
entry.second = false;
};

// Populate the pipeline configs with empty entries, these will be compiled afterwards.
UberShader::EnumerateVertexShaderUids([&](const UberShader::VertexShaderUid& vuid) {
@@ -1275,7 +1278,45 @@ void ShaderCache::QueueUberShaderPipelines()
{
return;
}
QueueDummyPipeline(vuid, guid, cleared_puid);
BlendingState blend = RenderState::GetNoBlendingBlendState();
QueueDummyPipeline(vuid, guid, cleared_puid, blend);
if (g_ActiveConfig.backend_info.bSupportsDynamicVertexLoader)
{
// Not all GPUs need all the pipeline state compiled into shaders, so they tend to key
// compiled shaders based on some subset of the pipeline state.
// Some test results:
// (GPUs tested: AMD Radeon Pro 5600M, Nvidia GT 750M, Intel UHD 630,
// Intel Iris Pro 5200, Apple M1)
// macOS Metal:
// - AMD, Nvidia, Intel GPUs: Shaders are keyed on vertex layout and whether or not
// dual source blend is enabled. That's it.
// - Apple GPUs: Shaders are keyed on vertex layout and all blending settings. We use
// framebuffer fetch here, so the only blending settings used by ubershaders are the
// alphaupdate and colorupdate ones. Also keyed on primitive type, though Metal supports
// setting it to "unknown", which we do for ubershaders (but MoltenVK won't).
// Windows Vulkan:
// - AMD, Nvidia: Definitely keyed on dual source blend, but the others seem more random.
// Changing a setting on one shader will require a recompile, but changing the same
// setting on another won't. Compiling one copy with alphaupdate off, one with colorupdate
// off, and one with DSB on seems to get pretty good coverage, though.
// Windows D3D12:
// - AMD: Keyed on dual source blend and vertex layout
// - Nvidia Kepler: No recompiles for changes to vertex layout or blend
blend.alphaupdate = false;
QueueDummyPipeline(vuid, guid, cleared_puid, blend);
blend.alphaupdate = true;
blend.colorupdate = false;
QueueDummyPipeline(vuid, guid, cleared_puid, blend);
blend.colorupdate = true;
if (!cleared_puid.GetUidData()->no_dual_src && !cleared_puid.GetUidData()->uint_output)
{
blend.blendenable = true;
blend.usedualsrc = true;
blend.srcfactor = SrcBlendFactor::SrcAlpha;
blend.dstfactor = DstBlendFactor::InvSrcAlpha;
QueueDummyPipeline(vuid, guid, cleared_puid, blend);
}
}
});
});
});
@@ -151,7 +151,7 @@ class ShaderCache final
GetGXPipelineConfig(const NativeVertexFormat* vertex_format, const AbstractShader* vertex_shader,
const AbstractShader* geometry_shader, const AbstractShader* pixel_shader,
const RasterizationState& rasterization_state, const DepthState& depth_state,
const BlendingState& blending_state);
const BlendingState& blending_state, AbstractPipelineUsage usage);
std::optional<AbstractPipelineConfig> GetGXPipelineConfig(const GXPipelineUid& uid);
std::optional<AbstractPipelineConfig> GetGXPipelineConfig(const GXUberPipelineUid& uid);
const AbstractPipeline* InsertGXPipeline(const GXPipelineUid& config,
@@ -43,6 +43,7 @@ ShaderHostConfig ShaderHostConfig::GetCurrent()
bits.manual_texture_sampling_custom_texture_sizes =
g_ActiveConfig.ManualTextureSamplingWithHiResTextures();
bits.backend_sampler_lod_bias = g_ActiveConfig.backend_info.bSupportsLodBiasInSampler;
bits.backend_dynamic_vertex_loader = g_ActiveConfig.backend_info.bSupportsDynamicVertexLoader;
return bits;
}

@@ -177,6 +177,7 @@ union ShaderHostConfig
BitField<24, 1, bool, u32> manual_texture_sampling;
BitField<25, 1, bool, u32> manual_texture_sampling_custom_texture_sizes;
BitField<26, 1, bool, u32> backend_sampler_lod_bias;
BitField<27, 1, bool, u32> backend_dynamic_vertex_loader;

static ShaderHostConfig GetCurrent();
};
@@ -302,6 +303,15 @@ static const char s_shader_uniforms[] = "\tuint components;\n"
"\tuint4 xfmem_pack1[8];\n"
"\tfloat4 " I_CACHED_TANGENT ";\n"
"\tfloat4 " I_CACHED_BINORMAL ";\n"
"\tuint vertex_stride;\n"
"\tuint vertex_offset_rawnormal;\n"
"\tuint vertex_offset_rawtangent;\n"
"\tuint vertex_offset_rawbinormal;\n"
"\tuint vertex_offset_rawpos;\n"
"\tuint vertex_offset_posmtx;\n"
"\tuint vertex_offset_rawcolor0;\n"
"\tuint vertex_offset_rawcolor1;\n"
"\tuint4 vertex_offset_rawtex[2];\n" // std140 is pain
"\t#define xfmem_texMtxInfo(i) (xfmem_pack1[(i)].x)\n"
"\t#define xfmem_postMtxInfo(i) (xfmem_pack1[(i)].y)\n"
"\t#define xfmem_color(i) (xfmem_pack1[(i)].z)\n"


@@ -353,7 +353,8 @@ int RunVertices(int vtx_attr_group, OpcodeDecoder::Primitive primitive, int coun
}
s_current_vtx_fmt = loader->m_native_vertex_format;
g_current_components = loader->m_native_components;
VertexShaderManager::SetVertexFormat(loader->m_native_components);
VertexShaderManager::SetVertexFormat(loader->m_native_components,
loader->m_native_vertex_format->GetVertexDeclaration());

// if cull mode is CULL_ALL, tell VertexManager to skip triangles and quads.
// They still need to go through vertex loading, because we need to calculate a zfreeze reference
@@ -606,13 +606,42 @@ void VertexShaderManager::SetMaterialColorChanged(int index)
nMaterialsChanged[index] = true;
}

void VertexShaderManager::SetVertexFormat(u32 components)
static void UpdateValue(bool* dirty, u32* old_value, u32 new_value)
{
if (components != constants.components)
{
constants.components = components;
dirty = true;
}
if (*old_value == new_value)
return;
*old_value = new_value;
*dirty = true;
}

static void UpdateOffset(bool* dirty, bool include_components, u32* old_value,
const AttributeFormat& attribute)
{
if (!attribute.enable)
return;
u32 new_value = attribute.offset / 4; // GPU uses uint offsets
if (include_components)
new_value |= attribute.components << 16;
UpdateValue(dirty, old_value, new_value);
}

template <size_t N>
static void UpdateOffsets(bool* dirty, bool include_components, std::array<u32, N>* old_value,
const std::array<AttributeFormat, N>& attribute)
{
for (size_t i = 0; i < N; i++)
UpdateOffset(dirty, include_components, &(*old_value)[i], attribute[i]);
}

void VertexShaderManager::SetVertexFormat(u32 components, const PortableVertexDeclaration& format)
{
UpdateValue(&dirty, &constants.components, components);
UpdateValue(&dirty, &constants.vertex_stride, format.stride / 4);
UpdateOffset(&dirty, true, &constants.vertex_offset_position, format.position);
UpdateOffset(&dirty, false, &constants.vertex_offset_posmtx, format.posmtx);
UpdateOffsets(&dirty, true, &constants.vertex_offset_texcoords, format.texcoords);
UpdateOffsets(&dirty, false, &constants.vertex_offset_colors, format.colors);
UpdateOffsets(&dirty, false, &constants.vertex_offset_normals, format.normals);
}
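
A worked example of the packing above, with hypothetical values: an enabled 3-component attribute at byte offset 16 becomes dword offset 4 with the component count in the high half, so the shader recovers both with a mask and a shift.

#include <cassert>
#include <cstdint>

int main()
{
  const uint32_t offset = 16;  // byte offset of the attribute within the vertex
  const uint32_t components = 3;
  const uint32_t packed = (offset / 4) | (components << 16);  // as in UpdateOffset()
  assert(packed == 0x00030004);
  assert((packed & 0xFFFFu) == 4);  // dword offset
  assert((packed >> 16) == 3);      // component count
}
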

void VertexShaderManager::SetTexMatrixInfoChanged(int index)
@@ -10,6 +10,7 @@
#include "VideoCommon/ConstantManager.h"

class PointerWrap;
struct PortableVertexDeclaration;

// The non-API dependent parts.
class VertexShaderManager
@@ -29,7 +30,7 @@ class VertexShaderManager
static void SetProjectionChanged();
static void SetMaterialColorChanged(int index);

static void SetVertexFormat(u32 components);
static void SetVertexFormat(u32 components, const PortableVertexDeclaration& format);
static void SetTexMatrixInfoChanged(int index);
static void SetLightingConfigChanged();

@@ -232,6 +232,7 @@ struct VideoConfig final
bool bSupportsLodBiasInSampler = false;
bool bSupportsSettingObjectNames = false;
bool bSupportsPartialMultisampleResolve = false;
bool bSupportsDynamicVertexLoader = false;
} backend_info;

// Utility