Skip to content
Permalink
Browse files
Merge pull request #10781 from tellowkrinkle/UberVertexLoader
VideoCommon: Add dynamic vertex loader for ubershaders to reduce pipeline count
  • Loading branch information
JMC47 committed Sep 20, 2022
2 parents 4ea694a + ee692ab commit 22197c0
Show file tree
Hide file tree
Showing 36 changed files with 558 additions and 163 deletions.
@@ -74,7 +74,7 @@ static std::recursive_mutex g_save_thread_mutex;
static std::thread g_save_thread;

// Don't forget to increase this after doing changes on the savestate system
constexpr u32 STATE_VERSION = 148; // Last changed in PR 10768
constexpr u32 STATE_VERSION = 149; // Last changed in PR 10781

// Maps savestate versions to Dolphin versions.
// Versions after 42 don't need to be added to this list,
@@ -112,6 +112,7 @@ void VideoBackend::FillBackendInfo()
g_Config.backend_info.bSupportsLogicOp = D3D::SupportsLogicOp(g_Config.iAdapter);
g_Config.backend_info.bSupportsSettingObjectNames = true;
g_Config.backend_info.bSupportsPartialMultisampleResolve = true;
g_Config.backend_info.bSupportsDynamicVertexLoader = false;

g_Config.backend_info.Adapters = D3DCommon::GetAdapterNames();
g_Config.backend_info.AAModes = D3D::GetAAModes(g_Config.iAdapter);
@@ -180,7 +180,8 @@ void Renderer::SetPipeline(const AbstractPipeline* pipeline)
m_state.root_signature = dx_pipeline->GetRootSignature();
m_dirty_bits |= DirtyState_RootSignature | DirtyState_PS_CBV | DirtyState_VS_CBV |
DirtyState_GS_CBV | DirtyState_SRV_Descriptor |
DirtyState_Sampler_Descriptor | DirtyState_UAV_Descriptor;
DirtyState_Sampler_Descriptor | DirtyState_UAV_Descriptor |
DirtyState_VS_SRV_Descriptor;
}
if (dx_pipeline->UseIntegerRTV() != m_state.using_integer_rtv)
{
@@ -362,6 +363,11 @@ void Renderer::DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex)
if (!ApplyState())
return;

// DX12 is great and doesn't include the base vertex in SV_VertexID
if (static_cast<const DXPipeline*>(m_current_pipeline)->GetUsage() ==
AbstractPipelineUsage::GXUber)
g_dx_context->GetCommandList()->SetGraphicsRoot32BitConstant(
ROOT_PARAMETER_BASE_VERTEX_CONSTANT, base_vertex, 0);
g_dx_context->GetCommandList()->DrawIndexedInstanced(num_indices, 1, base_index, base_vertex, 0);
}

@@ -494,18 +500,22 @@ void Renderer::SetPixelShaderUAV(D3D12_CPU_DESCRIPTOR_HANDLE handle)
m_dirty_bits |= DirtyState_PS_UAV;
}

void Renderer::SetVertexBuffer(D3D12_GPU_VIRTUAL_ADDRESS address, u32 stride, u32 size)
void Renderer::SetVertexBuffer(D3D12_GPU_VIRTUAL_ADDRESS address, D3D12_CPU_DESCRIPTOR_HANDLE srv,
u32 stride, u32 size)
{
if (m_state.vertex_buffer.BufferLocation == address &&
m_state.vertex_buffer.StrideInBytes == stride && m_state.vertex_buffer.SizeInBytes == size)
if (m_state.vertex_buffer.BufferLocation != address ||
m_state.vertex_buffer.StrideInBytes != stride || m_state.vertex_buffer.SizeInBytes != size)
{
return;
m_state.vertex_buffer.BufferLocation = address;
m_state.vertex_buffer.StrideInBytes = stride;
m_state.vertex_buffer.SizeInBytes = size;
m_dirty_bits |= DirtyState_VertexBuffer;
}
if (m_state.vs_srv.ptr != srv.ptr)
{
m_state.vs_srv = srv;
m_dirty_bits |= DirtyState_VS_SRV;
}

m_state.vertex_buffer.BufferLocation = address;
m_state.vertex_buffer.StrideInBytes = stride;
m_state.vertex_buffer.SizeInBytes = size;
m_dirty_bits |= DirtyState_VertexBuffer;
}

void Renderer::SetIndexBuffer(D3D12_GPU_VIRTUAL_ADDRESS address, u32 size, DXGI_FORMAT format)
@@ -535,15 +545,17 @@ bool Renderer::ApplyState()
// Clear bits before actually changing state. Some state (e.g. cbuffers) can't be set
// if utility pipelines are bound.
const u32 dirty_bits = m_dirty_bits;
m_dirty_bits &= ~(
DirtyState_Framebuffer | DirtyState_Pipeline | DirtyState_Viewport | DirtyState_ScissorRect |
DirtyState_PS_UAV | DirtyState_PS_CBV | DirtyState_VS_CBV | DirtyState_GS_CBV |
DirtyState_SRV_Descriptor | DirtyState_Sampler_Descriptor | DirtyState_UAV_Descriptor |
DirtyState_VertexBuffer | DirtyState_IndexBuffer | DirtyState_PrimitiveTopology);
m_dirty_bits &=
~(DirtyState_Framebuffer | DirtyState_Pipeline | DirtyState_Viewport |
DirtyState_ScissorRect | DirtyState_PS_UAV | DirtyState_PS_CBV | DirtyState_VS_CBV |
DirtyState_GS_CBV | DirtyState_SRV_Descriptor | DirtyState_Sampler_Descriptor |
DirtyState_UAV_Descriptor | DirtyState_VertexBuffer | DirtyState_IndexBuffer |
DirtyState_PrimitiveTopology | DirtyState_VS_SRV_Descriptor);

auto* const cmdlist = g_dx_context->GetCommandList();
auto* const pipeline = static_cast<const DXPipeline*>(m_current_pipeline);
if (dirty_bits & DirtyState_Pipeline)
cmdlist->SetPipelineState(static_cast<const DXPipeline*>(m_current_pipeline)->GetPipeline());
cmdlist->SetPipelineState(pipeline->GetPipeline());

if (dirty_bits & DirtyState_Framebuffer)
BindFramebuffer(static_cast<DXFramebuffer*>(m_current_framebuffer));
@@ -572,7 +584,7 @@ bool Renderer::ApplyState()
m_state.sampler_descriptor_base);
}

if (static_cast<const DXPipeline*>(m_current_pipeline)->GetUsage() == AbstractPipelineUsage::GX)
if (pipeline->GetUsage() != AbstractPipelineUsage::Utility)
{
if (dirty_bits & DirtyState_VS_CBV)
{
@@ -589,6 +601,13 @@ bool Renderer::ApplyState()
}
}

if (dirty_bits & DirtyState_VS_SRV_Descriptor &&
pipeline->GetUsage() == AbstractPipelineUsage::GXUber)
{
cmdlist->SetGraphicsRootDescriptorTable(ROOT_PARAMETER_VS_SRV,
m_state.vertex_srv_descriptor_base);
}

if (dirty_bits & DirtyState_GS_CBV)
{
cmdlist->SetGraphicsRootConstantBufferView(ROOT_PARAMETER_GS_CBV,
@@ -641,7 +660,9 @@ void Renderer::UpdateDescriptorTables()
const bool sampler_update_failed =
(m_dirty_bits & DirtyState_Samplers) && !UpdateSamplerDescriptorTable();
const bool uav_update_failed = (m_dirty_bits & DirtyState_PS_UAV) && !UpdateUAVDescriptorTable();
if (texture_update_failed || sampler_update_failed || uav_update_failed)
const bool srv_update_failed =
(m_dirty_bits & DirtyState_VS_SRV) && !UpdateVSSRVDescriptorTable();
if (texture_update_failed || sampler_update_failed || uav_update_failed || srv_update_failed)
{
WARN_LOG_FMT(VIDEO, "Executing command list while waiting for temporary {}",
texture_update_failed ? "descriptors" : "samplers");
@@ -651,6 +672,7 @@ void Renderer::UpdateDescriptorTables()
UpdateSRVDescriptorTable();
UpdateSamplerDescriptorTable();
UpdateUAVDescriptorTable();
UpdateVSSRVDescriptorTable();
}
}

@@ -700,6 +722,26 @@ bool Renderer::UpdateUAVDescriptorTable()
return true;
}

bool Renderer::UpdateVSSRVDescriptorTable()
{
if (!g_ActiveConfig.backend_info.bSupportsDynamicVertexLoader ||
static_cast<const DXPipeline*>(m_current_pipeline)->GetUsage() !=
AbstractPipelineUsage::GXUber)
{
return true;
}

DescriptorHandle handle;
if (!g_dx_context->GetDescriptorAllocator()->Allocate(1, &handle))
return false;

g_dx_context->GetDevice()->CopyDescriptorsSimple(1, handle.cpu_handle, m_state.vs_srv,
D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
m_state.vertex_srv_descriptor_base = handle.gpu_handle;
m_dirty_bits = (m_dirty_bits & ~DirtyState_VS_SRV) | DirtyState_VS_SRV_Descriptor;
return true;
}

bool Renderer::UpdateComputeUAVDescriptorTable()
{
DescriptorHandle handle;
@@ -88,7 +88,8 @@ class Renderer final : public ::Renderer
void SetPixelShaderUAV(D3D12_CPU_DESCRIPTOR_HANDLE handle);

// Graphics vertex/index buffer binding.
void SetVertexBuffer(D3D12_GPU_VIRTUAL_ADDRESS address, u32 stride, u32 size);
void SetVertexBuffer(D3D12_GPU_VIRTUAL_ADDRESS address, D3D12_CPU_DESCRIPTOR_HANDLE srv,
u32 stride, u32 size);
void SetIndexBuffer(D3D12_GPU_VIRTUAL_ADDRESS address, u32 size, DXGI_FORMAT format);

// Binds all dirty state
@@ -126,14 +127,17 @@ class Renderer final : public ::Renderer
DirtyState_RootSignature = (1 << 17),
DirtyState_ComputeRootSignature = (1 << 18),
DirtyState_DescriptorHeaps = (1 << 19),
DirtyState_VS_SRV = (1 << 20),
DirtyState_VS_SRV_Descriptor = (1 << 21),

DirtyState_All =
DirtyState_Framebuffer | DirtyState_Pipeline | DirtyState_Textures | DirtyState_Samplers |
DirtyState_Viewport | DirtyState_ScissorRect | DirtyState_ComputeImageTexture |
DirtyState_PS_UAV | DirtyState_PS_CBV | DirtyState_VS_CBV | DirtyState_GS_CBV |
DirtyState_SRV_Descriptor | DirtyState_Sampler_Descriptor | DirtyState_UAV_Descriptor |
DirtyState_VertexBuffer | DirtyState_IndexBuffer | DirtyState_PrimitiveTopology |
DirtyState_RootSignature | DirtyState_ComputeRootSignature | DirtyState_DescriptorHeaps
DirtyState_RootSignature | DirtyState_ComputeRootSignature | DirtyState_DescriptorHeaps |
DirtyState_VS_SRV | DirtyState_VS_SRV_Descriptor
};

void CheckForSwapChainChanges();
@@ -144,6 +148,7 @@ class Renderer final : public ::Renderer
void UpdateDescriptorTables();
bool UpdateSRVDescriptorTable();
bool UpdateUAVDescriptorTable();
bool UpdateVSSRVDescriptorTable();
bool UpdateComputeUAVDescriptorTable();
bool UpdateSamplerDescriptorTable();

@@ -157,11 +162,13 @@ class Renderer final : public ::Renderer
DXShader* compute_shader = nullptr;
std::array<D3D12_GPU_VIRTUAL_ADDRESS, 3> constant_buffers = {};
std::array<D3D12_CPU_DESCRIPTOR_HANDLE, MAX_TEXTURES> textures = {};
D3D12_CPU_DESCRIPTOR_HANDLE vs_srv = {};
D3D12_CPU_DESCRIPTOR_HANDLE ps_uav = {};
SamplerStateSet samplers = {};
const DXTexture* compute_image_texture = nullptr;
D3D12_VIEWPORT viewport = {};
D3D12_RECT scissor = {};
D3D12_GPU_DESCRIPTOR_HANDLE vertex_srv_descriptor_base = {};
D3D12_GPU_DESCRIPTOR_HANDLE srv_descriptor_base = {};
D3D12_GPU_DESCRIPTOR_HANDLE sampler_descriptor_base = {};
D3D12_GPU_DESCRIPTOR_HANDLE uav_descriptor_base = {};
@@ -64,6 +64,18 @@ bool VertexManager::Initialize()
&srv_desc, dh.cpu_handle);
}

if (!g_dx_context->GetDescriptorHeapManager().Allocate(&m_vertex_srv))
{
PanicAlertFmt("Failed to allocate descriptor for vertex srv");
return false;
}

D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = {DXGI_FORMAT_R32_UINT, D3D12_SRV_DIMENSION_BUFFER,
D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING};
srv_desc.Buffer.NumElements = m_vertex_stream_buffer.GetSize() / sizeof(u32);
g_dx_context->GetDevice()->CreateShaderResourceView(m_vertex_stream_buffer.GetBuffer(), &srv_desc,
m_vertex_srv.cpu_handle);

UploadAllConstants();
return true;
}
@@ -115,7 +127,8 @@ void VertexManager::CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_in
ADDSTAT(g_stats.this_frame.bytes_vertex_streamed, static_cast<int>(vertex_data_size));
ADDSTAT(g_stats.this_frame.bytes_index_streamed, static_cast<int>(index_data_size));

Renderer::GetInstance()->SetVertexBuffer(m_vertex_stream_buffer.GetGPUPointer(), vertex_stride,
Renderer::GetInstance()->SetVertexBuffer(m_vertex_stream_buffer.GetGPUPointer(),
m_vertex_srv.cpu_handle, vertex_stride,
m_vertex_stream_buffer.GetSize());
Renderer::GetInstance()->SetIndexBuffer(m_index_stream_buffer.GetGPUPointer(),
m_index_stream_buffer.GetSize(), DXGI_FORMAT_R16_UINT);
@@ -46,6 +46,7 @@ class VertexManager final : public VertexManagerBase
StreamBuffer m_uniform_stream_buffer;
StreamBuffer m_texel_stream_buffer;
std::array<DescriptorHandle, NUM_TEXEL_BUFFER_FORMATS> m_texel_buffer_views = {};
DescriptorHandle m_vertex_srv = {};
};

} // namespace DX12
@@ -261,6 +261,16 @@ bool DXContext::CreateDescriptorHeaps()
return true;
}

// Fills in *rp as a 32-bit root constants parameter.
//   rp         - root parameter to initialise (must not be null; it is dereferenced).
//   shader_reg - shader register the constants are bound to; register space is always 0.
//   num_values - number of 32-bit values the parameter carries.
//   visibility - shader stage(s) that can see the constants.
static void SetRootParamConstant(D3D12_ROOT_PARAMETER* rp, u32 shader_reg, u32 num_values,
                                 D3D12_SHADER_VISIBILITY visibility)
{
  rp->ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS;
  rp->ShaderVisibility = visibility;

  D3D12_ROOT_CONSTANTS& constants = rp->Constants;
  constants.ShaderRegister = shader_reg;
  constants.RegisterSpace = 0;
  constants.Num32BitValues = num_values;
}

static void SetRootParamCBV(D3D12_ROOT_PARAMETER* rp, u32 shader_reg,
D3D12_SHADER_VISIBILITY visibility)
{
@@ -345,6 +355,11 @@ bool DXContext::CreateGXRootSignature()
param_count++;
SetRootParamCBV(&params[param_count], 0, D3D12_SHADER_VISIBILITY_GEOMETRY);
param_count++;
SetRootParamTable(&params[param_count], &ranges[param_count], D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 3,
1, D3D12_SHADER_VISIBILITY_VERTEX);
param_count++;
SetRootParamConstant(&params[param_count], 2, 1, D3D12_SHADER_VISIBILITY_VERTEX);
param_count++;

// Since these must be contiguous, pixel lighting goes to bbox if not enabled.
if (g_ActiveConfig.bBBoxEnable)
@@ -27,6 +27,8 @@ enum ROOT_PARAMETER
ROOT_PARAMETER_VS_CBV,
ROOT_PARAMETER_VS_CBV2,
ROOT_PARAMETER_GS_CBV,
ROOT_PARAMETER_VS_SRV,
ROOT_PARAMETER_BASE_VERTEX_CONSTANT,
ROOT_PARAMETER_PS_UAV_OR_CBV2,
ROOT_PARAMETER_PS_CBV2, // ROOT_PARAMETER_PS_UAV_OR_CBV2 if bbox is not enabled
NUM_ROOT_PARAMETERS
@@ -165,6 +165,7 @@ std::unique_ptr<DXPipeline> DXPipeline::Create(const AbstractPipelineConfig& con
switch (config.usage)
{
case AbstractPipelineUsage::GX:
case AbstractPipelineUsage::GXUber:
desc.pRootSignature = g_dx_context->GetGXRootSignature();
break;
case AbstractPipelineUsage::Utility:
@@ -87,6 +87,7 @@ void VideoBackend::FillBackendInfo()
g_Config.backend_info.bSupportsLodBiasInSampler = true;
g_Config.backend_info.bSupportsSettingObjectNames = true;
g_Config.backend_info.bSupportsPartialMultisampleResolve = true;
g_Config.backend_info.bSupportsDynamicVertexLoader = true;

// We can only check texture support once we have a device.
if (g_dx_context)
@@ -220,7 +220,7 @@ static MTLCullMode Convert(CullMode cull)
switch (cull)
{
case CullMode::None:
case CullMode::All: // Handled by disabling rasterization
case CullMode::All: // Handled by VertexLoaderManager::RunVertices
return MTLCullModeNone;
case CullMode::Front:
return MTLCullModeFront;
@@ -289,7 +289,8 @@ explicit VertexAttribute(AttributeFormat format)
}
};
template <size_t N>
static void CopyAll(std::array<VertexAttribute, N>& output, const AttributeFormat (&input)[N])
static void CopyAll(std::array<VertexAttribute, N>& output,
const std::array<AttributeFormat, N>& input)
{
for (size_t i = 0; i < N; ++i)
output[i] = VertexAttribute(input[i]);
@@ -327,13 +328,14 @@ static void CopyAll(std::array<VertexAttribute, N>& output, const AttributeForma
blend.subtractAlpha = cfg.blending_state.subtractAlpha.Value();
// clang-format on
}
// Throw extras in bits we don't otherwise use
if (cfg.rasterization_state.cullmode == CullMode::All)
blend.hex |= 1 << 29;
if (cfg.rasterization_state.primitive == PrimitiveType::Points)
blend.hex |= 1 << 30;
else if (cfg.rasterization_state.primitive == PrimitiveType::Lines)
blend.hex |= 1 << 31;

if (cfg.usage != AbstractPipelineUsage::GXUber)
{
if (cfg.rasterization_state.primitive == PrimitiveType::Points)
is_points = true;
else if (cfg.rasterization_state.primitive == PrimitiveType::Lines)
is_lines = true;
}
}
PipelineID() { memset(this, 0, sizeof(*this)); }
PipelineID(const PipelineID& other) { memcpy(this, &other, sizeof(*this)); }
@@ -359,7 +361,13 @@ static void CopyAll(std::array<VertexAttribute, N>& output, const AttributeForma
VertexAttribute v_posmtx;
const Shader* vertex_shader;
const Shader* fragment_shader;
BlendingState blend;
union
{
BlendingState blend;
// Throw extras in bits we don't otherwise use
BitField<30, 1, bool, u32> is_points;
BitField<31, 1, bool, u32> is_lines;
};
FramebufferState framebuffer;
};

@@ -377,24 +385,17 @@ StoredPipeline CreatePipeline(const AbstractPipelineConfig& config)
auto desc = MRCTransfer([MTLRenderPipelineDescriptor new]);
[desc setVertexFunction:static_cast<const Shader*>(config.vertex_shader)->GetShader()];
[desc setFragmentFunction:static_cast<const Shader*>(config.pixel_shader)->GetShader()];
if (config.usage == AbstractPipelineUsage::GX)
{
if ([[[desc vertexFunction] label] containsString:@"Uber"])
[desc
setLabel:[NSString stringWithFormat:@"GX Uber Pipeline %d", m_pipeline_counter[0]++]];
else
[desc setLabel:[NSString stringWithFormat:@"GX Pipeline %d", m_pipeline_counter[1]++]];
}
if (config.usage == AbstractPipelineUsage::GXUber)
[desc setLabel:[NSString stringWithFormat:@"GX Uber Pipeline %d", m_pipeline_counter[0]++]];
else if (config.usage == AbstractPipelineUsage::GX)
[desc setLabel:[NSString stringWithFormat:@"GX Pipeline %d", m_pipeline_counter[1]++]];
else
{
[desc setLabel:[NSString stringWithFormat:@"Utility Pipeline %d", m_pipeline_counter[2]++]];
}
if (config.vertex_format)
[desc setVertexDescriptor:static_cast<const VertexFormat*>(config.vertex_format)->Get()];
RasterizationState rs = config.rasterization_state;
[desc setInputPrimitiveTopology:GetClass(rs.primitive)];
if (rs.cullmode == CullMode::All)
[desc setRasterizationEnabled:NO];
if (config.usage != AbstractPipelineUsage::GXUber)
[desc setInputPrimitiveTopology:GetClass(rs.primitive)];
MTLRenderPipelineColorAttachmentDescriptor* color0 =
[[desc colorAttachments] objectAtIndexedSubscript:0];
BlendingState bs = config.blending_state;

4 comments on commit 22197c0

@candid-crow
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This dev update broke save state loading for me; reverting to 5.0-17392 restored functionality.

@jordan-woyak
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This dev update broke save state loading for me; reverting to 5.0-17392 restored functionality.

Breaking save states from older versions is intentional. Do newly made save states load properly?

@candid-crow
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Newly made save states do load properly, yes. If it's intentional, was there a note I missed saying that it would break loading older states? I ask because I've only been on the development build for a few weeks, but in that span this is the first time, as far as I've noticed, that an update has disabled loading save states from a prior build.

@JMC47
Copy link
Contributor Author

@JMC47 JMC47 commented on 22197c0 Sep 21, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's normal for builds that change logic to break savestate compatibility, so that older savestates only work in older builds. If you try to load a savestate in a build that breaks compatibility, it should tell you which version of Dolphin you need to use in order to load it.

Please sign in to comment.