Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Merge pull request #10781 from tellowkrinkle/UberVertexLoader
VideoCommon: Add dynamic vertex loader for ubershaders to reduce pipeline count
  • Loading branch information
JMC47 committed Sep 20, 2022
2 parents 4ea694a + ee692ab commit 22197c0
Show file tree
Hide file tree
Showing 36 changed files with 558 additions and 163 deletions.
2 changes: 1 addition & 1 deletion Source/Core/Core/State.cpp
Expand Up @@ -74,7 +74,7 @@ static std::recursive_mutex g_save_thread_mutex;
static std::thread g_save_thread;

// Don't forget to increase this after doing changes on the savestate system
constexpr u32 STATE_VERSION = 148; // Last changed in PR 10768
constexpr u32 STATE_VERSION = 149; // Last changed in PR 10781

// Maps savestate versions to Dolphin versions.
// Versions after 42 don't need to be added to this list,
Expand Down
1 change: 1 addition & 0 deletions Source/Core/VideoBackends/D3D/D3DMain.cpp
Expand Up @@ -112,6 +112,7 @@ void VideoBackend::FillBackendInfo()
g_Config.backend_info.bSupportsLogicOp = D3D::SupportsLogicOp(g_Config.iAdapter);
g_Config.backend_info.bSupportsSettingObjectNames = true;
g_Config.backend_info.bSupportsPartialMultisampleResolve = true;
g_Config.backend_info.bSupportsDynamicVertexLoader = false;

g_Config.backend_info.Adapters = D3DCommon::GetAdapterNames();
g_Config.backend_info.AAModes = D3D::GetAAModes(g_Config.iAdapter);
Expand Down
78 changes: 60 additions & 18 deletions Source/Core/VideoBackends/D3D12/D3D12Renderer.cpp
Expand Up @@ -180,7 +180,8 @@ void Renderer::SetPipeline(const AbstractPipeline* pipeline)
m_state.root_signature = dx_pipeline->GetRootSignature();
m_dirty_bits |= DirtyState_RootSignature | DirtyState_PS_CBV | DirtyState_VS_CBV |
DirtyState_GS_CBV | DirtyState_SRV_Descriptor |
DirtyState_Sampler_Descriptor | DirtyState_UAV_Descriptor;
DirtyState_Sampler_Descriptor | DirtyState_UAV_Descriptor |
DirtyState_VS_SRV_Descriptor;
}
if (dx_pipeline->UseIntegerRTV() != m_state.using_integer_rtv)
{
Expand Down Expand Up @@ -362,6 +363,11 @@ void Renderer::DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex)
if (!ApplyState())
return;

// DX12 is great and doesn't include the base vertex in SV_VertexID
if (static_cast<const DXPipeline*>(m_current_pipeline)->GetUsage() ==
AbstractPipelineUsage::GXUber)
g_dx_context->GetCommandList()->SetGraphicsRoot32BitConstant(
ROOT_PARAMETER_BASE_VERTEX_CONSTANT, base_vertex, 0);
g_dx_context->GetCommandList()->DrawIndexedInstanced(num_indices, 1, base_index, base_vertex, 0);
}

Expand Down Expand Up @@ -494,18 +500,22 @@ void Renderer::SetPixelShaderUAV(D3D12_CPU_DESCRIPTOR_HANDLE handle)
m_dirty_bits |= DirtyState_PS_UAV;
}

void Renderer::SetVertexBuffer(D3D12_GPU_VIRTUAL_ADDRESS address, u32 stride, u32 size)
void Renderer::SetVertexBuffer(D3D12_GPU_VIRTUAL_ADDRESS address, D3D12_CPU_DESCRIPTOR_HANDLE srv,
u32 stride, u32 size)
{
if (m_state.vertex_buffer.BufferLocation == address &&
m_state.vertex_buffer.StrideInBytes == stride && m_state.vertex_buffer.SizeInBytes == size)
if (m_state.vertex_buffer.BufferLocation != address ||
m_state.vertex_buffer.StrideInBytes != stride || m_state.vertex_buffer.SizeInBytes != size)
{
return;
m_state.vertex_buffer.BufferLocation = address;
m_state.vertex_buffer.StrideInBytes = stride;
m_state.vertex_buffer.SizeInBytes = size;
m_dirty_bits |= DirtyState_VertexBuffer;
}
if (m_state.vs_srv.ptr != srv.ptr)
{
m_state.vs_srv = srv;
m_dirty_bits |= DirtyState_VS_SRV;
}

m_state.vertex_buffer.BufferLocation = address;
m_state.vertex_buffer.StrideInBytes = stride;
m_state.vertex_buffer.SizeInBytes = size;
m_dirty_bits |= DirtyState_VertexBuffer;
}

void Renderer::SetIndexBuffer(D3D12_GPU_VIRTUAL_ADDRESS address, u32 size, DXGI_FORMAT format)
Expand Down Expand Up @@ -535,15 +545,17 @@ bool Renderer::ApplyState()
// Clear bits before actually changing state. Some state (e.g. cbuffers) can't be set
// if utility pipelines are bound.
const u32 dirty_bits = m_dirty_bits;
m_dirty_bits &= ~(
DirtyState_Framebuffer | DirtyState_Pipeline | DirtyState_Viewport | DirtyState_ScissorRect |
DirtyState_PS_UAV | DirtyState_PS_CBV | DirtyState_VS_CBV | DirtyState_GS_CBV |
DirtyState_SRV_Descriptor | DirtyState_Sampler_Descriptor | DirtyState_UAV_Descriptor |
DirtyState_VertexBuffer | DirtyState_IndexBuffer | DirtyState_PrimitiveTopology);
m_dirty_bits &=
~(DirtyState_Framebuffer | DirtyState_Pipeline | DirtyState_Viewport |
DirtyState_ScissorRect | DirtyState_PS_UAV | DirtyState_PS_CBV | DirtyState_VS_CBV |
DirtyState_GS_CBV | DirtyState_SRV_Descriptor | DirtyState_Sampler_Descriptor |
DirtyState_UAV_Descriptor | DirtyState_VertexBuffer | DirtyState_IndexBuffer |
DirtyState_PrimitiveTopology | DirtyState_VS_SRV_Descriptor);

auto* const cmdlist = g_dx_context->GetCommandList();
auto* const pipeline = static_cast<const DXPipeline*>(m_current_pipeline);
if (dirty_bits & DirtyState_Pipeline)
cmdlist->SetPipelineState(static_cast<const DXPipeline*>(m_current_pipeline)->GetPipeline());
cmdlist->SetPipelineState(pipeline->GetPipeline());

if (dirty_bits & DirtyState_Framebuffer)
BindFramebuffer(static_cast<DXFramebuffer*>(m_current_framebuffer));
Expand Down Expand Up @@ -572,7 +584,7 @@ bool Renderer::ApplyState()
m_state.sampler_descriptor_base);
}

if (static_cast<const DXPipeline*>(m_current_pipeline)->GetUsage() == AbstractPipelineUsage::GX)
if (pipeline->GetUsage() != AbstractPipelineUsage::Utility)
{
if (dirty_bits & DirtyState_VS_CBV)
{
Expand All @@ -589,6 +601,13 @@ bool Renderer::ApplyState()
}
}

if (dirty_bits & DirtyState_VS_SRV_Descriptor &&
pipeline->GetUsage() == AbstractPipelineUsage::GXUber)
{
cmdlist->SetGraphicsRootDescriptorTable(ROOT_PARAMETER_VS_SRV,
m_state.vertex_srv_descriptor_base);
}

if (dirty_bits & DirtyState_GS_CBV)
{
cmdlist->SetGraphicsRootConstantBufferView(ROOT_PARAMETER_GS_CBV,
Expand Down Expand Up @@ -641,7 +660,9 @@ void Renderer::UpdateDescriptorTables()
const bool sampler_update_failed =
(m_dirty_bits & DirtyState_Samplers) && !UpdateSamplerDescriptorTable();
const bool uav_update_failed = (m_dirty_bits & DirtyState_PS_UAV) && !UpdateUAVDescriptorTable();
if (texture_update_failed || sampler_update_failed || uav_update_failed)
const bool srv_update_failed =
(m_dirty_bits & DirtyState_VS_SRV) && !UpdateVSSRVDescriptorTable();
if (texture_update_failed || sampler_update_failed || uav_update_failed || srv_update_failed)
{
WARN_LOG_FMT(VIDEO, "Executing command list while waiting for temporary {}",
texture_update_failed ? "descriptors" : "samplers");
Expand All @@ -651,6 +672,7 @@ void Renderer::UpdateDescriptorTables()
UpdateSRVDescriptorTable();
UpdateSamplerDescriptorTable();
UpdateUAVDescriptorTable();
UpdateVSSRVDescriptorTable();
}
}

Expand Down Expand Up @@ -700,6 +722,26 @@ bool Renderer::UpdateUAVDescriptorTable()
return true;
}

bool Renderer::UpdateVSSRVDescriptorTable()
{
if (!g_ActiveConfig.backend_info.bSupportsDynamicVertexLoader ||
static_cast<const DXPipeline*>(m_current_pipeline)->GetUsage() !=
AbstractPipelineUsage::GXUber)
{
return true;
}

DescriptorHandle handle;
if (!g_dx_context->GetDescriptorAllocator()->Allocate(1, &handle))
return false;

g_dx_context->GetDevice()->CopyDescriptorsSimple(1, handle.cpu_handle, m_state.vs_srv,
D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
m_state.vertex_srv_descriptor_base = handle.gpu_handle;
m_dirty_bits = (m_dirty_bits & ~DirtyState_VS_SRV) | DirtyState_VS_SRV_Descriptor;
return true;
}

bool Renderer::UpdateComputeUAVDescriptorTable()
{
DescriptorHandle handle;
Expand Down
11 changes: 9 additions & 2 deletions Source/Core/VideoBackends/D3D12/D3D12Renderer.h
Expand Up @@ -88,7 +88,8 @@ class Renderer final : public ::Renderer
void SetPixelShaderUAV(D3D12_CPU_DESCRIPTOR_HANDLE handle);

// Graphics vertex/index buffer binding.
void SetVertexBuffer(D3D12_GPU_VIRTUAL_ADDRESS address, u32 stride, u32 size);
void SetVertexBuffer(D3D12_GPU_VIRTUAL_ADDRESS address, D3D12_CPU_DESCRIPTOR_HANDLE srv,
u32 stride, u32 size);
void SetIndexBuffer(D3D12_GPU_VIRTUAL_ADDRESS address, u32 size, DXGI_FORMAT format);

// Binds all dirty state
Expand Down Expand Up @@ -126,14 +127,17 @@ class Renderer final : public ::Renderer
DirtyState_RootSignature = (1 << 17),
DirtyState_ComputeRootSignature = (1 << 18),
DirtyState_DescriptorHeaps = (1 << 19),
DirtyState_VS_SRV = (1 << 20),
DirtyState_VS_SRV_Descriptor = (1 << 21),

DirtyState_All =
DirtyState_Framebuffer | DirtyState_Pipeline | DirtyState_Textures | DirtyState_Samplers |
DirtyState_Viewport | DirtyState_ScissorRect | DirtyState_ComputeImageTexture |
DirtyState_PS_UAV | DirtyState_PS_CBV | DirtyState_VS_CBV | DirtyState_GS_CBV |
DirtyState_SRV_Descriptor | DirtyState_Sampler_Descriptor | DirtyState_UAV_Descriptor |
DirtyState_VertexBuffer | DirtyState_IndexBuffer | DirtyState_PrimitiveTopology |
DirtyState_RootSignature | DirtyState_ComputeRootSignature | DirtyState_DescriptorHeaps
DirtyState_RootSignature | DirtyState_ComputeRootSignature | DirtyState_DescriptorHeaps |
DirtyState_VS_SRV | DirtyState_VS_SRV_Descriptor
};

void CheckForSwapChainChanges();
Expand All @@ -144,6 +148,7 @@ class Renderer final : public ::Renderer
void UpdateDescriptorTables();
bool UpdateSRVDescriptorTable();
bool UpdateUAVDescriptorTable();
bool UpdateVSSRVDescriptorTable();
bool UpdateComputeUAVDescriptorTable();
bool UpdateSamplerDescriptorTable();

Expand All @@ -157,11 +162,13 @@ class Renderer final : public ::Renderer
DXShader* compute_shader = nullptr;
std::array<D3D12_GPU_VIRTUAL_ADDRESS, 3> constant_buffers = {};
std::array<D3D12_CPU_DESCRIPTOR_HANDLE, MAX_TEXTURES> textures = {};
D3D12_CPU_DESCRIPTOR_HANDLE vs_srv = {};
D3D12_CPU_DESCRIPTOR_HANDLE ps_uav = {};
SamplerStateSet samplers = {};
const DXTexture* compute_image_texture = nullptr;
D3D12_VIEWPORT viewport = {};
D3D12_RECT scissor = {};
D3D12_GPU_DESCRIPTOR_HANDLE vertex_srv_descriptor_base = {};
D3D12_GPU_DESCRIPTOR_HANDLE srv_descriptor_base = {};
D3D12_GPU_DESCRIPTOR_HANDLE sampler_descriptor_base = {};
D3D12_GPU_DESCRIPTOR_HANDLE uav_descriptor_base = {};
Expand Down
15 changes: 14 additions & 1 deletion Source/Core/VideoBackends/D3D12/D3D12VertexManager.cpp
Expand Up @@ -64,6 +64,18 @@ bool VertexManager::Initialize()
&srv_desc, dh.cpu_handle);
}

if (!g_dx_context->GetDescriptorHeapManager().Allocate(&m_vertex_srv))
{
PanicAlertFmt("Failed to allocate descriptor for vertex srv");
return false;
}

D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = {DXGI_FORMAT_R32_UINT, D3D12_SRV_DIMENSION_BUFFER,
D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING};
srv_desc.Buffer.NumElements = m_vertex_stream_buffer.GetSize() / sizeof(u32);
g_dx_context->GetDevice()->CreateShaderResourceView(m_vertex_stream_buffer.GetBuffer(), &srv_desc,
m_vertex_srv.cpu_handle);

UploadAllConstants();
return true;
}
Expand Down Expand Up @@ -115,7 +127,8 @@ void VertexManager::CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_in
ADDSTAT(g_stats.this_frame.bytes_vertex_streamed, static_cast<int>(vertex_data_size));
ADDSTAT(g_stats.this_frame.bytes_index_streamed, static_cast<int>(index_data_size));

Renderer::GetInstance()->SetVertexBuffer(m_vertex_stream_buffer.GetGPUPointer(), vertex_stride,
Renderer::GetInstance()->SetVertexBuffer(m_vertex_stream_buffer.GetGPUPointer(),
m_vertex_srv.cpu_handle, vertex_stride,
m_vertex_stream_buffer.GetSize());
Renderer::GetInstance()->SetIndexBuffer(m_index_stream_buffer.GetGPUPointer(),
m_index_stream_buffer.GetSize(), DXGI_FORMAT_R16_UINT);
Expand Down
1 change: 1 addition & 0 deletions Source/Core/VideoBackends/D3D12/D3D12VertexManager.h
Expand Up @@ -46,6 +46,7 @@ class VertexManager final : public VertexManagerBase
StreamBuffer m_uniform_stream_buffer;
StreamBuffer m_texel_stream_buffer;
std::array<DescriptorHandle, NUM_TEXEL_BUFFER_FORMATS> m_texel_buffer_views = {};
DescriptorHandle m_vertex_srv = {};
};

} // namespace DX12
15 changes: 15 additions & 0 deletions Source/Core/VideoBackends/D3D12/DX12Context.cpp
Expand Up @@ -261,6 +261,16 @@ bool DXContext::CreateDescriptorHeaps()
return true;
}

static void SetRootParamConstant(D3D12_ROOT_PARAMETER* rp, u32 shader_reg, u32 num_values,
D3D12_SHADER_VISIBILITY visibility)
{
rp->ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS;
rp->Constants.Num32BitValues = num_values;
rp->Constants.ShaderRegister = shader_reg;
rp->Constants.RegisterSpace = 0;
rp->ShaderVisibility = visibility;
}

static void SetRootParamCBV(D3D12_ROOT_PARAMETER* rp, u32 shader_reg,
D3D12_SHADER_VISIBILITY visibility)
{
Expand Down Expand Up @@ -345,6 +355,11 @@ bool DXContext::CreateGXRootSignature()
param_count++;
SetRootParamCBV(&params[param_count], 0, D3D12_SHADER_VISIBILITY_GEOMETRY);
param_count++;
SetRootParamTable(&params[param_count], &ranges[param_count], D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 3,
1, D3D12_SHADER_VISIBILITY_VERTEX);
param_count++;
SetRootParamConstant(&params[param_count], 2, 1, D3D12_SHADER_VISIBILITY_VERTEX);
param_count++;

// Since these must be contiguous, pixel lighting goes to bbox if not enabled.
if (g_ActiveConfig.bBBoxEnable)
Expand Down
2 changes: 2 additions & 0 deletions Source/Core/VideoBackends/D3D12/DX12Context.h
Expand Up @@ -27,6 +27,8 @@ enum ROOT_PARAMETER
ROOT_PARAMETER_VS_CBV,
ROOT_PARAMETER_VS_CBV2,
ROOT_PARAMETER_GS_CBV,
ROOT_PARAMETER_VS_SRV,
ROOT_PARAMETER_BASE_VERTEX_CONSTANT,
ROOT_PARAMETER_PS_UAV_OR_CBV2,
ROOT_PARAMETER_PS_CBV2, // ROOT_PARAMETER_PS_UAV_OR_CBV2 if bbox is not enabled
NUM_ROOT_PARAMETERS
Expand Down
1 change: 1 addition & 0 deletions Source/Core/VideoBackends/D3D12/DX12Pipeline.cpp
Expand Up @@ -165,6 +165,7 @@ std::unique_ptr<DXPipeline> DXPipeline::Create(const AbstractPipelineConfig& con
switch (config.usage)
{
case AbstractPipelineUsage::GX:
case AbstractPipelineUsage::GXUber:
desc.pRootSignature = g_dx_context->GetGXRootSignature();
break;
case AbstractPipelineUsage::Utility:
Expand Down
1 change: 1 addition & 0 deletions Source/Core/VideoBackends/D3D12/VideoBackend.cpp
Expand Up @@ -87,6 +87,7 @@ void VideoBackend::FillBackendInfo()
g_Config.backend_info.bSupportsLodBiasInSampler = true;
g_Config.backend_info.bSupportsSettingObjectNames = true;
g_Config.backend_info.bSupportsPartialMultisampleResolve = true;
g_Config.backend_info.bSupportsDynamicVertexLoader = true;

// We can only check texture support once we have a device.
if (g_dx_context)
Expand Down
47 changes: 24 additions & 23 deletions Source/Core/VideoBackends/Metal/MTLObjectCache.mm
Expand Up @@ -220,7 +220,7 @@ static MTLCullMode Convert(CullMode cull)
switch (cull)
{
case CullMode::None:
case CullMode::All: // Handled by disabling rasterization
case CullMode::All: // Handled by VertexLoaderManager::RunVertices
return MTLCullModeNone;
case CullMode::Front:
return MTLCullModeFront;
Expand Down Expand Up @@ -289,7 +289,8 @@ explicit VertexAttribute(AttributeFormat format)
}
};
template <size_t N>
static void CopyAll(std::array<VertexAttribute, N>& output, const AttributeFormat (&input)[N])
static void CopyAll(std::array<VertexAttribute, N>& output,
const std::array<AttributeFormat, N>& input)
{
for (size_t i = 0; i < N; ++i)
output[i] = VertexAttribute(input[i]);
Expand Down Expand Up @@ -327,13 +328,14 @@ static void CopyAll(std::array<VertexAttribute, N>& output, const AttributeForma
blend.subtractAlpha = cfg.blending_state.subtractAlpha.Value();
// clang-format on
}
// Throw extras in bits we don't otherwise use
if (cfg.rasterization_state.cullmode == CullMode::All)
blend.hex |= 1 << 29;
if (cfg.rasterization_state.primitive == PrimitiveType::Points)
blend.hex |= 1 << 30;
else if (cfg.rasterization_state.primitive == PrimitiveType::Lines)
blend.hex |= 1 << 31;

if (cfg.usage != AbstractPipelineUsage::GXUber)
{
if (cfg.rasterization_state.primitive == PrimitiveType::Points)
is_points = true;
else if (cfg.rasterization_state.primitive == PrimitiveType::Lines)
is_lines = true;
}
}
PipelineID() { memset(this, 0, sizeof(*this)); }
PipelineID(const PipelineID& other) { memcpy(this, &other, sizeof(*this)); }
Expand All @@ -359,7 +361,13 @@ static void CopyAll(std::array<VertexAttribute, N>& output, const AttributeForma
VertexAttribute v_posmtx;
const Shader* vertex_shader;
const Shader* fragment_shader;
BlendingState blend;
union
{
BlendingState blend;
// Throw extras in bits we don't otherwise use
BitField<30, 1, bool, u32> is_points;
BitField<31, 1, bool, u32> is_lines;
};
FramebufferState framebuffer;
};

Expand All @@ -377,24 +385,17 @@ StoredPipeline CreatePipeline(const AbstractPipelineConfig& config)
auto desc = MRCTransfer([MTLRenderPipelineDescriptor new]);
[desc setVertexFunction:static_cast<const Shader*>(config.vertex_shader)->GetShader()];
[desc setFragmentFunction:static_cast<const Shader*>(config.pixel_shader)->GetShader()];
if (config.usage == AbstractPipelineUsage::GX)
{
if ([[[desc vertexFunction] label] containsString:@"Uber"])
[desc
setLabel:[NSString stringWithFormat:@"GX Uber Pipeline %d", m_pipeline_counter[0]++]];
else
[desc setLabel:[NSString stringWithFormat:@"GX Pipeline %d", m_pipeline_counter[1]++]];
}
if (config.usage == AbstractPipelineUsage::GXUber)
[desc setLabel:[NSString stringWithFormat:@"GX Uber Pipeline %d", m_pipeline_counter[0]++]];
else if (config.usage == AbstractPipelineUsage::GX)
[desc setLabel:[NSString stringWithFormat:@"GX Pipeline %d", m_pipeline_counter[1]++]];
else
{
[desc setLabel:[NSString stringWithFormat:@"Utility Pipeline %d", m_pipeline_counter[2]++]];
}
if (config.vertex_format)
[desc setVertexDescriptor:static_cast<const VertexFormat*>(config.vertex_format)->Get()];
RasterizationState rs = config.rasterization_state;
[desc setInputPrimitiveTopology:GetClass(rs.primitive)];
if (rs.cullmode == CullMode::All)
[desc setRasterizationEnabled:NO];
if (config.usage != AbstractPipelineUsage::GXUber)
[desc setInputPrimitiveTopology:GetClass(rs.primitive)];
MTLRenderPipelineColorAttachmentDescriptor* color0 =
[[desc colorAttachments] objectAtIndexedSubscript:0];
BlendingState bs = config.blending_state;
Expand Down

4 comments on commit 22197c0

@candid-crow
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This dev update broke save state loading for me; reverting to 5.0-17392 restored functionality.

@jordan-woyak
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This dev update broke save state loading for me; reverting to 5.0-17392 restored functionality.

Breaking save states from older versions is intentional. Do newly made save states load properly?

@candid-crow
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

newly made save states do load properly, yes. If it's intentional, was there a note I missed that it would break loading older states? I ask because I've only been on the development build for a few weeks, but in that span this is the first time an update has disabled loading save states from a prior build--that I've noticed, anyway.

@JMC47
Copy link
Contributor Author

@JMC47 JMC47 commented on 22197c0 Sep 21, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's normal for builds that change logic to cause savestates to only work in older builds. It should tell you what version of Dolphin you need to use in order to load the savestate if you use it in a build that breaks compatibility.

Please sign in to comment.