@@ -0,0 +1,94 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<!-- The configuration/platform pairs this project can be built with: Debug and Release, x64 only. -->
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals">
<!-- Project identity (GUID) and the Windows SDK version the project is built against. -->
<ProjectGuid>{570215B7-E32F-4438-95AE-C8D955F9FCA3}</ProjectGuid>
<WindowsTargetPlatformVersion>10.0.17134.0</WindowsTargetPlatformVersion>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Label="Configuration">
<!-- Settings shared by every configuration: static library, v141 toolset, Unicode character set. -->
<ConfigurationType>StaticLibrary</ConfigurationType>
<PlatformToolset>v141</PlatformToolset>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<!-- Debug builds use the debug libraries; Release builds do not. -->
<PropertyGroup Condition="'$(Configuration)'=='Debug'" Label="Configuration">
<UseDebugLibraries>true</UseDebugLibraries>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)'=='Release'" Label="Configuration">
<UseDebugLibraries>false</UseDebugLibraries>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
</ImportGroup>
<ImportGroup Label="PropertySheets">
<!-- Optional per-user platform property sheet; only imported when the file exists. -->
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
<!-- Shared property sheets located three directories above this project. -->
<Import Project="..\..\..\VSProps\Base.props" />
<Import Project="..\..\..\VSProps\PCHUse.props" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<!-- Compiler defaults for Release|x64: precompiled headers are not used and the forced-include list is empty. -->
<!-- NOTE(review): PCHUse.props is imported above; these values presumably override what it sets - confirm. -->
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<ClCompile>
<PrecompiledHeader>NotUsing</PrecompiledHeader>
<ForcedIncludeFiles />
</ClCompile>
</ItemDefinitionGroup>
<!-- Compiler defaults for Debug|x64: identical to the Release|x64 group above. -->
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<ClCompile>
<PrecompiledHeader>NotUsing</PrecompiledHeader>
<ForcedIncludeFiles />
</ClCompile>
</ItemDefinitionGroup>
<ItemGroup>
<!-- C++ translation units compiled into this static library. -->
<ClCompile Include="BoundingBox.cpp" />
<ClCompile Include="DescriptorAllocator.cpp" />
<ClCompile Include="DXContext.cpp" />
<ClCompile Include="DescriptorHeapManager.cpp" />
<ClCompile Include="DXPipeline.cpp" />
<ClCompile Include="DXShader.cpp" />
<ClCompile Include="StreamBuffer.cpp" />
<ClCompile Include="DXTexture.cpp" />
<ClCompile Include="VideoBackend.cpp" />
<ClCompile Include="PerfQuery.cpp" />
<ClCompile Include="Renderer.cpp" />
<ClCompile Include="DXVertexFormat.cpp" />
<ClCompile Include="SwapChain.cpp" />
<ClCompile Include="VertexManager.cpp" />
</ItemGroup>
<ItemGroup>
<!-- Header files belonging to the project. Common.h has no matching .cpp in the compile list. -->
<ClInclude Include="BoundingBox.h" />
<ClInclude Include="Common.h" />
<ClInclude Include="DescriptorAllocator.h" />
<ClInclude Include="DXContext.h" />
<ClInclude Include="DescriptorHeapManager.h" />
<ClInclude Include="DXPipeline.h" />
<ClInclude Include="DXShader.h" />
<ClInclude Include="StreamBuffer.h" />
<ClInclude Include="DXTexture.h" />
<ClInclude Include="PerfQuery.h" />
<ClInclude Include="Renderer.h" />
<ClInclude Include="DXVertexFormat.h" />
<ClInclude Include="SwapChain.h" />
<ClInclude Include="VertexManager.h" />
<ClInclude Include="VideoBackend.h" />
</ItemGroup>
<ItemGroup>
<!-- Projects this library depends on: VideoCommon (located via $(CoreDir)) and the sibling D3DCommon project. -->
<ProjectReference Include="$(CoreDir)VideoCommon\VideoCommon.vcxproj">
<Project>{3de9ee35-3e91-4f27-a014-2866ad8c3fe3}</Project>
</ProjectReference>
<ProjectReference Include="..\D3DCommon\D3DCommon.vcxproj">
<Project>{dea96cf2-f237-4a1a-b32f-c916769efb50}</Project>
</ProjectReference>
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>
@@ -0,0 +1,36 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup>
<!-- Source files listed without a Filter child element, so none is assigned to a named filter here. -->
<ClCompile Include="VideoBackend.cpp" />
<ClCompile Include="DXContext.cpp" />
<ClCompile Include="DXPipeline.cpp" />
<ClCompile Include="DXShader.cpp" />
<ClCompile Include="DXTexture.cpp" />
<ClCompile Include="DXVertexFormat.cpp" />
<ClCompile Include="StreamBuffer.cpp" />
<ClCompile Include="SwapChain.cpp" />
<ClCompile Include="PerfQuery.cpp" />
<ClCompile Include="Renderer.cpp" />
<ClCompile Include="VertexManager.cpp" />
<ClCompile Include="BoundingBox.cpp" />
<ClCompile Include="DescriptorHeapManager.cpp" />
<ClCompile Include="DescriptorAllocator.cpp" />
</ItemGroup>
<ItemGroup>
<!-- Header files listed without a Filter child element, so none is assigned to a named filter here. -->
<ClInclude Include="VideoBackend.h" />
<ClInclude Include="SwapChain.h" />
<ClInclude Include="DXContext.h" />
<ClInclude Include="DXPipeline.h" />
<ClInclude Include="DXShader.h" />
<ClInclude Include="DXTexture.h" />
<ClInclude Include="DXVertexFormat.h" />
<ClInclude Include="StreamBuffer.h" />
<ClInclude Include="VertexManager.h" />
<ClInclude Include="BoundingBox.h" />
<ClInclude Include="PerfQuery.h" />
<ClInclude Include="Renderer.h" />
<ClInclude Include="Common.h" />
<ClInclude Include="DescriptorHeapManager.h" />
<ClInclude Include="DescriptorAllocator.h" />
</ItemGroup>
</Project>

Large diffs are not rendered by default.

@@ -0,0 +1,191 @@
// Copyright 2019 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.

#pragma once
#include "Common/CommonTypes.h"
#include "VideoBackends/D3D12/Common.h"
#include "VideoBackends/D3D12/DescriptorAllocator.h"
#include "VideoBackends/D3D12/DescriptorHeapManager.h"
#include "VideoBackends/D3D12/StreamBuffer.h"

#include <array>
#include <functional>
#include <map>

struct IDXGIFactory2;

namespace DX12
{
// Vertex/Pixel shader root parameters
enum ROOT_PARAMETER
{
// Enumerators are unassigned, so they take consecutive values starting at 0.
// NOTE(review): presumably these are slot indices into the graphics root signature - confirm
// against CreateGXRootSignature().
ROOT_PARAMETER_PS_CBV,
ROOT_PARAMETER_PS_SRV,
ROOT_PARAMETER_PS_SAMPLERS,
ROOT_PARAMETER_VS_CBV,
ROOT_PARAMETER_GS_CBV,
ROOT_PARAMETER_PS_UAV_OR_CBV2,
ROOT_PARAMETER_PS_CBV2, // ROOT_PARAMETER_PS_UAV_OR_CBV2 if bbox is not enabled
// Count of the enumerators above (7); not a parameter itself.
NUM_ROOT_PARAMETERS
};
// Compute shader root parameters
enum CS_ROOT_PARAMETERS
{
// Enumerators are unassigned, so they take consecutive values starting at 0.
// NOTE(review): presumably these are slot indices into the compute root signature - confirm
// against CreateComputeRootSignature().
CS_ROOT_PARAMETER_CBV,
CS_ROOT_PARAMETER_SRV,
CS_ROOT_PARAMETER_SAMPLERS,
CS_ROOT_PARAMETER_UAV,
// Count of the enumerators above (4); not a parameter itself.
NUM_CS_ROOT_PARAMETERS,
};

// Owns the D3D12 device-level objects used by the backend: DXGI factory, device, command queue,
// fence, a ring of command lists with their per-list allocators, descriptor heap managers, root
// signatures and the texture upload stream buffer. Instances are created through the static
// Create() function (the constructor is private).
class DXContext
{
public:
  ~DXContext();

  // Returns a list of AA modes.
  static std::vector<u32> GetAAModes(u32 adapter_index);

  // Creates new device and context.
  static bool Create(u32 adapter_index, bool enable_debug_layer);

  // Destroys active context.
  static void Destroy();

  IDXGIFactory2* GetDXGIFactory() const { return m_dxgi_factory.Get(); }
  ID3D12Device* GetDevice() const { return m_device.Get(); }
  ID3D12CommandQueue* GetCommandQueue() const { return m_command_queue.Get(); }

  // Returns the current command list, commands can be recorded directly.
  ID3D12GraphicsCommandList* GetCommandList() const
  {
    return m_command_lists[m_current_command_list].command_list.Get();
  }

  // Per-command-list allocators belonging to the current command list.
  DescriptorAllocator* GetDescriptorAllocator()
  {
    return &m_command_lists[m_current_command_list].descriptor_allocator;
  }
  SamplerAllocator* GetSamplerAllocator()
  {
    return &m_command_lists[m_current_command_list].sampler_allocator;
  }

  // Descriptor manager access.
  DescriptorHeapManager& GetDescriptorHeapManager() { return m_descriptor_heap_manager; }
  DescriptorHeapManager& GetRTVHeapManager() { return m_rtv_heap_manager; }
  DescriptorHeapManager& GetDSVHeapManager() { return m_dsv_heap_manager; }
  SamplerHeapManager& GetSamplerHeapManager() { return m_sampler_heap_manager; }
  ID3D12DescriptorHeap* const* GetGPUDescriptorHeaps() const
  {
    return m_gpu_descriptor_heaps.data();
  }
  u32 GetGPUDescriptorHeapCount() const { return static_cast<u32>(m_gpu_descriptor_heaps.size()); }
  const DescriptorHandle& GetNullSRVDescriptor() const { return m_null_srv_descriptor; }

  // Root signature access.
  ID3D12RootSignature* GetGXRootSignature() const { return m_gx_root_signature.Get(); }
  ID3D12RootSignature* GetUtilityRootSignature() const { return m_utility_root_signature.Get(); }
  ID3D12RootSignature* GetComputeRootSignature() const { return m_compute_root_signature.Get(); }

  // Fence value for current command list.
  u64 GetCurrentFenceValue() const { return m_current_fence_value; }

  // Last "completed" fence.
  u64 GetCompletedFenceValue() const { return m_completed_fence_value; }

  // Texture streaming buffer for uploads.
  StreamBuffer& GetTextureUploadBuffer() { return m_texture_upload_buffer; }

  // Feature level to use when compiling shaders.
  D3D_FEATURE_LEVEL GetFeatureLevel() const { return m_feature_level; }

  // Test for support for the specified texture format.
  bool SupportsTextureFormat(DXGI_FORMAT format);

  // Creates command lists, global buffers and descriptor heaps.
  bool CreateGlobalResources();

  // Executes the current command list.
  void ExecuteCommandList(bool wait_for_completion);

  // Waits for a specific fence.
  void WaitForFence(u64 fence);

  // Defers destruction of a D3D resource (associates it with the current list).
  void DeferResourceDestruction(ID3D12Resource* resource);

  // Defers destruction of a descriptor handle (associates it with the current list).
  void DeferDescriptorDestruction(DescriptorHeapManager& manager, u32 index);

  // Clears all samplers from the per-frame allocators.
  void ResetSamplerAllocators();

  // Re-creates the root signature. Call when the host config changes (e.g. bbox/per-pixel shading).
  void RecreateGXRootSignature();

private:
  // Number of command lists. One is being built while the other(s) are executed.
  static const u32 NUM_COMMAND_LISTS = 3;

  // Textures that don't fit into this buffer will be uploaded with a staging buffer.
  static const u32 TEXTURE_UPLOAD_BUFFER_SIZE = 32 * 1024 * 1024;

  // Everything that belongs to one entry of the command list ring.
  struct CommandListResources
  {
    ComPtr<ID3D12CommandAllocator> command_allocator;
    ComPtr<ID3D12GraphicsCommandList> command_list;
    DescriptorAllocator descriptor_allocator;
    SamplerAllocator sampler_allocator;
    // Objects whose destruction was deferred while this list was current.
    std::vector<ID3D12Resource*> pending_resources;
    std::vector<std::pair<DescriptorHeapManager&, u32>> pending_descriptors;
    u64 ready_fence_value = 0;
  };

  DXContext();

  bool CreateDXGIFactory(bool enable_debug_layer);
  bool CreateDevice(u32 adapter_index, bool enable_debug_layer);
  bool CreateCommandQueue();
  bool CreateFence();
  bool CreateDescriptorHeaps();
  bool CreateRootSignatures();
  bool CreateGXRootSignature();
  bool CreateUtilityRootSignature();
  bool CreateComputeRootSignature();
  bool CreateTextureUploadBuffer();
  bool CreateCommandLists();
  void MoveToNextCommandList();
  void DestroyPendingResources(CommandListResources& cmdlist);

  ComPtr<IDXGIFactory2> m_dxgi_factory;
  ComPtr<ID3D12Debug> m_debug_interface;
  ComPtr<ID3D12Device> m_device;
  ComPtr<ID3D12CommandQueue> m_command_queue;

  ComPtr<ID3D12Fence> m_fence = nullptr;
  HANDLE m_fence_event = {};
  // 64-bit to match GetCurrentFenceValue(), m_completed_fence_value and ready_fence_value; a
  // 32-bit counter would wrap and then compare incorrectly against the 64-bit completed value.
  u64 m_current_fence_value = 0;
  u64 m_completed_fence_value = 0;

  std::array<CommandListResources, NUM_COMMAND_LISTS> m_command_lists;
  // Starts on the last slot of the ring.
  u32 m_current_command_list = NUM_COMMAND_LISTS - 1;

  DescriptorHeapManager m_descriptor_heap_manager;
  DescriptorHeapManager m_rtv_heap_manager;
  DescriptorHeapManager m_dsv_heap_manager;
  SamplerHeapManager m_sampler_heap_manager;
  std::array<ID3D12DescriptorHeap*, 2> m_gpu_descriptor_heaps = {};
  DescriptorHandle m_null_srv_descriptor;
  D3D_FEATURE_LEVEL m_feature_level = D3D_FEATURE_LEVEL_11_0;

  ComPtr<ID3D12RootSignature> m_gx_root_signature;
  ComPtr<ID3D12RootSignature> m_utility_root_signature;
  ComPtr<ID3D12RootSignature> m_compute_root_signature;

  StreamBuffer m_texture_upload_buffer;
};

extern std::unique_ptr<DXContext> g_dx_context;

} // namespace DX12
@@ -0,0 +1,217 @@
// Copyright 2019 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.

#include "Common/Assert.h"
#include "Common/MsgHandler.h"

#include "VideoBackends/D3D12/Common.h"
#include "VideoBackends/D3D12/DXContext.h"
#include "VideoBackends/D3D12/DXPipeline.h"
#include "VideoBackends/D3D12/DXShader.h"
#include "VideoBackends/D3D12/DXTexture.h"
#include "VideoBackends/D3D12/DXVertexFormat.h"

namespace DX12
{
// Stores the already-created pipeline state object together with the state needed to bind it.
// No reference is added to either COM pointer here.
DXPipeline::DXPipeline(ID3D12PipelineState* pipeline, ID3D12RootSignature* root_signature,
                       AbstractPipelineUsage usage, D3D12_PRIMITIVE_TOPOLOGY primitive_topology,
                       bool use_integer_rtv)
    : m_pipeline(pipeline),
      m_root_signature(root_signature),
      m_usage(usage),
      m_primitive_topology(primitive_topology),
      m_use_integer_rtv(use_integer_rtv)
{
}

DXPipeline::~DXPipeline()
{
// Drops the reference on the pipeline state object that was handed to the constructor (which
// stores the pointer without adding a reference). The root signature pointer is not released.
// NOTE(review): the release is immediate; if command lists referencing this pipeline can still
// be in flight on the GPU, destruction may need to be deferred - confirm against callers.
m_pipeline->Release();
}

// Maps the abstract primitive type to the topology set on the command list at draw time.
// Triangle strips, and any value not listed explicitly, map to TRIANGLESTRIP.
static D3D12_PRIMITIVE_TOPOLOGY GetD3DTopology(const RasterizationState& state)
{
  if (state.primitive == PrimitiveType::Points)
    return D3D_PRIMITIVE_TOPOLOGY_POINTLIST;

  if (state.primitive == PrimitiveType::Lines)
    return D3D_PRIMITIVE_TOPOLOGY_LINELIST;

  if (state.primitive == PrimitiveType::Triangles)
    return D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST;

  return D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP;
}

// Maps the abstract primitive type to the coarse topology class stored in the pipeline state.
// Both triangle lists and strips, and any unlisted value, are of type TRIANGLE.
static D3D12_PRIMITIVE_TOPOLOGY_TYPE GetD3DTopologyType(const RasterizationState& state)
{
  if (state.primitive == PrimitiveType::Points)
    return D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT;

  if (state.primitive == PrimitiveType::Lines)
    return D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE;

  return D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;
}

// Fills the fill mode, cull mode and multisample fields of |desc|. All other fields are left
// untouched, so the caller is expected to pass in an already-initialised descriptor.
static void GetD3DRasterizerDesc(D3D12_RASTERIZER_DESC* desc, const RasterizationState& rs_state,
                                 const FramebufferState& fb_state)
{
  // Indexed by the cull mode value. There is no CULL_ALL in D3D12, so the last entry reuses
  // FRONT.
  static constexpr std::array<D3D12_CULL_MODE, 4> cull_mode_lookup = {
      {D3D12_CULL_MODE_NONE, D3D12_CULL_MODE_BACK, D3D12_CULL_MODE_FRONT, D3D12_CULL_MODE_FRONT}};

  const bool multisampled = fb_state.samples > 1;

  desc->FillMode = D3D12_FILL_MODE_SOLID;
  desc->CullMode = cull_mode_lookup[rs_state.cullmode];
  desc->MultisampleEnable = multisampled;
}

// Fills the depth test enable, depth function and depth write mask of |desc|. Stencil fields are
// left as the caller initialised them.
static void GetD3DDepthDesc(D3D12_DEPTH_STENCIL_DESC* desc, const DepthState& state)
{
  // Indexed by the compare function value. Depth is inverted, so the less/greater entries are
  // swapped relative to their position in the table.
  static constexpr std::array<D3D12_COMPARISON_FUNC, 8> comparison_lookup = {
      {D3D12_COMPARISON_FUNC_NEVER, D3D12_COMPARISON_FUNC_GREATER, D3D12_COMPARISON_FUNC_EQUAL,
       D3D12_COMPARISON_FUNC_GREATER_EQUAL, D3D12_COMPARISON_FUNC_LESS,
       D3D12_COMPARISON_FUNC_NOT_EQUAL, D3D12_COMPARISON_FUNC_LESS_EQUAL,
       D3D12_COMPARISON_FUNC_ALWAYS}};

  desc->DepthEnable = state.testenable;
  desc->DepthFunc = comparison_lookup[state.func];

  if (state.updateenable)
    desc->DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ALL;
  else
    desc->DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ZERO;
}

// Fills render target 0 of |desc| from the abstract blending state.
//
// The write mask is OR-ed into the existing value, and fields that do not apply to the selected
// mode are not written, so |desc| must be zero-initialised by the caller. When blending is
// enabled the logic op fields are left untouched; blending therefore takes precedence.
static void GetD3DBlendDesc(D3D12_BLEND_DESC* desc, const BlendingState& state)
{
  using FactorTable = std::array<D3D12_BLEND, 8>;

  // Factor tables are indexed by the abstract blend factor value. The dual-source variants differ
  // from the regular ones only in using SRC1_ALPHA instead of SRC_ALPHA.
  static constexpr FactorTable src_dual_src_factors = {
      {D3D12_BLEND_ZERO, D3D12_BLEND_ONE, D3D12_BLEND_DEST_COLOR, D3D12_BLEND_INV_DEST_COLOR,
       D3D12_BLEND_SRC1_ALPHA, D3D12_BLEND_INV_SRC1_ALPHA, D3D12_BLEND_DEST_ALPHA,
       D3D12_BLEND_INV_DEST_ALPHA}};
  static constexpr FactorTable dst_dual_src_factors = {
      {D3D12_BLEND_ZERO, D3D12_BLEND_ONE, D3D12_BLEND_SRC_COLOR, D3D12_BLEND_INV_SRC_COLOR,
       D3D12_BLEND_SRC1_ALPHA, D3D12_BLEND_INV_SRC1_ALPHA, D3D12_BLEND_DEST_ALPHA,
       D3D12_BLEND_INV_DEST_ALPHA}};
  static constexpr FactorTable src_factors = {
      {D3D12_BLEND_ZERO, D3D12_BLEND_ONE, D3D12_BLEND_DEST_COLOR, D3D12_BLEND_INV_DEST_COLOR,
       D3D12_BLEND_SRC_ALPHA, D3D12_BLEND_INV_SRC_ALPHA, D3D12_BLEND_DEST_ALPHA,
       D3D12_BLEND_INV_DEST_ALPHA}};
  static constexpr FactorTable dst_factors = {
      {D3D12_BLEND_ZERO, D3D12_BLEND_ONE, D3D12_BLEND_SRC_COLOR, D3D12_BLEND_INV_SRC_COLOR,
       D3D12_BLEND_SRC_ALPHA, D3D12_BLEND_INV_SRC_ALPHA, D3D12_BLEND_DEST_ALPHA,
       D3D12_BLEND_INV_DEST_ALPHA}};

  // Indexed by the abstract logic mode value.
  static constexpr std::array<D3D12_LOGIC_OP, 16> logic_ops = {
      {D3D12_LOGIC_OP_CLEAR, D3D12_LOGIC_OP_AND, D3D12_LOGIC_OP_AND_REVERSE, D3D12_LOGIC_OP_COPY,
       D3D12_LOGIC_OP_AND_INVERTED, D3D12_LOGIC_OP_NOOP, D3D12_LOGIC_OP_XOR, D3D12_LOGIC_OP_OR,
       D3D12_LOGIC_OP_NOR, D3D12_LOGIC_OP_EQUIV, D3D12_LOGIC_OP_INVERT, D3D12_LOGIC_OP_OR_REVERSE,
       D3D12_LOGIC_OP_COPY_INVERTED, D3D12_LOGIC_OP_OR_INVERTED, D3D12_LOGIC_OP_NAND,
       D3D12_LOGIC_OP_SET}};

  desc->AlphaToCoverageEnable = FALSE;
  desc->IndependentBlendEnable = FALSE;

  D3D12_RENDER_TARGET_BLEND_DESC& target = desc->RenderTarget[0];

  // Colour and alpha channel writes are controlled independently.
  if (state.colorupdate)
  {
    target.RenderTargetWriteMask |= D3D12_COLOR_WRITE_ENABLE_RED |
                                    D3D12_COLOR_WRITE_ENABLE_GREEN |
                                    D3D12_COLOR_WRITE_ENABLE_BLUE;
  }
  if (state.alphaupdate)
  {
    target.RenderTargetWriteMask |= D3D12_COLOR_WRITE_ENABLE_ALPHA;
  }

  // blend takes precedence over logic op
  target.BlendEnable = state.blendenable;
  if (!state.blendenable)
  {
    target.LogicOpEnable = state.logicopenable;
    if (state.logicopenable)
      target.LogicOp = logic_ops[state.logicmode];
    return;
  }

  target.BlendOp = state.subtract ? D3D12_BLEND_OP_REV_SUBTRACT : D3D12_BLEND_OP_ADD;
  target.BlendOpAlpha = state.subtractAlpha ? D3D12_BLEND_OP_REV_SUBTRACT : D3D12_BLEND_OP_ADD;

  // Select the factor tables once, then apply them to both the colour and alpha factors.
  const FactorTable& src_table = state.usedualsrc ? src_dual_src_factors : src_factors;
  const FactorTable& dst_table = state.usedualsrc ? dst_dual_src_factors : dst_factors;
  target.SrcBlend = src_table[state.srcfactor];
  target.SrcBlendAlpha = src_table[state.srcfactoralpha];
  target.DestBlend = dst_table[state.dstfactor];
  target.DestBlendAlpha = dst_table[state.dstfactoralpha];
}

std::unique_ptr<DXPipeline> DXPipeline::Create(const AbstractPipelineConfig& config)
{
DEBUG_ASSERT(config.vertex_shader && config.pixel_shader);

D3D12_GRAPHICS_PIPELINE_STATE_DESC desc = {};
switch (config.usage)
{
case AbstractPipelineUsage::GX:
desc.pRootSignature = g_dx_context->GetGXRootSignature();
break;
case AbstractPipelineUsage::Utility:
desc.pRootSignature = g_dx_context->GetUtilityRootSignature();
break;
default:
PanicAlert("Unknown pipeline layout.");
return nullptr;
}

if (config.vertex_shader)
desc.VS = static_cast<const DXShader*>(config.vertex_shader)->GetD3DByteCode();
if (config.geometry_shader)
desc.GS = static_cast<const DXShader*>(config.geometry_shader)->GetD3DByteCode();
if (config.pixel_shader)
desc.PS = static_cast<const DXShader*>(config.pixel_shader)->GetD3DByteCode();

GetD3DBlendDesc(&desc.BlendState, config.blending_state);
desc.SampleMask = 0xFFFFFFFF;
GetD3DRasterizerDesc(&desc.RasterizerState, config.rasterization_state, config.framebuffer_state);
GetD3DDepthDesc(&desc.DepthStencilState, config.depth_state);
if (config.vertex_format)
static_cast<const DXVertexFormat*>(config.vertex_format)->GetInputLayoutDesc(&desc.InputLayout);
desc.IBStripCutValue = config.rasterization_state.primitive == PrimitiveType::TriangleStrip ?
D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFF :
D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_DISABLED;
desc.PrimitiveTopologyType = GetD3DTopologyType(config.rasterization_state);
if (config.framebuffer_state.color_texture_format != AbstractTextureFormat::Undefined)
{
desc.NumRenderTargets = 1;
desc.RTVFormats[0] = D3DCommon::GetRTVFormatForAbstractFormat(
config.framebuffer_state.color_texture_format, config.blending_state.logicopenable);
}
if (config.framebuffer_state.depth_texture_format != AbstractTextureFormat::Undefined)
desc.DSVFormat =
D3DCommon::GetDSVFormatForAbstractFormat(config.framebuffer_state.depth_texture_format);
desc.SampleDesc.Count = config.framebuffer_state.samples;
desc.NodeMask = 1;

ID3D12PipelineState* pso;
HRESULT hr = g_dx_context->GetDevice()->CreateGraphicsPipelineState(&desc, IID_PPV_ARGS(&pso));
CHECK(SUCCEEDED(hr), "Create PSO");
if (FAILED(hr))
return nullptr;

const bool use_integer_rtv =
!config.blending_state.blendenable && config.blending_state.logicopenable;
return std::make_unique<DXPipeline>(pso, desc.pRootSignature, config.usage,
GetD3DTopology(config.rasterization_state), use_integer_rtv);
}
} // namespace DX12
@@ -0,0 +1,38 @@
// Copyright 2019 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.

#pragma once

#include <d3d12.h>
#include <memory>

#include "VideoCommon/AbstractPipeline.h"

namespace DX12
{
// D3D12 implementation of AbstractPipeline: a pipeline state object plus the root signature,
// usage, primitive topology and render-target-view selection that go with it.
class DXPipeline final : public AbstractPipeline
{
public:
// Stores the given pointers as-is; the destructor releases |pipeline| but not |root_signature|.
DXPipeline(ID3D12PipelineState* pipeline, ID3D12RootSignature* root_signature,
AbstractPipelineUsage usage, D3D12_PRIMITIVE_TOPOLOGY primitive_topology,
bool use_integer_rtv);
~DXPipeline() override;

// Builds a pipeline from the abstract configuration. Returns nullptr on failure.
static std::unique_ptr<DXPipeline> Create(const AbstractPipelineConfig& config);

ID3D12PipelineState* GetPipeline() const { return m_pipeline; }
ID3D12RootSignature* GetRootSignature() const { return m_root_signature; }
AbstractPipelineUsage GetUsage() const { return m_usage; }
D3D12_PRIMITIVE_TOPOLOGY GetPrimitiveTopology() const { return m_primitive_topology; }
// True when the pipeline was created with a logic op enabled and blending disabled.
bool UseIntegerRTV() const { return m_use_integer_rtv; }

private:
// Raw COM pointer; released in the destructor.
ID3D12PipelineState* m_pipeline;
// Raw COM pointer; never released by this class.
ID3D12RootSignature* m_root_signature;
AbstractPipelineUsage m_usage;
D3D12_PRIMITIVE_TOPOLOGY m_primitive_topology;
bool m_use_integer_rtv;
};

} // namespace DX12
@@ -0,0 +1,55 @@
// Copyright 2019 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.

#include "VideoBackends/D3D12/DXShader.h"
#include "VideoBackends/D3D12/Common.h"
#include "VideoBackends/D3D12/DXContext.h"

namespace DX12
{
// Forwards the stage and the bytecode (moved) to the shared D3DCommon shader base class.
DXShader::DXShader(ShaderStage stage, BinaryData bytecode)
    : D3DCommon::Shader(stage, std::move(bytecode))
{
  // Nothing else to initialise; the compute pipeline is created separately when required.
}

// Compiler-generated destructor, defined out of line.
DXShader::~DXShader() = default;

// Wraps compiled bytecode in a DXShader. Compute shaders additionally get their pipeline state
// created up front; if that fails, nullptr is returned.
std::unique_ptr<DXShader> DXShader::CreateFromBytecode(ShaderStage stage, BinaryData bytecode)
{
  // The constructor is private, so std::make_unique cannot be used here.
  std::unique_ptr<DXShader> result(new DXShader(stage, std::move(bytecode)));

  // Only compute shaders need any further setup.
  if (stage != ShaderStage::Compute)
    return result;

  if (!result->CreateComputePipeline())
    return nullptr;

  return result;
}

// Compiles |source| for the context's feature level and wraps the result in a DXShader.
// Returns nullptr if compilation fails, or if CreateFromBytecode() does.
std::unique_ptr<DXShader> DXShader::CreateFromSource(ShaderStage stage, const char* source,
                                                     size_t length)
{
  BinaryData compiled;
  if (CompileShader(g_dx_context->GetFeatureLevel(), &compiled, stage, source, length))
    return CreateFromBytecode(stage, std::move(compiled));

  return nullptr;
}

// Returns a non-owning view of the stored bytecode. The view points into m_bytecode, so it is
// only valid while this shader is alive.
D3D12_SHADER_BYTECODE DXShader::GetD3DByteCode() const
{
  D3D12_SHADER_BYTECODE view;
  view.pShaderBytecode = m_bytecode.data();
  view.BytecodeLength = m_bytecode.size();
  return view;
}

bool DXShader::CreateComputePipeline()
{
D3D12_COMPUTE_PIPELINE_STATE_DESC desc = {};
desc.pRootSignature = g_dx_context->GetComputeRootSignature();
desc.CS = GetD3DByteCode();
desc.NodeMask = 1;

HRESULT hr = g_dx_context->GetDevice()->CreateComputePipelineState(
&desc, IID_PPV_ARGS(&m_compute_pipeline));
CHECK(SUCCEEDED(hr), "Creating compute pipeline failed");
return SUCCEEDED(hr);
}

} // namespace DX12
@@ -0,0 +1,32 @@
// Copyright 2019 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.

#pragma once
#include <memory>
#include "VideoBackends/D3D12/Common.h"
#include "VideoBackends/D3DCommon/Shader.h"

namespace DX12
{
// D3D12 shader: the bytecode handling comes from D3DCommon::Shader, and this class adds the
// compute pipeline state that is created for compute-stage shaders.
class DXShader final : public D3DCommon::Shader
{
public:
~DXShader() override;

// Null unless a compute pipeline was created for this shader.
ID3D12PipelineState* GetComputePipeline() const { return m_compute_pipeline.Get(); }
// Non-owning view of the shader bytecode for use in pipeline state descriptors.
D3D12_SHADER_BYTECODE GetD3DByteCode() const;

// Factory functions; both return nullptr on failure.
static std::unique_ptr<DXShader> CreateFromBytecode(ShaderStage stage, BinaryData bytecode);
static std::unique_ptr<DXShader> CreateFromSource(ShaderStage stage, const char* source,
size_t length);

private:
// Private so that shaders can only be created through the factory functions above.
DXShader(ShaderStage stage, BinaryData bytecode);

bool CreateComputePipeline();

ComPtr<ID3D12PipelineState> m_compute_pipeline;
};

} // namespace DX12

Large diffs are not rendered by default.

@@ -0,0 +1,126 @@
// Copyright 2019 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.

#pragma once

#include <memory>
#include "Common/CommonTypes.h"
#include "VideoBackends/D3D12/Common.h"
#include "VideoBackends/D3D12/DescriptorHeapManager.h"
#include "VideoCommon/AbstractFramebuffer.h"
#include "VideoCommon/AbstractStagingTexture.h"
#include "VideoCommon/AbstractTexture.h"

namespace DX12
{
// D3D12 implementation of AbstractTexture. Holds the resource, its SRV/UAV descriptors and the
// resource state this object last recorded for it.
class DXTexture final : public AbstractTexture
{
public:
  ~DXTexture();

  // Factory functions. CreateAdopted() wraps an existing resource instead of allocating one.
  static std::unique_ptr<DXTexture> Create(const TextureConfig& config);
  static std::unique_ptr<DXTexture> CreateAdopted(ID3D12Resource* resource);

  void Load(u32 level, u32 width, u32 height, u32 row_length, const u8* buffer,
            size_t buffer_size) override;
  void CopyRectangleFromTexture(const AbstractTexture* src,
                                const MathUtil::Rectangle<int>& src_rect, u32 src_layer,
                                u32 src_level, const MathUtil::Rectangle<int>& dst_rect,
                                u32 dst_layer, u32 dst_level) override;
  void ResolveFromTexture(const AbstractTexture* src, const MathUtil::Rectangle<int>& rect,
                          u32 layer, u32 level) override;
  void FinishedRendering() override;

  ID3D12Resource* GetResource() const { return m_resource.Get(); }
  const DescriptorHandle& GetSRVDescriptor() const { return m_srv_descriptor; }
  const DescriptorHandle& GetUAVDescriptor() const { return m_uav_descriptor; }
  D3D12_RESOURCE_STATES GetState() const { return m_state; }

  // Index of the subresource for the given mip level and array layer. D3D12 orders subresources
  // mip-major within each array slice (MipSlice + ArraySlice * MipLevels), so the layer stride is
  // the number of mip levels, not the number of layers.
  u32 CalcSubresource(u32 level, u32 layer) const { return level + layer * m_config.levels; }

  // Const because only the mutable tracked state (m_state) changes.
  void TransitionToState(D3D12_RESOURCE_STATES state) const;

  // Destroys the resource backing this texture. The resource must not be in use by the GPU.
  void DestroyResource();

private:
  DXTexture(const TextureConfig& config, ID3D12Resource* resource, D3D12_RESOURCE_STATES state);

  bool CreateSRVDescriptor();
  bool CreateUAVDescriptor();

  ComPtr<ID3D12Resource> m_resource;
  DescriptorHandle m_srv_descriptor = {};
  DescriptorHandle m_uav_descriptor = {};

  // Mutable so that TransitionToState() can update it on const textures.
  mutable D3D12_RESOURCE_STATES m_state;
};

// D3D12 implementation of AbstractFramebuffer. Holds the render target view, an additional
// "integer" render target view, and the depth stencil view for the attachments.
class DXFramebuffer final : public AbstractFramebuffer
{
public:
~DXFramebuffer() override;

const DescriptorHandle& GetRTVDescriptor() const { return m_rtv_descriptor; }
const DescriptorHandle& GetIntRTVDescriptor() const { return m_int_rtv_descriptor; }
const DescriptorHandle& GetDSVDescriptor() const { return m_dsv_descriptor; }

// The accessors below return a count of 0 / a null pointer when the corresponding attachment is
// missing, so the results can be passed on without further checks.
// NOTE(review): m_color_attachment and m_depth_attachment are not declared in this class;
// presumably they are inherited from AbstractFramebuffer - confirm.
UINT GetRTVDescriptorCount() const { return m_color_attachment ? 1 : 0; }
const D3D12_CPU_DESCRIPTOR_HANDLE* GetRTVDescriptorArray() const
{
return m_color_attachment ? &m_rtv_descriptor.cpu_handle : nullptr;
}
const D3D12_CPU_DESCRIPTOR_HANDLE* GetIntRTVDescriptorArray() const
{
return m_color_attachment ? &m_int_rtv_descriptor.cpu_handle : nullptr;
}
const D3D12_CPU_DESCRIPTOR_HANDLE* GetDSVDescriptorArray() const
{
return m_depth_attachment ? &m_dsv_descriptor.cpu_handle : nullptr;
}

// Creates a framebuffer for the given attachments.
static std::unique_ptr<DXFramebuffer> Create(DXTexture* color_attachment,
DXTexture* depth_attachment);

private:
// Private so that framebuffers can only be created through Create().
DXFramebuffer(AbstractTexture* color_attachment, AbstractTexture* depth_attachment,
AbstractTextureFormat color_format, AbstractTextureFormat depth_format, u32 width,
u32 height, u32 layers, u32 samples);

bool CreateRTVDescriptor();
bool CreateDSVDescriptor();

DescriptorHandle m_rtv_descriptor = {};
DescriptorHandle m_int_rtv_descriptor = {};
DescriptorHandle m_dsv_descriptor = {};
};

// D3D12 implementation of AbstractStagingTexture, backed by an ID3D12Resource.
class DXStagingTexture final : public AbstractStagingTexture
{
public:
~DXStagingTexture();

void CopyFromTexture(const AbstractTexture* src, const MathUtil::Rectangle<int>& src_rect,
u32 src_layer, u32 src_level,
const MathUtil::Rectangle<int>& dst_rect) override;
void CopyToTexture(const MathUtil::Rectangle<int>& src_rect, AbstractTexture* dst,
const MathUtil::Rectangle<int>& dst_rect, u32 dst_layer,
u32 dst_level) override;

bool Map() override;
void Unmap() override;
void Flush() override;

// Creates a staging texture of the given type for the given configuration.
static std::unique_ptr<DXStagingTexture> Create(StagingTextureType type,
const TextureConfig& config);

private:
// Private so that staging textures can only be created through Create().
DXStagingTexture(StagingTextureType type, const TextureConfig& config, ID3D12Resource* resource,
u32 stride, u32 buffer_size);

ComPtr<ID3D12Resource> m_resource;
// NOTE(review): presumably the fence value that must complete before the contents can be
// accessed after a copy - confirm against the implementation.
u64 m_completed_fence = 0;
u32 m_buffer_size;
};

} // namespace DX12
@@ -0,0 +1,130 @@
// Copyright 2019 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.

#include "VideoBackends/D3D12/DXVertexFormat.h"

#include "Common/Assert.h"

#include "VideoCommon/VertexLoaderManager.h"
#include "VideoCommon/VertexShaderGen.h"

namespace DX12
{
// Looks up the DXGI format for a vertex attribute with the given component type and count.
// |components| must be in the range 1-4 (asserted). |integer| selects the UINT/SINT table instead
// of the UNORM/SNORM one; float attributes map to the same FLOAT formats in both tables.
static DXGI_FORMAT VarToDXGIFormat(VarType t, u32 components, bool integer)
{
  // One row per VarType, one column per component count (1 to 4).
  using FormatRow = DXGI_FORMAT[4];

  // NOTE: 3-component formats are not valid for 8/16-bit types, so those entries use the
  // 4-component format instead.
  static const FormatRow normalized_formats[] = {
      {DXGI_FORMAT_R8_UNORM, DXGI_FORMAT_R8G8_UNORM, DXGI_FORMAT_R8G8B8A8_UNORM,
       DXGI_FORMAT_R8G8B8A8_UNORM},  // VAR_UNSIGNED_BYTE
      {DXGI_FORMAT_R8_SNORM, DXGI_FORMAT_R8G8_SNORM, DXGI_FORMAT_R8G8B8A8_SNORM,
       DXGI_FORMAT_R8G8B8A8_SNORM},  // VAR_BYTE
      {DXGI_FORMAT_R16_UNORM, DXGI_FORMAT_R16G16_UNORM, DXGI_FORMAT_R16G16B16A16_UNORM,
       DXGI_FORMAT_R16G16B16A16_UNORM},  // VAR_UNSIGNED_SHORT
      {DXGI_FORMAT_R16_SNORM, DXGI_FORMAT_R16G16_SNORM, DXGI_FORMAT_R16G16B16A16_SNORM,
       DXGI_FORMAT_R16G16B16A16_SNORM},  // VAR_SHORT
      {DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_R32G32_FLOAT, DXGI_FORMAT_R32G32B32_FLOAT,
       DXGI_FORMAT_R32G32B32A32_FLOAT}  // VAR_FLOAT
  };

  static const FormatRow integer_formats[] = {
      {DXGI_FORMAT_R8_UINT, DXGI_FORMAT_R8G8_UINT, DXGI_FORMAT_R8G8B8A8_UINT,
       DXGI_FORMAT_R8G8B8A8_UINT},  // VAR_UNSIGNED_BYTE
      {DXGI_FORMAT_R8_SINT, DXGI_FORMAT_R8G8_SINT, DXGI_FORMAT_R8G8B8A8_SINT,
       DXGI_FORMAT_R8G8B8A8_SINT},  // VAR_BYTE
      {DXGI_FORMAT_R16_UINT, DXGI_FORMAT_R16G16_UINT, DXGI_FORMAT_R16G16B16A16_UINT,
       DXGI_FORMAT_R16G16B16A16_UINT},  // VAR_UNSIGNED_SHORT
      {DXGI_FORMAT_R16_SINT, DXGI_FORMAT_R16G16_SINT, DXGI_FORMAT_R16G16B16A16_SINT,
       DXGI_FORMAT_R16G16B16A16_SINT},  // VAR_SHORT
      {DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_R32G32_FLOAT, DXGI_FORMAT_R32G32B32_FLOAT,
       DXGI_FORMAT_R32G32B32A32_FLOAT}  // VAR_FLOAT
  };

  ASSERT(components > 0 && components <= 4);

  const FormatRow* const table = integer ? integer_formats : normalized_formats;
  return table[t][components - 1];
}

// Passes the declaration to the base class, then translates it into D3D12 input element
// descriptions.
DXVertexFormat::DXVertexFormat(const PortableVertexDeclaration& vtx_decl)
    : NativeVertexFormat(vtx_decl)
{
  // MapAttributes() reads m_decl, which the base class constructor has populated by now.
  MapAttributes();
}

// Points |desc| at this object's attribute array. The pointer written to |desc| refers to member
// storage, so it is only valid while this vertex format is alive.
void DXVertexFormat::GetInputLayoutDesc(D3D12_INPUT_LAYOUT_DESC* desc) const
{
  desc->NumElements = m_num_attributes;
  desc->pInputElementDescs = m_attribute_descriptions.data();
}

void DXVertexFormat::AddAttribute(const char* semantic_name, u32 semantic_index, u32 slot,
DXGI_FORMAT format, u32 offset)
{
ASSERT(m_num_attributes < MAX_VERTEX_ATTRIBUTES);

auto* attr_desc = &m_attribute_descriptions[m_num_attributes];
attr_desc->SemanticName = semantic_name;
attr_desc->SemanticIndex = semantic_index;
attr_desc->Format = format;
attr_desc->InputSlot = slot;
attr_desc->AlignedByteOffset = offset;
attr_desc->InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA;
attr_desc->InstanceDataStepRate = 0;

m_num_attributes++;
}

void DXVertexFormat::MapAttributes()
{
m_num_attributes = 0;

if (m_decl.position.enable)
{
AddAttribute(
"POSITION", 0, 0,
VarToDXGIFormat(m_decl.position.type, m_decl.position.components, m_decl.position.integer),
m_decl.position.offset);
}

for (uint32_t i = 0; i < 3; i++)
{
if (m_decl.normals[i].enable)
{
AddAttribute("NORMAL", i, 0,
VarToDXGIFormat(m_decl.normals[i].type, m_decl.normals[i].components,
m_decl.normals[i].integer),
m_decl.normals[i].offset);
}
}

for (uint32_t i = 0; i < 2; i++)
{
if (m_decl.colors[i].enable)
{
AddAttribute("COLOR", i, 0,
VarToDXGIFormat(m_decl.colors[i].type, m_decl.colors[i].components,
m_decl.colors[i].integer),
m_decl.colors[i].offset);
}
}

for (uint32_t i = 0; i < 8; i++)
{
if (m_decl.texcoords[i].enable)
{
AddAttribute("TEXCOORD", i, 0,
VarToDXGIFormat(m_decl.texcoords[i].type, m_decl.texcoords[i].components,
m_decl.texcoords[i].integer),
m_decl.texcoords[i].offset);
}
}

if (m_decl.posmtx.enable)
{
AddAttribute(
"BLENDINDICES", 0, 0,
VarToDXGIFormat(m_decl.posmtx.type, m_decl.posmtx.components, m_decl.posmtx.integer),
m_decl.posmtx.offset);
}
}

} // namespace DX12
@@ -0,0 +1,33 @@
// Copyright 2019 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.

#pragma once

#include <array>
#include <d3d12.h>

#include "Common/CommonTypes.h"
#include "VideoCommon/NativeVertexFormat.h"

namespace DX12
{
// D3D12 vertex format: translates a PortableVertexDeclaration into an array of
// D3D12_INPUT_ELEMENT_DESC for use when creating pipeline state.
class DXVertexFormat : public NativeVertexFormat
{
public:
// Capacity of the attribute array; AddAttribute() asserts that it is not exceeded.
static const u32 MAX_VERTEX_ATTRIBUTES = 16;

// Builds the attribute array from |vtx_decl|.
DXVertexFormat(const PortableVertexDeclaration& vtx_decl);

// Passed to pipeline state creation
void GetInputLayoutDesc(D3D12_INPUT_LAYOUT_DESC* desc) const;

private:
void AddAttribute(const char* semantic_name, u32 semantic_index, u32 slot, DXGI_FORMAT format,
u32 offset);
void MapAttributes();

// Only the first m_num_attributes entries are in use.
std::array<D3D12_INPUT_ELEMENT_DESC, MAX_VERTEX_ATTRIBUTES> m_attribute_descriptions = {};
u32 m_num_attributes = 0;
};
} // namespace DX12
@@ -0,0 +1,121 @@
// Copyright 2019 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.

#include "VideoBackends/D3D12/DescriptorAllocator.h"
#include "VideoBackends/D3D12/DXContext.h"

namespace DX12
{
// Construction and destruction are trivial; the descriptor heap itself is created by Create().
DescriptorAllocator::DescriptorAllocator() = default;
DescriptorAllocator::~DescriptorAllocator() = default;

// Creates a shader-visible descriptor heap of the given type holding num_descriptors
// entries and caches its base handles and increment size.
// Returns false if the heap could not be created.
bool DescriptorAllocator::Create(ID3D12Device* device, D3D12_DESCRIPTOR_HEAP_TYPE type,
                                 u32 num_descriptors)
{
  D3D12_DESCRIPTOR_HEAP_DESC heap_desc = {};
  heap_desc.Type = type;
  heap_desc.NumDescriptors = static_cast<UINT>(num_descriptors);
  heap_desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE;

  HRESULT hr = device->CreateDescriptorHeap(&heap_desc, IID_PPV_ARGS(&m_descriptor_heap));
  CHECK(SUCCEEDED(hr), "Creating descriptor heap for linear allocator failed");
  if (!SUCCEEDED(hr))
    return false;

  // Cache everything Allocate() needs to compute handles without touching the heap object.
  m_heap_base_gpu = m_descriptor_heap->GetGPUDescriptorHandleForHeapStart();
  m_heap_base_cpu = m_descriptor_heap->GetCPUDescriptorHandleForHeapStart();
  m_descriptor_increment_size = device->GetDescriptorHandleIncrementSize(type);
  m_num_descriptors = num_descriptors;
  return true;
}

// Hands out num_handles consecutive descriptors starting at the current offset.
// On success out_base_handle describes the first descriptor of the range and the
// offset advances; returns false (leaving the allocator untouched) when the range
// would run past the end of the heap.
bool DescriptorAllocator::Allocate(u32 num_handles, DescriptorHandle* out_base_handle)
{
  const u32 base_index = m_current_offset;
  if ((base_index + num_handles) > m_num_descriptors)
    return false;

  // CPU and GPU handles share the same byte offset from their respective heap starts.
  const u32 byte_offset = base_index * m_descriptor_increment_size;
  out_base_handle->index = base_index;
  out_base_handle->cpu_handle.ptr = m_heap_base_cpu.ptr + byte_offset;
  out_base_handle->gpu_handle.ptr = m_heap_base_gpu.ptr + byte_offset;

  m_current_offset = base_index + num_handles;
  return true;
}

// Rewinds the allocator so the next Allocate() starts from the first descriptor again.
// Previously returned handles are not invalidated here; their slots simply get reused.
void DescriptorAllocator::Reset()
{
  m_current_offset = 0u;
}

// Two sets are equal when their state arrays are bytewise identical.
bool operator==(const SamplerStateSet& lhs, const SamplerStateSet& rhs)
{
  // The array is compared as raw memory; this relies on the states containing no padding.
  const int byte_order = std::memcmp(lhs.states, rhs.states, sizeof(lhs.states));
  return byte_order == 0;
}

// Two sets differ when any byte of their state arrays differs.
bool operator!=(const SamplerStateSet& lhs, const SamplerStateSet& rhs)
{
  const int byte_order = std::memcmp(lhs.states, rhs.states, sizeof(lhs.states));
  return byte_order != 0;
}

// Bytewise ordering of the state arrays; this is what lets SamplerStateSet be a std::map key.
bool operator<(const SamplerStateSet& lhs, const SamplerStateSet& rhs)
{
  const int byte_order = std::memcmp(lhs.states, rhs.states, sizeof(lhs.states));
  return byte_order < 0;
}

// Construction and destruction are trivial; the sampler heap is created by Create().
SamplerAllocator::SamplerAllocator() = default;
SamplerAllocator::~SamplerAllocator() = default;

// Creates the shader-visible sampler heap at the maximum size D3D12 allows.
// Returns false if heap creation fails.
bool SamplerAllocator::Create(ID3D12Device* device)
{
  const D3D12_DESCRIPTOR_HEAP_TYPE heap_type = D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER;
  const u32 heap_size = D3D12_MAX_SHADER_VISIBLE_SAMPLER_HEAP_SIZE;
  return DescriptorAllocator::Create(device, heap_type, heap_size);
}

bool SamplerAllocator::GetGroupHandle(const SamplerStateSet& sss,
D3D12_GPU_DESCRIPTOR_HANDLE* handle)
{
auto it = m_sampler_map.find(sss);
if (it != m_sampler_map.end())
{
*handle = it->second;
return true;
}

// Allocate a group of descriptors.
DescriptorHandle allocation;
if (!Allocate(SamplerStateSet::NUM_SAMPLERS_PER_GROUP, &allocation))
return false;

// Lookup sampler handles from global cache.
std::array<D3D12_CPU_DESCRIPTOR_HANDLE, SamplerStateSet::NUM_SAMPLERS_PER_GROUP> source_handles;
for (u32 i = 0; i < SamplerStateSet::NUM_SAMPLERS_PER_GROUP; i++)
{
if (!g_dx_context->GetSamplerHeapManager().Lookup(sss.states[i], &source_handles[i]))
return false;
}

// Copy samplers from the sampler heap.
static constexpr std::array<UINT, SamplerStateSet::NUM_SAMPLERS_PER_GROUP> source_sizes = {
{1, 1, 1, 1, 1, 1, 1, 1}};
g_dx_context->GetDevice()->CopyDescriptors(
1, &allocation.cpu_handle, &SamplerStateSet::NUM_SAMPLERS_PER_GROUP,
SamplerStateSet::NUM_SAMPLERS_PER_GROUP, source_handles.data(), source_sizes.data(),
D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER);
*handle = allocation.gpu_handle;
m_sampler_map.emplace(sss, allocation.gpu_handle);
return true;
}

bool SamplerAllocator::ShouldReset() const
{
// We only reset the sampler heap if more than half of the descriptors are used.
// This saves descriptor copying when there isn't a large number of sampler configs per frame.
return m_sampler_map.size() >= (D3D12_MAX_SHADER_VISIBLE_SAMPLER_HEAP_SIZE / 2);
}

// Forgets every cached sampler group and rewinds the underlying descriptor allocator,
// so subsequent GetGroupHandle() calls rebuild their groups from the start of the heap.
void SamplerAllocator::Reset()
{
  m_sampler_map.clear();
  DescriptorAllocator::Reset();
}
} // namespace DX12
@@ -0,0 +1,61 @@
// Copyright 2019 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.

#pragma once

#include <map>
#include "VideoBackends/D3D12/DescriptorHeapManager.h"

namespace DX12
{
// Linear allocator over a single shader-visible descriptor heap. Descriptors are handed
// out sequentially by Allocate() and are only reclaimed all at once by Reset().
class DescriptorAllocator
{
public:
DescriptorAllocator();
~DescriptorAllocator();

ID3D12DescriptorHeap* GetDescriptorHeap() const { return m_descriptor_heap.Get(); }
u32 GetDescriptorIncrementSize() const { return m_descriptor_increment_size; }

// Creates the heap; returns false on failure.
bool Create(ID3D12Device* device, D3D12_DESCRIPTOR_HEAP_TYPE type, u32 num_descriptors);

// Reserves num_handles consecutive descriptors; returns false when the heap is exhausted.
bool Allocate(u32 num_handles, DescriptorHandle* out_base_handle);
// Rewinds the allocation offset to the start of the heap.
void Reset();

protected:
ComPtr<ID3D12DescriptorHeap> m_descriptor_heap;
// Byte distance between consecutive descriptors, as reported by the device.
u32 m_descriptor_increment_size = 0;
// Capacity of the heap, in descriptors.
u32 m_num_descriptors = 0;
// Index of the next descriptor Allocate() will hand out.
u32 m_current_offset = 0;

// Handles of the first descriptor in the heap.
D3D12_CPU_DESCRIPTOR_HANDLE m_heap_base_cpu = {};
D3D12_GPU_DESCRIPTOR_HANDLE m_heap_base_gpu = {};
};

// A fixed-size group of sampler states that is bound together. Used as the key of the
// sampler group cache, so it is compared bytewise by the operators declared below.
struct SamplerStateSet final
{
// Number of sampler states in each group.
static const u32 NUM_SAMPLERS_PER_GROUP = 8;
SamplerState states[NUM_SAMPLERS_PER_GROUP];
};

// Bytewise comparisons of the states arrays (implemented with std::memcmp).
bool operator==(const SamplerStateSet& lhs, const SamplerStateSet& rhs);
bool operator!=(const SamplerStateSet& lhs, const SamplerStateSet& rhs);
bool operator<(const SamplerStateSet& lhs, const SamplerStateSet& rhs);

// Descriptor allocator specialised for samplers: allocates whole groups of
// SamplerStateSet::NUM_SAMPLERS_PER_GROUP samplers and caches them by their states.
class SamplerAllocator final : public DescriptorAllocator
{
public:
SamplerAllocator();
~SamplerAllocator();

// Creates the shader-visible sampler heap; returns false on failure.
bool Create(ID3D12Device* device);
// Looks up or builds the descriptor group for sss; returns false if no space is left.
bool GetGroupHandle(const SamplerStateSet& sss, D3D12_GPU_DESCRIPTOR_HANDLE* handle);
// Whether enough of the heap is in use that a Reset() is worthwhile.
bool ShouldReset() const;
// Clears the group cache and rewinds the allocator. Hides (does not override) the base Reset().
void Reset();

private:
// Cache of already-built groups, keyed by their sampler states.
std::map<SamplerStateSet, D3D12_GPU_DESCRIPTOR_HANDLE> m_sampler_map;
};

} // namespace DX12
@@ -0,0 +1,188 @@
// Copyright 2019 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.

#include "VideoBackends/D3D12/DescriptorHeapManager.h"
#include "Common/Assert.h"
#include "VideoBackends/D3D12/DXContext.h"
#include "VideoCommon/VideoConfig.h"

namespace DX12
{
// Construction and destruction are trivial; the descriptor heap is created by Create().
DescriptorHeapManager::DescriptorHeapManager() = default;
DescriptorHeapManager::~DescriptorHeapManager() = default;

// Creates a non-shader-visible descriptor heap of the given type with num_descriptors
// entries and marks every one of them as free.
// Returns false if the heap could not be created.
bool DescriptorHeapManager::Create(ID3D12Device* device, D3D12_DESCRIPTOR_HEAP_TYPE type,
                                   u32 num_descriptors)
{
  D3D12_DESCRIPTOR_HEAP_DESC desc = {type, static_cast<UINT>(num_descriptors),
                                     D3D12_DESCRIPTOR_HEAP_FLAG_NONE};

  HRESULT hr = device->CreateDescriptorHeap(&desc, IID_PPV_ARGS(&m_descriptor_heap));
  CHECK(SUCCEEDED(hr), "Create descriptor heap");
  if (FAILED(hr))
    return false;

  m_heap_base_cpu = m_descriptor_heap->GetCPUDescriptorHandleForHeapStart();
  m_heap_base_gpu = m_descriptor_heap->GetGPUDescriptorHandleForHeapStart();
  m_num_descriptors = num_descriptors;
  m_descriptor_increment_size = device->GetDescriptorHandleIncrementSize(type);

  // Set all slots to unallocated (1). The slot map is rebuilt from scratch so that calling
  // Create() again does not toggle bits left over from a previous heap.
  const u32 bitset_count =
      num_descriptors / BITSET_SIZE + (((num_descriptors % BITSET_SIZE) != 0) ? 1 : 0);
  BitSetType all_free;
  all_free.set();
  m_free_slots.assign(bitset_count, all_free);

  // When the heap size is not a multiple of BITSET_SIZE, the last bitset has bits that do
  // not correspond to any descriptor. Mark those as allocated so Allocate() can never hand
  // out an index beyond the end of the heap.
  const u32 used_bits_in_last_group = num_descriptors % BITSET_SIZE;
  if (used_bits_in_last_group != 0)
  {
    BitSetType& last_group = m_free_slots.back();
    for (u32 bit = used_bits_in_last_group; bit < BITSET_SIZE; bit++)
      last_group[bit] = false;
  }

  return true;
}

// Claims the lowest-numbered free descriptor and describes it in *handle.
// Raises a panic alert and returns false when every descriptor is in use.
bool DescriptorHeapManager::Allocate(DescriptorHandle* handle)
{
  // Scan the groups in order; a group with no bit set has nothing left to give.
  for (u32 group = 0; group < m_free_slots.size(); group++)
  {
    BitSetType& free_bits = m_free_slots[group];
    if (!free_bits.any())
      continue;

    // At least one bit is set, so this search always stops inside the bitset.
    u32 bit = 0;
    while (bit < BITSET_SIZE && !free_bits[bit])
      bit++;

    free_bits[bit] = false;

    const u32 index = group * BITSET_SIZE + bit;
    const u32 byte_offset = index * m_descriptor_increment_size;
    handle->index = index;
    handle->cpu_handle.ptr = m_heap_base_cpu.ptr + byte_offset;
    handle->gpu_handle.ptr = m_heap_base_gpu.ptr + byte_offset;
    return true;
  }

  PanicAlert("Out of fixed descriptors");
  return false;
}

// Returns the descriptor at index to the free pool so a later Allocate() can reuse it.
void DescriptorHeapManager::Free(u32 index)
{
  ASSERT(index < m_num_descriptors);

  BitSetType& group_bits = m_free_slots[index / BITSET_SIZE];
  group_bits[index % BITSET_SIZE] = true;
}

void DescriptorHeapManager::Free(const DescriptorHandle& handle)
{
Free(handle.index);
}

// Construction and destruction are trivial; the sampler heap is created by Create().
SamplerHeapManager::SamplerHeapManager() = default;
SamplerHeapManager::~SamplerHeapManager() = default;

// Fills *desc with the D3D12 equivalent of state: filter, address modes, LOD range/bias
// and, when anisotropic filtering is requested, the maximum anisotropy from the active config.
// Fields not mentioned here are left as the caller initialised them.
static void GetD3DSamplerDesc(D3D12_SAMPLER_DESC* desc, const SamplerState& state)
{
  // Non-anisotropic filters, indexed by three "is linear" bits:
  // bit 2 = mipmap filter, bit 1 = minification filter, bit 0 = magnification filter.
  static constexpr std::array<D3D12_FILTER, 8> basic_filters = {
      {D3D12_FILTER_MIN_MAG_MIP_POINT, D3D12_FILTER_MIN_POINT_MAG_LINEAR_MIP_POINT,
       D3D12_FILTER_MIN_LINEAR_MAG_MIP_POINT, D3D12_FILTER_MIN_MAG_LINEAR_MIP_POINT,
       D3D12_FILTER_MIN_MAG_POINT_MIP_LINEAR, D3D12_FILTER_MIN_POINT_MAG_MIP_LINEAR,
       D3D12_FILTER_MIN_LINEAR_MAG_POINT_MIP_LINEAR, D3D12_FILTER_MIN_MAG_MIP_LINEAR}};

  const bool mip_linear = state.mipmap_filter == SamplerState::Filter::Linear;
  const bool min_linear = state.min_filter == SamplerState::Filter::Linear;
  const bool mag_linear = state.mag_filter == SamplerState::Filter::Linear;
  const u32 filter_index =
      (mip_linear ? 4u : 0u) | (min_linear ? 2u : 0u) | (mag_linear ? 1u : 0u);

  // Anisotropic filtering replaces whichever basic filter would otherwise be chosen.
  if (state.anisotropic_filtering)
  {
    desc->Filter = D3D12_FILTER_ANISOTROPIC;
    desc->MaxAnisotropy = 1u << g_ActiveConfig.iMaxAnisotropy;
  }
  else
  {
    desc->Filter = basic_filters[filter_index];
  }

  // Wrap mode values index directly into this table.
  static constexpr std::array<D3D12_TEXTURE_ADDRESS_MODE, 3> address_modes = {
      {D3D12_TEXTURE_ADDRESS_MODE_CLAMP, D3D12_TEXTURE_ADDRESS_MODE_WRAP,
       D3D12_TEXTURE_ADDRESS_MODE_MIRROR}};
  desc->AddressU = address_modes[static_cast<u32>(state.wrap_u.Value())];
  desc->AddressV = address_modes[static_cast<u32>(state.wrap_v.Value())];
  desc->AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP;

  // LOD limits are divided by 16 and the bias by 256 to convert them to floats.
  desc->MinLOD = state.min_lod / 16.f;
  desc->MaxLOD = state.max_lod / 16.f;
  desc->MipLODBias = static_cast<s32>(state.lod_bias) / 256.f;
  desc->ComparisonFunc = D3D12_COMPARISON_FUNC_NEVER;
}

bool SamplerHeapManager::Lookup(const SamplerState& ss, D3D12_CPU_DESCRIPTOR_HANDLE* handle)
{
const auto it = m_sampler_map.find(ss.hex);
if (it != m_sampler_map.end())
{
*handle = it->second;
return true;
}

if (m_current_offset == m_num_descriptors)
{
// We can clear at any time because the descriptors are copied prior to execution.
// It's still not free, since we have to recreate all our samplers again.
WARN_LOG(VIDEO, "Out of samplers, resetting CPU heap");
Clear();
}

D3D12_SAMPLER_DESC desc = {};
GetD3DSamplerDesc(&desc, ss);

const D3D12_CPU_DESCRIPTOR_HANDLE new_handle = {m_heap_base_cpu.ptr +
m_current_offset * m_descriptor_increment_size};
g_dx_context->GetDevice()->CreateSampler(&desc, new_handle);

m_sampler_map.emplace(ss.hex, new_handle);
m_current_offset++;
*handle = new_handle;
return true;
}

// Drops every cached sampler and starts filling the heap from its first slot again.
void SamplerHeapManager::Clear()
{
  m_current_offset = 0;
  m_sampler_map.clear();
}

// Creates the (non-shader-visible) sampler heap with room for num_descriptors samplers
// and caches its CPU base handle and increment size. Returns false on failure.
bool SamplerHeapManager::Create(ID3D12Device* device, u32 num_descriptors)
{
  D3D12_DESCRIPTOR_HEAP_DESC heap_desc = {};
  heap_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER;
  heap_desc.NumDescriptors = num_descriptors;

  HRESULT hr = device->CreateDescriptorHeap(&heap_desc, IID_PPV_ARGS(&m_descriptor_heap));
  CHECK(SUCCEEDED(hr), "Failed to create sampler descriptor heap");
  if (!SUCCEEDED(hr))
    return false;

  m_heap_base_cpu = m_descriptor_heap->GetCPUDescriptorHandleForHeapStart();
  m_descriptor_increment_size =
      device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER);
  m_num_descriptors = num_descriptors;
  return true;
}
} // namespace DX12
@@ -0,0 +1,74 @@
// Copyright 2019 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.

#pragma once

#include <bitset>
#include <unordered_map>
#include "VideoBackends/D3D12/Common.h"
#include "VideoCommon/RenderState.h"

namespace DX12
{
// Identifies a single descriptor within a descriptor heap: its CPU and GPU handles plus
// its index in the heap. Members have no default initialisers, so a default-constructed
// handle is indeterminate until an allocator fills it in.
struct DescriptorHandle final
{
D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle;
D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle;
u32 index;

// A handle counts as valid when its CPU pointer is non-null.
operator bool() const { return cpu_handle.ptr != 0; }
};

// This class provides an abstraction for D3D12 descriptor heaps: descriptors are
// allocated and freed individually, with free slots tracked in a vector of bitsets.
class DescriptorHeapManager final
{
public:
DescriptorHeapManager();
~DescriptorHeapManager();

ID3D12DescriptorHeap* GetDescriptorHeap() const { return m_descriptor_heap.Get(); }
u32 GetDescriptorIncrementSize() const { return m_descriptor_increment_size; }

// Creates the heap and marks all descriptors free; returns false on failure.
bool Create(ID3D12Device* device, D3D12_DESCRIPTOR_HEAP_TYPE type, u32 num_descriptors);

// Claims the first free descriptor; returns false when none are left.
bool Allocate(DescriptorHandle* handle);
// Returns a descriptor to the free pool.
void Free(const DescriptorHandle& handle);
void Free(u32 index);

private:
ComPtr<ID3D12DescriptorHeap> m_descriptor_heap;
// Capacity of the heap, in descriptors.
u32 m_num_descriptors = 0;
// Byte distance between consecutive descriptors, as reported by the device.
u32 m_descriptor_increment_size = 0;

// Handles of the first descriptor in the heap.
D3D12_CPU_DESCRIPTOR_HANDLE m_heap_base_cpu = {};
D3D12_GPU_DESCRIPTOR_HANDLE m_heap_base_gpu = {};

// Free-slot map: descriptor i is free when bit (i % BITSET_SIZE) of
// m_free_slots[i / BITSET_SIZE] is set.
static constexpr u32 BITSET_SIZE = 1024;
using BitSetType = std::bitset<BITSET_SIZE>;
std::vector<BitSetType> m_free_slots = {};
};

// Owns a CPU-side heap of sampler descriptors and caches one descriptor per distinct
// sampler state. Slots are filled sequentially; when the heap runs out, Lookup() clears
// it and starts over.
class SamplerHeapManager final
{
public:
  SamplerHeapManager();
  ~SamplerHeapManager();

  ID3D12DescriptorHeap* GetDescriptorHeap() const { return m_descriptor_heap.Get(); }

  // Creates the heap with room for num_descriptors samplers; returns false on failure.
  bool Create(ID3D12Device* device, u32 num_descriptors);
  // Finds or creates the sampler descriptor for ss and returns it through handle.
  bool Lookup(const SamplerState& ss, D3D12_CPU_DESCRIPTOR_HANDLE* handle);
  // Discards every cached sampler and rewinds to the start of the heap.
  void Clear();

private:
  ComPtr<ID3D12DescriptorHeap> m_descriptor_heap;
  // Capacity of the heap, in descriptors.
  u32 m_num_descriptors = 0;
  // Byte distance between consecutive descriptors, as reported by the device.
  u32 m_descriptor_increment_size = 0;
  // Index of the next unused slot.
  u32 m_current_offset = 0;

  // Handle of the first descriptor in the heap. Value-initialised like the handle members
  // of the other heap classes so it is never read while indeterminate.
  D3D12_CPU_DESCRIPTOR_HANDLE m_heap_base_cpu = {};

  // Cache of created samplers, keyed by the packed sampler state.
  std::unordered_map<SamplerState::StorageType, D3D12_CPU_DESCRIPTOR_HANDLE> m_sampler_map;
};
} // namespace DX12
@@ -0,0 +1,235 @@
// Copyright 2019 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.

#include <algorithm>

#include "Common/Assert.h"
#include "Common/Logging/Log.h"
#include "VideoBackends/D3D12/Common.h"
#include "VideoBackends/D3D12/DXContext.h"
#include "VideoBackends/D3D12/PerfQuery.h"
#include "VideoBackends/D3D12/Renderer.h"

namespace DX12
{
// Construction creates no GPU objects; the query heap and readback buffer are
// created by Initialize().
PerfQuery::PerfQuery() = default;

PerfQuery::~PerfQuery() = default;

bool PerfQuery::Initialize()
{
constexpr D3D12_QUERY_HEAP_DESC desc = {D3D12_QUERY_HEAP_TYPE_OCCLUSION, PERF_QUERY_BUFFER_SIZE};
HRESULT hr = g_dx_context->GetDevice()->CreateQueryHeap(&desc, IID_PPV_ARGS(&m_query_heap));
CHECK(SUCCEEDED(hr), "Failed to create query heap");
if (FAILED(hr))
return false;

constexpr D3D12_HEAP_PROPERTIES heap_properties = {D3D12_HEAP_TYPE_READBACK};
constexpr D3D12_RESOURCE_DESC resource_desc = {D3D12_RESOURCE_DIMENSION_BUFFER,
0,
PERF_QUERY_BUFFER_SIZE * sizeof(PerfQueryDataType),
1,
1,
1,
DXGI_FORMAT_UNKNOWN,
{1, 0},
D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
D3D12_RESOURCE_FLAG_NONE};
hr = g_dx_context->GetDevice()->CreateCommittedResource(
&heap_properties, D3D12_HEAP_FLAG_NONE, &resource_desc, D3D12_RESOURCE_STATE_COPY_DEST,
nullptr, IID_PPV_ARGS(&m_query_readback_buffer));
CHECK(SUCCEEDED(hr), "Failed to create query buffer");
if (FAILED(hr))
return false;

return true;
}

// Begins an occlusion query for the given group in the next free slot of the query
// buffer. Only the PQG_ZCOMP_ZCOMPLOC and PQG_ZCOMP groups are backed by GPU queries;
// other groups are ignored. Before starting, older queries are flushed if the buffer
// is more than half full.
void PerfQuery::EnableQuery(PerfQueryGroup type)
{
  // Block if there are no free slots.
  // Otherwise, try to keep half of them available.
  if (m_query_count > m_query_buffer.size() / 2)
  {
    const bool do_resolve = m_unresolved_queries > m_query_buffer.size() / 2;
    const bool blocking = m_query_count == PERF_QUERY_BUFFER_SIZE;
    PartialFlush(do_resolve, blocking);
  }

  if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP)
  {
    ActiveQuery& entry = m_query_buffer[m_query_next_pos];
    ASSERT(!entry.has_value && !entry.resolved);
    entry.has_value = true;

    // Remember which group this query counts towards. ReadbackQueries() uses the stored
    // value as an index into m_results, which is indexed by PerfQueryGroup; without this
    // assignment every result would be added to group 0 regardless of the requested group.
    // The field is declared as PerfQueryType, hence the cast.
    entry.query_type = static_cast<PerfQueryType>(type);

    g_dx_context->GetCommandList()->BeginQuery(m_query_heap.Get(), D3D12_QUERY_TYPE_OCCLUSION,
                                               m_query_next_pos);
  }
}

// Ends the occlusion query started by EnableQuery() for the same group and advances
// to the next slot. Groups that are not backed by GPU queries are ignored.
void PerfQuery::DisableQuery(PerfQueryGroup type)
{
  if (type != PQG_ZCOMP_ZCOMPLOC && type != PQG_ZCOMP)
    return;

  const u32 finished_slot = m_query_next_pos;
  g_dx_context->GetCommandList()->EndQuery(m_query_heap.Get(), D3D12_QUERY_TYPE_OCCLUSION,
                                           finished_slot);

  // The slot now holds a query that is pending both a resolve and a readback.
  m_query_next_pos = (finished_slot + 1) % PERF_QUERY_BUFFER_SIZE;
  m_query_count++;
  m_unresolved_queries++;
}

void PerfQuery::ResetQuery()
{
m_query_count = 0;
m_unresolved_queries = 0;
m_query_resolve_pos = 0;
m_query_readback_pos = 0;
m_query_next_pos = 0;
std::fill_n(m_results, ArraySize(m_results), 0);
for (auto& entry : m_query_buffer)
{
entry.fence_value = 0;
entry.resolved = false;
entry.has_value = false;
}
}

// Maps a query type onto the accumulated result of the group(s) backing it and returns
// that value divided by four. Unknown types yield zero.
u32 PerfQuery::GetQueryResult(PerfQueryType type)
{
  if (type == PQ_ZCOMP_INPUT_ZCOMPLOC || type == PQ_ZCOMP_OUTPUT_ZCOMPLOC)
    return m_results[PQG_ZCOMP_ZCOMPLOC] / 4;

  if (type == PQ_ZCOMP_INPUT || type == PQ_ZCOMP_OUTPUT)
    return m_results[PQG_ZCOMP] / 4;

  // Blend input is the sum of both z-compare groups.
  if (type == PQ_BLEND_INPUT)
    return (m_results[PQG_ZCOMP] + m_results[PQG_ZCOMP_ZCOMPLOC]) / 4;

  if (type == PQ_EFB_COPY_CLOCKS)
    return m_results[PQG_EFB_COPY_CLOCKS] / 4;

  return 0;
}

// Blocks until every outstanding query has been resolved and read back.
void PerfQuery::FlushResults()
{
  for (;;)
  {
    if (IsFlushed())
      break;

    PartialFlush(true, true);
  }
}

// True when no queries are waiting to be read back.
bool PerfQuery::IsFlushed() const
{
  const bool has_pending_queries = (m_query_count != 0);
  return !has_pending_queries;
}

void PerfQuery::ResolveQueries()
{
// Do we need to split the resolve as it's wrapping around?
if ((m_query_resolve_pos + m_unresolved_queries) > PERF_QUERY_BUFFER_SIZE)
ResolveQueries(PERF_QUERY_BUFFER_SIZE - m_query_resolve_pos);

ResolveQueries(m_unresolved_queries);
}

// Records a resolve of query_count consecutive queries, starting at the resolve cursor,
// into the readback buffer. The range must not wrap around the end of the ring buffer.
// Each resolved slot is tagged with the current fence value so readback can tell when
// its data has actually been written.
void PerfQuery::ResolveQueries(u32 query_count)
{
  DEBUG_ASSERT(m_unresolved_queries >= query_count &&
               (m_query_resolve_pos + query_count) <= PERF_QUERY_BUFFER_SIZE);

  const u32 first_slot = m_query_resolve_pos;
  g_dx_context->GetCommandList()->ResolveQueryData(
      m_query_heap.Get(), D3D12_QUERY_TYPE_OCCLUSION, first_slot, query_count,
      m_query_readback_buffer.Get(), first_slot * sizeof(PerfQueryDataType));

  // Flag all queries as available, but with a fence that has to be completed first
  for (u32 slot = first_slot; slot < first_slot + query_count; slot++)
  {
    ActiveQuery& entry = m_query_buffer[slot];
    DEBUG_ASSERT(entry.has_value && !entry.resolved);
    entry.fence_value = g_dx_context->GetCurrentFenceValue();
    entry.resolved = true;
  }

  m_unresolved_queries -= query_count;
  m_query_resolve_pos = (first_slot + query_count) % PERF_QUERY_BUFFER_SIZE;
}

// Reads back every query at the front of the ring buffer whose resolve has completed on
// the GPU. Stops at the first query that is unresolved or whose fence has not been
// reached. Ranges that wrap around the end of the buffer are read back in two pieces.
void PerfQuery::ReadbackQueries()
{
const u64 completed_fence_counter = g_dx_context->GetCompletedFenceValue();

// Snapshot the pending count: ReadbackQueries(u32) decrements m_query_count and advances
// m_query_readback_pos while this loop is running.
const u32 outstanding_queries = m_query_count;
u32 readback_count = 0;
for (u32 i = 0; i < outstanding_queries; i++)
{
// Slot of the next candidate, relative to the (possibly already advanced) readback cursor.
u32 index = (m_query_readback_pos + readback_count) % PERF_QUERY_BUFFER_SIZE;
const ActiveQuery& entry = m_query_buffer[index];
if (!entry.resolved || entry.fence_value > completed_fence_counter)
break;

// If this wrapped around, we need to flush the entries before the end of the buffer.
if (index < m_query_readback_pos)
{
ReadbackQueries(readback_count);
DEBUG_ASSERT(m_query_readback_pos == 0);
readback_count = 0;
}

readback_count++;
}

// Read back whatever contiguous run was collected last.
if (readback_count > 0)
ReadbackQueries(readback_count);
}

// Reads query_count consecutive results, starting at the readback cursor, out of the
// readback buffer and adds them (rescaled to native EFB resolution) to m_results.
// The affected slots are released for reuse. Does nothing if the buffer cannot be mapped.
void PerfQuery::ReadbackQueries(u32 query_count)
{
  // Should be at maximum query_count queries pending.
  ASSERT(query_count <= m_query_count &&
         (m_query_readback_pos + query_count) <= PERF_QUERY_BUFFER_SIZE);

  // Only the slots being read need to be visible to the CPU.
  D3D12_RANGE read_range;
  read_range.Begin = m_query_readback_pos * sizeof(PerfQueryDataType);
  read_range.End = (m_query_readback_pos + query_count) * sizeof(PerfQueryDataType);

  u8* mapped_ptr;
  HRESULT hr = m_query_readback_buffer->Map(0, &read_range, reinterpret_cast<void**>(&mapped_ptr));
  CHECK(SUCCEEDED(hr), "Failed to map query readback buffer");
  if (!SUCCEEDED(hr))
    return;

  // Remove pending queries.
  for (u32 i = 0; i < query_count; i++)
  {
    const u32 index = (m_query_readback_pos + i) % PERF_QUERY_BUFFER_SIZE;
    ActiveQuery& entry = m_query_buffer[index];

    // Should have a fence associated with it (waiting for a result).
    ASSERT(entry.fence_value != 0);

    // Grab result from readback buffer, it will already have been invalidated.
    PerfQueryDataType result;
    std::memcpy(&result, mapped_ptr + (index * sizeof(PerfQueryDataType)), sizeof(result));

    // NOTE: Reported pixel metrics should be referenced to native resolution
    const u32 native_pixels =
        static_cast<u32>(static_cast<u64>(result) * EFB_WIDTH / g_renderer->GetTargetWidth() *
                         EFB_HEIGHT / g_renderer->GetTargetHeight());
    m_results[entry.query_type] += native_pixels;

    // Release the slot.
    entry.has_value = false;
    entry.resolved = false;
    entry.fence_value = 0;
  }

  // Nothing was written through the mapping.
  constexpr D3D12_RANGE write_range = {0, 0};
  m_query_readback_buffer->Unmap(0, &write_range);

  m_query_count -= query_count;
  m_query_readback_pos = (m_query_readback_pos + query_count) % PERF_QUERY_BUFFER_SIZE;
}

void PerfQuery::PartialFlush(bool resolve, bool blocking)
{
// Submit a command buffer in the background if the front query is not bound to one.
if ((resolve || blocking) && !m_query_buffer[m_query_resolve_pos].resolved)
Renderer::GetInstance()->ExecuteCommandList(blocking);

ReadbackQueries();
}
} // namespace DX12
@@ -0,0 +1,60 @@
// Copyright 2019 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.

#pragma once

#include <array>
#include "VideoBackends/D3D12/DXContext.h"
#include "VideoCommon/PerfQueryBase.h"

namespace DX12
{
// D3D12 implementation of the performance query interface. Occlusion queries are kept in
// a ring buffer of PERF_QUERY_BUFFER_SIZE slots; each slot goes through three stages:
// begun/ended (has_value), resolved into the readback buffer (resolved + fence_value),
// and finally read back on the CPU, which frees the slot.
class PerfQuery final : public PerfQueryBase
{
public:
PerfQuery();
~PerfQuery();

static PerfQuery* GetInstance() { return static_cast<PerfQuery*>(g_perf_query.get()); }

// Creates the query heap and readback buffer; returns false on failure.
bool Initialize();
// Records resolve commands for all queries that have ended but are not yet resolved.
void ResolveQueries();

void EnableQuery(PerfQueryGroup type) override;
void DisableQuery(PerfQueryGroup type) override;
void ResetQuery() override;
u32 GetQueryResult(PerfQueryType type) override;
void FlushResults() override;
bool IsFlushed() const override;

private:
struct ActiveQuery
{
// Fence value recorded when the query was resolved; zero while not resolved.
u64 fence_value;
// NOTE(review): used as an index into m_results during readback, which the rest of the
// class indexes by PerfQueryGroup - confirm the intended type of this field.
PerfQueryType query_type;
// Set once a query has been begun in this slot.
bool has_value;
// Set once a resolve for this slot has been recorded.
bool resolved;
};

// Resolves query_count consecutive queries starting at m_query_resolve_pos.
void ResolveQueries(u32 query_count);
// Reads back every completed query at the front of the ring buffer.
void ReadbackQueries();
// Reads back query_count consecutive queries starting at m_query_readback_pos.
void ReadbackQueries(u32 query_count);

// Optionally submits the command list, then reads back completed queries.
void PartialFlush(bool resolve, bool blocking);

// when testing in SMS: 64 was too small, 128 was ok
// TODO: This should be size_t, but the base class uses u32s
using PerfQueryDataType = u64;
static const u32 PERF_QUERY_BUFFER_SIZE = 512;
std::array<ActiveQuery, PERF_QUERY_BUFFER_SIZE> m_query_buffer = {};
// Queries that have ended but have not had a resolve recorded yet.
u32 m_unresolved_queries = 0;
// Ring buffer cursors: next slot to resolve, next slot to read back, next slot to begin.
u32 m_query_resolve_pos = 0;
u32 m_query_readback_pos = 0;
u32 m_query_next_pos = 0;

ComPtr<ID3D12QueryHeap> m_query_heap;
ComPtr<ID3D12Resource> m_query_readback_buffer;
};

} // namespace DX12

Large diffs are not rendered by default.

@@ -0,0 +1,172 @@
// Copyright 2019 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.

#pragma once
#include <d3d12.h>
#include "VideoBackends/D3D12/DescriptorHeapManager.h"
#include "VideoCommon/RenderBase.h"

namespace DX12
{
class BoundingBox;
class DXFramebuffer;
class DXTexture;
class DXShader;
class DXPipeline;
class SwapChain;

// D3D12 implementation of the renderer interface. Besides the overrides of ::Renderer it
// tracks the state bound on the command list (pipeline, descriptors, buffers, viewport,
// ...) together with a set of dirty bits, so state is only re-applied when it changed.
//
// NOTE(review): m_state uses SamplerStateSet, but this header only includes
// DescriptorHeapManager.h - confirm that the header declaring SamplerStateSet is always
// included before this one.
class Renderer final : public ::Renderer
{
public:
  Renderer(std::unique_ptr<SwapChain> swap_chain, float backbuffer_scale);
  ~Renderer() override;

  static Renderer* GetInstance() { return static_cast<Renderer*>(g_renderer.get()); }

  bool IsHeadless() const override;

  bool Initialize() override;
  void Shutdown() override;

  std::unique_ptr<AbstractTexture> CreateTexture(const TextureConfig& config) override;
  std::unique_ptr<AbstractStagingTexture>
  CreateStagingTexture(StagingTextureType type, const TextureConfig& config) override;
  std::unique_ptr<AbstractFramebuffer>
  CreateFramebuffer(AbstractTexture* color_attachment, AbstractTexture* depth_attachment) override;

  std::unique_ptr<AbstractShader> CreateShaderFromSource(ShaderStage stage, const char* source,
                                                         size_t length) override;
  std::unique_ptr<AbstractShader> CreateShaderFromBinary(ShaderStage stage, const void* data,
                                                         size_t length) override;
  std::unique_ptr<NativeVertexFormat>
  CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) override;
  std::unique_ptr<AbstractPipeline> CreatePipeline(const AbstractPipelineConfig& config) override;

  u16 BBoxRead(int index) override;
  void BBoxWrite(int index, u16 value) override;
  void BBoxFlush() override;

  void Flush() override;
  void WaitForGPUIdle() override;

  void ClearScreen(const EFBRectangle& rc, bool color_enable, bool alpha_enable, bool z_enable,
                   u32 color, u32 z) override;

  void SetPipeline(const AbstractPipeline* pipeline) override;
  void SetFramebuffer(AbstractFramebuffer* framebuffer) override;
  void SetAndDiscardFramebuffer(AbstractFramebuffer* framebuffer) override;
  void SetAndClearFramebuffer(AbstractFramebuffer* framebuffer, const ClearColor& color_value = {},
                              float depth_value = 0.0f) override;
  void SetScissorRect(const MathUtil::Rectangle<int>& rc) override;
  void SetTexture(u32 index, const AbstractTexture* texture) override;
  void SetSamplerState(u32 index, const SamplerState& state) override;
  void SetComputeImageTexture(AbstractTexture* texture, bool read, bool write) override;
  void UnbindTexture(const AbstractTexture* texture) override;
  void SetViewport(float x, float y, float width, float height, float near_depth,
                   float far_depth) override;
  void Draw(u32 base_vertex, u32 num_vertices) override;
  void DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex) override;
  void DispatchComputeShader(const AbstractShader* shader, u32 groups_x, u32 groups_y,
                             u32 groups_z) override;
  void BindBackbuffer(const ClearColor& clear_color = {}) override;
  void PresentBackbuffer() override;

  // Completes the current render pass, executes the command buffer, and restores state ready for
  // next render. Use when you want to kick the current buffer to make room for new data.
  void ExecuteCommandList(bool wait_for_completion);

  // Setting constant buffer handles.
  void SetConstantBuffer(u32 index, D3D12_GPU_VIRTUAL_ADDRESS address);

  // Setting textures via descriptor handles. This is assumed to be in the shadow heap.
  void SetTextureDescriptor(u32 index, D3D12_CPU_DESCRIPTOR_HANDLE handle);

  // Pixel shader UAV.
  void SetPixelShaderUAV(D3D12_CPU_DESCRIPTOR_HANDLE handle);

  // Graphics vertex/index buffer binding.
  void SetVertexBuffer(D3D12_GPU_VIRTUAL_ADDRESS address, u32 stride, u32 size);
  void SetIndexBuffer(D3D12_GPU_VIRTUAL_ADDRESS address, u32 size, DXGI_FORMAT format);

protected:
  void OnConfigChanged(u32 bits) override;

private:
  // Sizes of the per-slot state arrays in m_state.
  static const u32 MAX_TEXTURES = 8;
  static const u32 NUM_CONSTANT_BUFFERS = 3;

  // Dirty bits
  enum DirtyStates
  {
    DirtyState_Framebuffer = (1 << 0),
    DirtyState_Pipeline = (1 << 1),
    DirtyState_Textures = (1 << 2),
    DirtyState_Samplers = (1 << 3),
    DirtyState_Viewport = (1 << 4),
    DirtyState_ScissorRect = (1 << 5),
    DirtyState_ComputeImageTexture = (1 << 6),
    DirtyState_PS_UAV = (1 << 7),
    DirtyState_PS_CBV = (1 << 8),
    DirtyState_VS_CBV = (1 << 9),
    DirtyState_GS_CBV = (1 << 10),
    DirtyState_SRV_Descriptor = (1 << 11),
    DirtyState_Sampler_Descriptor = (1 << 12),
    DirtyState_UAV_Descriptor = (1 << 13),
    DirtyState_VertexBuffer = (1 << 14),
    DirtyState_IndexBuffer = (1 << 15),
    DirtyState_PrimitiveTopology = (1 << 16),
    DirtyState_RootSignature = (1 << 17),
    DirtyState_ComputeRootSignature = (1 << 18),
    DirtyState_DescriptorHeaps = (1 << 19),

    // Union of every bit above.
    DirtyState_All =
        DirtyState_Framebuffer | DirtyState_Pipeline | DirtyState_Textures | DirtyState_Samplers |
        DirtyState_Viewport | DirtyState_ScissorRect | DirtyState_ComputeImageTexture |
        DirtyState_PS_UAV | DirtyState_PS_CBV | DirtyState_VS_CBV | DirtyState_GS_CBV |
        DirtyState_SRV_Descriptor | DirtyState_Sampler_Descriptor | DirtyState_UAV_Descriptor |
        DirtyState_VertexBuffer | DirtyState_IndexBuffer | DirtyState_PrimitiveTopology |
        DirtyState_RootSignature | DirtyState_ComputeRootSignature | DirtyState_DescriptorHeaps
  };

  void CheckForSwapChainChanges();

  // Binds all dirty state
  bool ApplyState();
  void BindFramebuffer(DXFramebuffer* fb);
  void SetRootSignatures();
  void SetDescriptorHeaps();
  void UpdateDescriptorTables();
  bool UpdateSRVDescriptorTable();
  bool UpdateUAVDescriptorTable();
  bool UpdateComputeUAVDescriptorTable();
  bool UpdateSamplerDescriptorTable();

  // Owned objects
  std::unique_ptr<SwapChain> m_swap_chain;
  std::unique_ptr<BoundingBox> m_bounding_box;

  // Current state
  struct
  {
    ID3D12RootSignature* root_signature = nullptr;
    DXShader* compute_shader = nullptr;
    // Sized with the named constant rather than a repeated literal so the two cannot drift.
    std::array<D3D12_GPU_VIRTUAL_ADDRESS, NUM_CONSTANT_BUFFERS> constant_buffers = {};
    std::array<D3D12_CPU_DESCRIPTOR_HANDLE, MAX_TEXTURES> textures = {};
    D3D12_CPU_DESCRIPTOR_HANDLE ps_uav = {};
    SamplerStateSet samplers = {};
    const DXTexture* compute_image_texture = nullptr;
    D3D12_VIEWPORT viewport = {};
    D3D12_RECT scissor = {};
    D3D12_GPU_DESCRIPTOR_HANDLE srv_descriptor_base = {};
    D3D12_GPU_DESCRIPTOR_HANDLE sampler_descriptor_base = {};
    D3D12_GPU_DESCRIPTOR_HANDLE uav_descriptor_base = {};
    D3D12_GPU_DESCRIPTOR_HANDLE compute_uav_descriptor_base = {};
    D3D12_VERTEX_BUFFER_VIEW vertex_buffer = {};
    D3D12_INDEX_BUFFER_VIEW index_buffer = {};
    D3D12_PRIMITIVE_TOPOLOGY primitive_topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
    bool using_integer_rtv = false;
  } m_state;
  // Everything starts out dirty so the first ApplyState() binds the complete state.
  u32 m_dirty_bits = DirtyState_All;
};
} // namespace DX12
@@ -0,0 +1,249 @@
// Copyright 2019 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.

#include "VideoBackends/D3D12/StreamBuffer.h"

#include <algorithm>
#include <functional>

#include "Common/Align.h"
#include "Common/Assert.h"
#include "Common/MsgHandler.h"

#include "VideoBackends/D3D12/DXContext.h"

namespace DX12
{
// Construction allocates nothing; the GPU buffer is created and mapped by AllocateBuffer().
StreamBuffer::StreamBuffer() = default;

// Unmaps the buffer if it is still mapped and releases the underlying resource.
StreamBuffer::~StreamBuffer()
{
  if (m_host_pointer)
  {
    // Report the whole buffer as potentially written.
    D3D12_RANGE written_range;
    written_range.Begin = 0;
    written_range.End = m_size;
    m_buffer->Unmap(0, &written_range);
  }

  // These get destroyed at shutdown anyway, so no need to defer destruction.
  if (!m_buffer)
    return;

  m_buffer->Release();
}

// Creates an upload-heap buffer of the given size, maps it persistently and resets all
// allocation tracking. Returns false if creating or mapping the buffer fails.
bool StreamBuffer::AllocateBuffer(u32 size)
{
  D3D12_HEAP_PROPERTIES upload_heap = {};
  upload_heap.Type = D3D12_HEAP_TYPE_UPLOAD;

  D3D12_RESOURCE_DESC buffer_desc = {};
  buffer_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
  buffer_desc.Alignment = 0;
  buffer_desc.Width = size;
  buffer_desc.Height = 1;
  buffer_desc.DepthOrArraySize = 1;
  buffer_desc.MipLevels = 1;
  buffer_desc.Format = DXGI_FORMAT_UNKNOWN;
  buffer_desc.SampleDesc.Count = 1;
  buffer_desc.SampleDesc.Quality = 0;
  buffer_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
  buffer_desc.Flags = D3D12_RESOURCE_FLAG_NONE;

  HRESULT hr = g_dx_context->GetDevice()->CreateCommittedResource(
      &upload_heap, D3D12_HEAP_FLAG_NONE, &buffer_desc, D3D12_RESOURCE_STATE_GENERIC_READ,
      nullptr, IID_PPV_ARGS(&m_buffer));
  CHECK(SUCCEEDED(hr), "Allocate buffer");
  if (!SUCCEEDED(hr))
    return false;

  // An empty read range tells D3D12 the CPU will not read from the mapping.
  const D3D12_RANGE no_read_range = {};
  hr = m_buffer->Map(0, &no_read_range, reinterpret_cast<void**>(&m_host_pointer));
  CHECK(SUCCEEDED(hr), "Map buffer");
  if (!SUCCEEDED(hr))
    return false;

  // Start with an empty buffer and nothing in flight on the GPU.
  m_tracked_fences.clear();
  m_current_gpu_position = 0;
  m_current_offset = 0;
  m_gpu_pointer = m_buffer->GetGPUVirtualAddress();
  m_size = size;
  return true;
}

// Reserves space for num_bytes at the requested alignment somewhere in the ring buffer.
// On success m_current_offset is positioned (aligned) at the start of the reservation
// and m_last_allocation_size records num_bytes; the caller is expected to follow up with
// CommitMemory(). Returns false when the request can never fit, or when no space can be
// found even after waiting on tracked fences.
bool StreamBuffer::ReserveMemory(u32 num_bytes, u32 alignment)
{
// Worst case: the full alignment has to be spent as padding in front of the allocation.
const u32 required_bytes = num_bytes + alignment;

// Check for sane allocations
if (required_bytes > m_size)
{
PanicAlert("Attempting to allocate %u bytes from a %u byte stream buffer",
static_cast<uint32_t>(num_bytes), static_cast<uint32_t>(m_size));

return false;
}

// Is the GPU behind or up to date with our current offset?
UpdateCurrentFencePosition();
if (m_current_offset >= m_current_gpu_position)
{
const u32 remaining_bytes = m_size - m_current_offset;
if (required_bytes <= remaining_bytes)
{
// Place at the current position, after the GPU position.
m_current_offset = Common::AlignUp(m_current_offset, alignment);
m_last_allocation_size = num_bytes;
return true;
}

// Check for space at the start of the buffer
// We use < here because we don't want to have the case of m_current_offset ==
// m_current_gpu_position. That would mean the code above would assume the
// GPU has caught up to us, which it hasn't.
if (required_bytes < m_current_gpu_position)
{
// Reset offset to zero, since we're allocating behind the gpu now
m_current_offset = 0;
m_last_allocation_size = num_bytes;
return true;
}
}
else
{
// We have from m_current_offset..m_current_gpu_position space to use.
const u32 remaining_bytes = m_current_gpu_position - m_current_offset;
if (required_bytes < remaining_bytes)
{
// Place at the current position, since this is still behind the GPU.
m_current_offset = Common::AlignUp(m_current_offset, alignment);
m_last_allocation_size = num_bytes;
return true;
}
}

// Can we find a fence to wait on that will give us enough memory?
if (WaitForClearSpace(required_bytes))
{
m_current_offset = Common::AlignUp(m_current_offset, alignment);
m_last_allocation_size = num_bytes;
return true;
}

// We tried everything we could, and still couldn't get anything. This means that too much space
// in the buffer is being used by the command buffer currently being recorded. Therefore, the
// only option is to execute it, and wait until it's done.
return false;
}

// Finalises the most recent reservation: advances the write offset past the
// final_num_bytes that were actually used, which may be less than what was reserved.
void StreamBuffer::CommitMemory(u32 final_num_bytes)
{
  ASSERT((m_current_offset + final_num_bytes) <= m_size);
  ASSERT(final_num_bytes <= m_last_allocation_size);

  const u32 committed_end = m_current_offset + final_num_bytes;
  m_current_offset = committed_end;
}

// Associates the current write offset with the current fence value, so that once the
// fence is signalled everything up to this offset is known to have been consumed.
void StreamBuffer::UpdateCurrentFencePosition()
{
  // Don't create a tracking entry if the GPU is caught up with the buffer.
  if (m_current_offset == m_current_gpu_position)
    return;

  // Has the offset changed since the last fence?
  const u64 fence = g_dx_context->GetCurrentFenceValue();
  const bool fence_already_tracked =
      !m_tracked_fences.empty() && m_tracked_fences.back().first == fence;

  if (fence_already_tracked)
  {
    // Still haven't executed a command buffer, so just update the offset.
    m_tracked_fences.back().second = m_current_offset;
  }
  else
  {
    // A new fence: retire completed entries first, then start tracking this one.
    UpdateGPUPosition();
    m_tracked_fences.emplace_back(fence, m_current_offset);
  }
}

void StreamBuffer::UpdateGPUPosition()
{
auto start = m_tracked_fences.begin();
auto end = start;

const u64 completed_counter = g_dx_context->GetCompletedFenceValue();
while (end != m_tracked_fences.end() && completed_counter >= end->first)
{
m_current_gpu_position = end->second;
++end;
}

if (start != end)
m_tracked_fences.erase(start, end);
}

// Looks for the oldest tracked fence whose completion would free at least num_bytes of space,
// waits for it, and then updates the write offset and GPU position accordingly.
//
// Returns false without waiting when no tracked fence would free enough space, or when the
// qualifying fence carries the context's current fence value (the caller is expected to execute
// the command list first). Returns true after a successful wait.
bool StreamBuffer::WaitForClearSpace(u32 num_bytes)
{
// Offset and GPU position to adopt if a suitable fence is found.
u32 new_offset = 0;
u32 new_gpu_position = 0;

auto iter = m_tracked_fences.begin();
for (; iter != m_tracked_fences.end(); iter++)
{
// Would this fence bring us in line with the GPU?
// This is the "last resort" case, where a command buffer execution has been forced
// after no additional data has been written to it, so we can assume that after the
// fence has been signaled the entire buffer is now consumed.
u32 gpu_position = iter->second;
if (m_current_offset == gpu_position)
{
new_offset = 0;
new_gpu_position = 0;
break;
}

// Assuming that we wait for this fence, are we allocating in front of the GPU?
if (m_current_offset > gpu_position)
{
// This would suggest the GPU has now followed us and wrapped around, so we have from
// m_current_offset..m_size free, as well as 0..gpu_position.
const u32 remaining_space_after_offset = m_size - m_current_offset;
if (remaining_space_after_offset >= num_bytes)
{
// Switch to allocating in front of the GPU, using the remainder of the buffer.
new_offset = m_current_offset;
new_gpu_position = gpu_position;
break;
}

// We can wrap around to the start, behind the GPU, if there is enough space.
// We use > here because otherwise we'd end up lining up with the GPU, and then the
// allocator would assume that the GPU has consumed what we just wrote.
if (gpu_position > num_bytes)
{
new_offset = 0;
new_gpu_position = gpu_position;
break;
}
}
else
{
// We're currently allocating behind the GPU. This would give us between the current
// offset and the GPU position worth of space to work with. Again, > because we can't
// align the GPU position with the buffer offset.
u32 available_space_inbetween = gpu_position - m_current_offset;
if (available_space_inbetween > num_bytes)
{
// Leave the offset as-is, but update the GPU position.
new_offset = m_current_offset;
new_gpu_position = gpu_position;
break;
}
}
}

// Did any fences satisfy this condition?
// Has the command buffer been executed yet? If not, the caller should execute it.
if (iter == m_tracked_fences.end() || iter->first == g_dx_context->GetCurrentFenceValue())
return false;

// Wait until this fence is signaled, then drop the tracking entries it makes obsolete: all of
// them in the "lined up with the GPU" case (the whole buffer is treated as consumed), otherwise
// everything up to and including the fence we waited on. The positions are updated by hand
// below rather than through UpdateGPUPosition().
g_dx_context->WaitForFence(iter->first);
m_tracked_fences.erase(m_tracked_fences.begin(),
m_current_offset == iter->second ? m_tracked_fences.end() : ++iter);
m_current_offset = new_offset;
m_current_gpu_position = new_gpu_position;
return true;
}

} // namespace DX12
@@ -0,0 +1,56 @@
// Copyright 2019 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.

#pragma once

#include <deque>
#include <utility>

#include "Common/CommonTypes.h"
#include "VideoBackends/D3D12/Common.h"

namespace DX12
{
// A buffer that is written by the CPU at a moving offset while the GPU consumes earlier parts of
// it. Fence values are tracked alongside buffer offsets so that space can be reused once the GPU
// has passed it.
class StreamBuffer
{
public:
StreamBuffer();
~StreamBuffer();

// Creates the underlying buffer of the given size in bytes. Returns false on failure.
// NOTE(review): implementation is outside this header; details not verified here.
bool AllocateBuffer(u32 size);

// Accessors for the start of the buffer.
ID3D12Resource* GetBuffer() const { return m_buffer; }
D3D12_GPU_VIRTUAL_ADDRESS GetGPUPointer() const { return m_gpu_pointer; }
u8* GetHostPointer() const { return m_host_pointer; }
// Accessors for the current write position (buffer start + current offset).
u8* GetCurrentHostPointer() const { return m_host_pointer + m_current_offset; }
D3D12_GPU_VIRTUAL_ADDRESS GetCurrentGPUPointer() const
{
return m_gpu_pointer + m_current_offset;
}
u32 GetSize() const { return m_size; }
u32 GetCurrentOffset() const { return m_current_offset; }
// Reserves num_bytes at an offset aligned to alignment. Returns false when no space could be
// found, in which case the caller is expected to execute the current command list and retry.
bool ReserveMemory(u32 num_bytes, u32 alignment);
// Advances the current offset by final_num_bytes, which must not exceed the last reservation.
void CommitMemory(u32 final_num_bytes);

private:
// Records the current offset against the context's current fence value.
void UpdateCurrentFencePosition();
// Moves the GPU position forward past every tracked fence that has completed.
void UpdateGPUPosition();

// Waits for as many fences as needed to allocate num_bytes bytes from the buffer.
bool WaitForClearSpace(u32 num_bytes);

// Total size of the buffer in bytes.
u32 m_size = 0;
// Offset at which the next reservation/commit takes place.
u32 m_current_offset = 0;
// Offset up to which the GPU is known to have consumed the buffer.
u32 m_current_gpu_position = 0;
// Size passed to the most recent successful reservation; upper bound for CommitMemory().
u32 m_last_allocation_size = 0;

ID3D12Resource* m_buffer = nullptr;
D3D12_GPU_VIRTUAL_ADDRESS m_gpu_pointer = {};
u8* m_host_pointer = nullptr;

// List of fences and the corresponding positions in the buffer
// (pair.first = fence value, pair.second = buffer offset), oldest entry at the front.
std::deque<std::pair<u64, u32>> m_tracked_fences;
};

} // namespace DX12
@@ -0,0 +1,76 @@
// Copyright 2019 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.

#include "VideoBackends/D3D12/SwapChain.h"
#include "VideoBackends/D3D12/DXContext.h"
#include "VideoBackends/D3D12/DXTexture.h"

namespace DX12
{
// Forwards the window system info, DXGI factory and command queue to the shared
// D3DCommon::SwapChain base class; no additional state is set up here.
SwapChain::SwapChain(const WindowSystemInfo& wsi, IDXGIFactory2* dxgi_factory,
ID3D12CommandQueue* d3d_command_queue)
: D3DCommon::SwapChain(wsi, dxgi_factory, d3d_command_queue)
{
}

// Defaulted out-of-line, in the translation unit that includes DXTexture.h.
SwapChain::~SwapChain() = default;

std::unique_ptr<SwapChain> SwapChain::Create(const WindowSystemInfo& wsi)
{
std::unique_ptr<SwapChain> swap_chain = std::make_unique<SwapChain>(
wsi, g_dx_context->GetDXGIFactory(), g_dx_context->GetCommandQueue());
if (!swap_chain->CreateSwapChain(WantsStereo()))
return nullptr;

return swap_chain;
}

bool SwapChain::CreateSwapChainBuffers()
{
for (u32 i = 0; i < SWAP_CHAIN_BUFFER_COUNT; i++)
{
ComPtr<ID3D12Resource> resource;
HRESULT hr = m_swap_chain->GetBuffer(i, IID_PPV_ARGS(&resource));
CHECK(SUCCEEDED(hr), "Get swap chain buffer");

BufferResources buffer;
buffer.texture = DXTexture::CreateAdopted(resource.Get());
CHECK(buffer.texture, "Create swap chain buffer texture");
if (!buffer.texture)
return false;

buffer.framebuffer = DXFramebuffer::Create(buffer.texture.get(), nullptr);
CHECK(buffer.texture, "Create swap chain buffer framebuffer");
if (!buffer.framebuffer)
return false;

m_buffers.push_back(std::move(buffer));
}

m_current_buffer = 0;
return true;
}

// Tears down every per-buffer framebuffer and texture, then empties m_buffers.
void SwapChain::DestroySwapChainBuffers()
{
  // Swap chain textures must be released before the swap chain can be resized, therefore all of
  // them are destroyed immediately instead of being placed onto the deferred destruction queue.
  for (auto it = m_buffers.begin(); it != m_buffers.end(); ++it)
  {
    it->framebuffer.reset();
    it->texture->DestroyResource();
    // NOTE(review): release() gives up ownership without deleting the DXTexture, so its
    // destructor never runs here. Presumably deliberate after DestroyResource(); confirm the
    // object itself is not leaked.
    it->texture.release();
  }
  m_buffers.clear();
}

// Presents through the shared base implementation and, on success, advances to the next back
// buffer (wrapping around at SWAP_CHAIN_BUFFER_COUNT). Returns the base class result.
bool SwapChain::Present()
{
  const bool presented = D3DCommon::SwapChain::Present();
  if (presented)
    m_current_buffer = (m_current_buffer + 1) % SWAP_CHAIN_BUFFER_COUNT;

  return presented;
}
} // namespace DX12
@@ -0,0 +1,55 @@
// Copyright 2019 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.

#pragma once

#include <d3d12.h>
#include <dxgi.h>
#include <memory>
#include <vector>

#include "Common/CommonTypes.h"
#include "Common/WindowSystemInfo.h"
#include "VideoBackends/D3D12/Common.h"
#include "VideoBackends/D3DCommon/SwapChain.h"
#include "VideoCommon/TextureConfig.h"

namespace DX12
{
class DXTexture;
class DXFramebuffer;

// D3D12 specialization of the shared D3DCommon swap chain. It keeps one texture/framebuffer pair
// per back buffer and tracks which back buffer is current.
class SwapChain : public D3DCommon::SwapChain
{
public:
SwapChain(const WindowSystemInfo& wsi, IDXGIFactory2* dxgi_factory,
ID3D12CommandQueue* d3d_command_queue);
~SwapChain();

// Creates a swap chain for the given window; returns nullptr on failure.
static std::unique_ptr<SwapChain> Create(const WindowSystemInfo& wsi);

// Presents, then advances the current buffer index on success.
bool Present() override;

// Texture/framebuffer wrapping the back buffer at the current index. These index m_buffers
// directly, so they are only valid once the buffers have been created.
DXTexture* GetCurrentTexture() const { return m_buffers[m_current_buffer].texture.get(); }
DXFramebuffer* GetCurrentFramebuffer() const
{
return m_buffers[m_current_buffer].framebuffer.get();
}

protected:
// Hooks overridden from the base class to create/destroy the per-buffer wrappers.
bool CreateSwapChainBuffers() override;
void DestroySwapChainBuffers() override;

private:
// Wrapper objects for a single back buffer.
struct BufferResources
{
std::unique_ptr<DXTexture> texture;
std::unique_ptr<DXFramebuffer> framebuffer;
};

std::vector<BufferResources> m_buffers;
// Index into m_buffers of the back buffer currently being rendered to.
u32 m_current_buffer = 0;
};

} // namespace DX12
@@ -0,0 +1,321 @@
// Copyright 2019 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.

#include "Common/CommonTypes.h"

#include "VideoBackends/D3D12/VertexManager.h"

#include "Common/Align.h"
#include "Common/CommonTypes.h"
#include "Common/Logging/Log.h"
#include "Common/MsgHandler.h"

#include "VideoBackends/D3D12/DXContext.h"
#include "VideoBackends/D3D12/Renderer.h"
#include "VideoBackends/D3D12/StreamBuffer.h"

#include "VideoCommon/GeometryShaderManager.h"
#include "VideoCommon/IndexGenerator.h"
#include "VideoCommon/PixelShaderManager.h"
#include "VideoCommon/Statistics.h"
#include "VideoCommon/VertexLoaderManager.h"
#include "VideoCommon/VertexShaderManager.h"
#include "VideoCommon/VideoConfig.h"

namespace DX12
{
// Defaulted; the stream buffers are allocated later in Initialize().
VertexManager::VertexManager() = default;

// Defaulted; members are destroyed by their own destructors.
VertexManager::~VertexManager() = default;

bool VertexManager::Initialize()
{
if (!m_vertex_stream_buffer.AllocateBuffer(VERTEX_STREAM_BUFFER_SIZE) ||
!m_index_stream_buffer.AllocateBuffer(INDEX_STREAM_BUFFER_SIZE) ||
!m_uniform_stream_buffer.AllocateBuffer(UNIFORM_STREAM_BUFFER_SIZE) ||
!m_texel_stream_buffer.AllocateBuffer(TEXEL_STREAM_BUFFER_SIZE))
{
PanicAlert("Failed to allocate streaming buffers");
return false;
}

static constexpr std::array<std::pair<TexelBufferFormat, DXGI_FORMAT>, NUM_TEXEL_BUFFER_FORMATS>
format_mapping = {{
{TEXEL_BUFFER_FORMAT_R8_UINT, DXGI_FORMAT_R8_UINT},
{TEXEL_BUFFER_FORMAT_R16_UINT, DXGI_FORMAT_R16_UINT},
{TEXEL_BUFFER_FORMAT_RGBA8_UINT, DXGI_FORMAT_R8G8B8A8_UINT},
{TEXEL_BUFFER_FORMAT_R32G32_UINT, DXGI_FORMAT_R32G32_UINT},
}};
for (const auto& it : format_mapping)
{
DescriptorHandle& dh = m_texel_buffer_views[it.first];
if (!g_dx_context->GetDescriptorHeapManager().Allocate(&dh))
{
PanicAlert("Failed to allocate descriptor for texel buffer");
return false;
}

D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = {it.second, D3D12_SRV_DIMENSION_BUFFER,
D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING};
srv_desc.Buffer.NumElements =
m_texel_stream_buffer.GetSize() / GetTexelBufferElementSize(it.first);
g_dx_context->GetDevice()->CreateShaderResourceView(m_texel_stream_buffer.GetBuffer(),
&srv_desc, dh.cpu_handle);
}

UploadAllConstants();
return true;
}

// Reserves worst-case space for the next draw in the vertex and index stream buffers and points
// the vertex/index writers at the reserved regions. If either reservation fails, the command
// list is executed and the failed reservation(s) are retried once.
void VertexManager::ResetBuffer(u32 vertex_stride)
{
  const auto reserve_vertices = [this, vertex_stride]() {
    return m_vertex_stream_buffer.ReserveMemory(MAXVBUFFERSIZE, vertex_stride);
  };
  const auto reserve_indices = [this]() {
    return m_index_stream_buffer.ReserveMemory(MAXIBUFFERSIZE * sizeof(u16), sizeof(u16));
  };

  // First attempt, without flushing anything.
  bool vertices_reserved = reserve_vertices();
  bool indices_reserved = reserve_indices();
  if (!(vertices_reserved && indices_reserved))
  {
    // Flush any pending commands first, so that there are fences to wait on.
    WARN_LOG(VIDEO, "Executing command list while waiting for space in vertex/index buffer");
    Renderer::GetInstance()->ExecuteCommandList(false);

    // Retry only what failed; this may block on a fence.
    vertices_reserved = vertices_reserved || reserve_vertices();
    indices_reserved = indices_reserved || reserve_indices();

    // Still failing means the request can never be satisfied.
    if (!(vertices_reserved && indices_reserved))
      PanicAlert("Failed to allocate space in streaming buffers for pending draw");
  }

  // Update pointers
  u8* const write_start = m_vertex_stream_buffer.GetCurrentHostPointer();
  m_base_buffer_pointer = m_vertex_stream_buffer.GetHostPointer();
  m_cur_buffer_pointer = write_start;
  m_end_buffer_pointer = write_start + MAXVBUFFERSIZE;
  IndexGenerator::Start(reinterpret_cast<u16*>(m_index_stream_buffer.GetCurrentHostPointer()));
}

// Commits the vertex and index data written since ResetBuffer(), reports where it starts (in
// vertices / indices rather than bytes) and binds both stream buffers for the draw.
void VertexManager::CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_indices,
                                 u32* out_base_vertex, u32* out_base_index)
{
  // The base positions must be read before the offsets are advanced by the commits below.
  if (vertex_stride > 0)
    *out_base_vertex = m_vertex_stream_buffer.GetCurrentOffset() / vertex_stride;
  else
    *out_base_vertex = 0;
  *out_base_index = m_index_stream_buffer.GetCurrentOffset() / sizeof(u16);

  const u32 vertex_data_size = num_vertices * vertex_stride;
  const u32 index_data_size = num_indices * sizeof(u16);
  m_vertex_stream_buffer.CommitMemory(vertex_data_size);
  m_index_stream_buffer.CommitMemory(index_data_size);

  ADDSTAT(stats.thisFrame.bytesVertexStreamed, static_cast<int>(vertex_data_size));
  ADDSTAT(stats.thisFrame.bytesIndexStreamed, static_cast<int>(index_data_size));

  // Both buffers are bound from their start; the base vertex/index select the data.
  Renderer* const renderer = Renderer::GetInstance();
  renderer->SetVertexBuffer(m_vertex_stream_buffer.GetGPUPointer(), vertex_stride,
                            m_vertex_stream_buffer.GetSize());
  renderer->SetIndexBuffer(m_index_stream_buffer.GetGPUPointer(), m_index_stream_buffer.GetSize(),
                           DXGI_FORMAT_R16_UINT);
}

// Uploads the constants of each shader stage whose dirty flag is set.
void VertexManager::UploadUniforms()
{
// Each call is a no-op when its stage is clean. If one of them has to execute the command list
// to make room, it re-uploads all stages and clears every dirty flag, making the later calls
// no-ops as well.
UpdateVertexShaderConstants();
UpdateGeometryShaderConstants();
UpdatePixelShaderConstants();
}

void VertexManager::UpdateVertexShaderConstants()
{
if (!VertexShaderManager::dirty || !ReserveConstantStorage())
return;

Renderer::GetInstance()->SetConstantBuffer(1, m_uniform_stream_buffer.GetCurrentGPUPointer());
std::memcpy(m_uniform_stream_buffer.GetCurrentHostPointer(), &VertexShaderManager::constants,
sizeof(VertexShaderConstants));
m_uniform_stream_buffer.CommitMemory(sizeof(VertexShaderConstants));
ADDSTAT(stats.thisFrame.bytesUniformStreamed, sizeof(VertexShaderConstants));
VertexShaderManager::dirty = false;
}

void VertexManager::UpdateGeometryShaderConstants()
{
if (!GeometryShaderManager::dirty || !ReserveConstantStorage())
return;

Renderer::GetInstance()->SetConstantBuffer(2, m_uniform_stream_buffer.GetCurrentGPUPointer());
std::memcpy(m_uniform_stream_buffer.GetCurrentHostPointer(), &GeometryShaderManager::constants,
sizeof(GeometryShaderConstants));
m_uniform_stream_buffer.CommitMemory(sizeof(GeometryShaderConstants));
ADDSTAT(stats.thisFrame.bytesUniformStreamed, sizeof(GeometryShaderConstants));
GeometryShaderManager::dirty = false;
}

void VertexManager::UpdatePixelShaderConstants()
{
if (!PixelShaderManager::dirty || !ReserveConstantStorage())
return;

Renderer::GetInstance()->SetConstantBuffer(0, m_uniform_stream_buffer.GetCurrentGPUPointer());
std::memcpy(m_uniform_stream_buffer.GetCurrentHostPointer(), &PixelShaderManager::constants,
sizeof(PixelShaderConstants));
m_uniform_stream_buffer.CommitMemory(sizeof(PixelShaderConstants));
ADDSTAT(stats.thisFrame.bytesUniformStreamed, sizeof(PixelShaderConstants));
PixelShaderManager::dirty = false;
}

// Reserves room in the uniform buffer for the largest of the three constant blocks.
// Returns true when the reservation succeeded. Otherwise the command list is executed, all
// constants are re-uploaded in one go, and false is returned so the caller skips its own upload.
bool VertexManager::ReserveConstantStorage()
{
  static constexpr u32 reserve_size =
      static_cast<u32>(std::max({sizeof(PixelShaderConstants), sizeof(VertexShaderConstants),
                                 sizeof(GeometryShaderConstants)}));

  const bool reserved = m_uniform_stream_buffer.ReserveMemory(
      reserve_size, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT);
  if (!reserved)
  {
    // The only places that call constant updates are safe to have state restored.
    WARN_LOG(VIDEO, "Executing command list while waiting for space in uniform buffer");
    Renderer::GetInstance()->ExecuteCommandList(false);

    // On a new command list every constant binding is stale, so upload all stages now.
    UploadAllConstants();
  }

  return reserved;
}

void VertexManager::UploadAllConstants()
{
// We are free to re-use parts of the buffer now since we're uploading all constants.
const u32 pixel_constants_offset = 0;
const u32 vertex_constants_offset =
Common::AlignUp(pixel_constants_offset + sizeof(PixelShaderConstants),
D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT);
const u32 geometry_constants_offset =
Common::AlignUp(vertex_constants_offset + sizeof(VertexShaderConstants),
D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT);
const u32 allocation_size = geometry_constants_offset + sizeof(GeometryShaderConstants);

// Allocate everything at once.
// We should only be here if the buffer was full and a command buffer was submitted anyway.
if (!m_uniform_stream_buffer.ReserveMemory(allocation_size,
D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT))
{
PanicAlert("Failed to allocate space for constants in streaming buffer");
return;
}

// Update bindings
Renderer::GetInstance()->SetConstantBuffer(0, m_uniform_stream_buffer.GetCurrentGPUPointer() +
pixel_constants_offset);
Renderer::GetInstance()->SetConstantBuffer(1, m_uniform_stream_buffer.GetCurrentGPUPointer() +
vertex_constants_offset);
Renderer::GetInstance()->SetConstantBuffer(2, m_uniform_stream_buffer.GetCurrentGPUPointer() +
geometry_constants_offset);

// Copy the actual data in
std::memcpy(m_uniform_stream_buffer.GetCurrentHostPointer() + pixel_constants_offset,
&PixelShaderManager::constants, sizeof(PixelShaderConstants));
std::memcpy(m_uniform_stream_buffer.GetCurrentHostPointer() + vertex_constants_offset,
&VertexShaderManager::constants, sizeof(VertexShaderConstants));
std::memcpy(m_uniform_stream_buffer.GetCurrentHostPointer() + geometry_constants_offset,
&GeometryShaderManager::constants, sizeof(GeometryShaderConstants));

// Finally, flush buffer memory after copying
m_uniform_stream_buffer.CommitMemory(allocation_size);
ADDSTAT(stats.thisFrame.bytesUniformStreamed, allocation_size);

// Clear dirty flags
VertexShaderManager::dirty = false;
GeometryShaderManager::dirty = false;
PixelShaderManager::dirty = false;
}

void VertexManager::UploadUtilityUniforms(const void* data, u32 data_size)
{
InvalidateConstants();
if (!m_uniform_stream_buffer.ReserveMemory(data_size,
D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT))
{
WARN_LOG(VIDEO, "Executing command buffer while waiting for ext space in uniform buffer");
Renderer::GetInstance()->ExecuteCommandList(false);
}

Renderer::GetInstance()->SetConstantBuffer(0, m_uniform_stream_buffer.GetCurrentGPUPointer());
Renderer::GetInstance()->SetConstantBuffer(1, m_uniform_stream_buffer.GetCurrentGPUPointer());
Renderer::GetInstance()->SetConstantBuffer(2, m_uniform_stream_buffer.GetCurrentGPUPointer());
std::memcpy(m_uniform_stream_buffer.GetCurrentHostPointer(), data, data_size);
m_uniform_stream_buffer.CommitMemory(data_size);
ADDSTAT(stats.thisFrame.bytesUniformStreamed, data_size);
}

// Copies data into the texel stream buffer and binds the view matching format to texture
// descriptor slot 0. *out_offset receives the start of the data in elements of that format.
// Returns false when the data can never fit, or when no space is available even after
// executing the command list.
bool VertexManager::UploadTexelBuffer(const void* data, u32 data_size, TexelBufferFormat format,
                                      u32* out_offset)
{
  // Larger than the whole buffer: no amount of waiting will help.
  if (data_size > m_texel_stream_buffer.GetSize())
    return false;

  const u32 elem_size = GetTexelBufferElementSize(format);
  bool reserved = m_texel_stream_buffer.ReserveMemory(data_size, elem_size);
  if (!reserved)
  {
    // Try submitting cmdbuffer.
    WARN_LOG(VIDEO, "Submitting command buffer while waiting for space in texel buffer");
    Renderer::GetInstance()->ExecuteCommandList(false);
    reserved = m_texel_stream_buffer.ReserveMemory(data_size, elem_size);
    if (!reserved)
    {
      PanicAlert("Failed to allocate %u bytes from texel buffer", data_size);
      return false;
    }
  }

  // The element offset has to be taken before the commit moves the current offset.
  *out_offset = static_cast<u32>(m_texel_stream_buffer.GetCurrentOffset()) / elem_size;
  std::memcpy(m_texel_stream_buffer.GetCurrentHostPointer(), data, data_size);
  m_texel_stream_buffer.CommitMemory(data_size);
  ADDSTAT(stats.thisFrame.bytesUniformStreamed, data_size);
  Renderer::GetInstance()->SetTextureDescriptor(0, m_texel_buffer_views[format].cpu_handle);
  return true;
}

bool VertexManager::UploadTexelBuffer(const void* data, u32 data_size, TexelBufferFormat format,
u32* out_offset, const void* palette_data, u32 palette_size,
TexelBufferFormat palette_format, u32* out_palette_offset)
{
const u32 elem_size = GetTexelBufferElementSize(format);
const u32 palette_elem_size = GetTexelBufferElementSize(palette_format);
const u32 reserve_size = data_size + palette_size + palette_elem_size;
if (reserve_size > m_texel_stream_buffer.GetSize())
return false;

if (!m_texel_stream_buffer.ReserveMemory(reserve_size, elem_size))
{
// Try submitting cmdbuffer.
WARN_LOG(VIDEO, "Submitting command buffer while waiting for space in texel buffer");
Renderer::GetInstance()->ExecuteCommandList(false);
if (!m_texel_stream_buffer.ReserveMemory(reserve_size, elem_size))
{
PanicAlert("Failed to allocate %u bytes from texel buffer", reserve_size);
return false;
}
}

const u32 palette_byte_offset = Common::AlignUp(data_size, palette_elem_size);
std::memcpy(m_texel_stream_buffer.GetCurrentHostPointer(), data, data_size);
std::memcpy(m_texel_stream_buffer.GetCurrentHostPointer() + palette_byte_offset, palette_data,
palette_size);
*out_offset = static_cast<u32>(m_texel_stream_buffer.GetCurrentOffset()) / elem_size;
*out_palette_offset =
(static_cast<u32>(m_texel_stream_buffer.GetCurrentOffset()) + palette_byte_offset) /
palette_elem_size;

m_texel_stream_buffer.CommitMemory(palette_byte_offset + palette_size);
ADDSTAT(stats.thisFrame.bytesUniformStreamed, palette_byte_offset + palette_size);
Renderer::GetInstance()->SetTextureDescriptor(0, m_texel_buffer_views[format].cpu_handle);
Renderer::GetInstance()->SetTextureDescriptor(1, m_texel_buffer_views[palette_format].cpu_handle);
return true;
}

} // namespace DX12
@@ -0,0 +1,52 @@
// Copyright 2019 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.

#pragma once

#include <memory>
#include "VideoBackends/D3D12/DescriptorHeapManager.h"
#include "VideoBackends/D3D12/StreamBuffer.h"
#include "VideoCommon/VertexManagerBase.h"

namespace DX12
{
// D3D12 vertex manager: streams vertices, indices, shader constants and texel data to the GPU
// through four separate stream buffers.
class VertexManager final : public VertexManagerBase
{
public:
VertexManager();
~VertexManager();

// Allocates the stream buffers and texel buffer views; returns false on failure.
bool Initialize() override;

// Uploads a raw block of uniforms and binds it to all three constant buffer slots.
void UploadUtilityUniforms(const void* uniforms, u32 uniforms_size) override;
// Uploads texel data; *out_offset is expressed in elements of the given format.
bool UploadTexelBuffer(const void* data, u32 data_size, TexelBufferFormat format,
u32* out_offset) override;
// Uploads texel data followed by palette data; each offset is expressed in elements of its own
// format.
bool UploadTexelBuffer(const void* data, u32 data_size, TexelBufferFormat format, u32* out_offset,
const void* palette_data, u32 palette_size,
TexelBufferFormat palette_format, u32* out_palette_offset) override;

protected:
// Reserves vertex/index space for the next draw and resets the write pointers.
void ResetBuffer(u32 vertex_stride) override;
// Commits the written vertex/index data and binds the buffers for drawing.
void CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_indices, u32* out_base_vertex,
u32* out_base_index) override;
// Uploads the constants of every stage that is marked dirty.
void UploadUniforms() override;

// Per-stage uploads; each does nothing unless its stage's dirty flag is set.
void UpdateVertexShaderConstants();
void UpdateGeometryShaderConstants();
void UpdatePixelShaderConstants();

// Allocates storage in the uniform buffer of the specified size. If this storage cannot be
// allocated immediately, the current command buffer will be submitted and all stage's
// constants will be re-uploaded. false will be returned in this case, otherwise true.
bool ReserveConstantStorage();
// Uploads pixel, vertex and geometry constants in one allocation and clears all dirty flags.
void UploadAllConstants();

StreamBuffer m_vertex_stream_buffer;
StreamBuffer m_index_stream_buffer;
StreamBuffer m_uniform_stream_buffer;
StreamBuffer m_texel_stream_buffer;
// One shader resource view descriptor over the texel buffer per texel format, indexed by
// TexelBufferFormat.
std::array<DescriptorHandle, NUM_TEXEL_BUFFER_FORMATS> m_texel_buffer_views = {};
};

} // namespace DX12