@@ -6,7 +6,6 @@

#include "Common/Align.h"
#include "Common/FileUtil.h"
#include "Common/LinearDiskCache.h"
#include "Common/StringUtil.h"

#include "Core/ConfigManager.h"
@@ -25,16 +24,9 @@

namespace DX11
{
// Compiled geometry shaders, keyed by the UID they were generated from.
GeometryShaderCache::GSCache GeometryShaderCache::GeometryShaders;
// Entry and UID recorded by the most recent SetShader() lookup; SetShader() uses them to skip the
// map search when the UID has not changed.
const GeometryShaderCache::GSCacheEntry* GeometryShaderCache::last_entry;
GeometryShaderUid GeometryShaderCache::last_uid;
// Default-constructed entry (null shader) that SetShader() binds for pass-through UIDs.
const GeometryShaderCache::GSCacheEntry GeometryShaderCache::pass_entry;

// Utility geometry shaders; both are released in Shutdown().
ID3D11GeometryShader* ClearGeometryShader = nullptr;
ID3D11GeometryShader* CopyGeometryShader = nullptr;

// On-disk store of compiled geometry shader bytecode: read by LoadShaderCache(), appended to by
// CompileShader().
LinearDiskCache<GeometryShaderUid, u8> g_gs_disk_cache;

ID3D11GeometryShader* GeometryShaderCache::GetClearGeometryShader()
{
return (g_ActiveConfig.stereo_mode != StereoMode::Off) ? ClearGeometryShader : nullptr;
@@ -63,16 +55,6 @@ ID3D11Buffer*& GeometryShaderCache::GetConstantBuffer()
return gscbuf;
}

// Disk-cache reader callback: each (uid, bytecode) record handed to Read() is forwarded to
// GeometryShaderCache::InsertByteCode so it ends up in the in-memory cache.
class GeometryShaderCacheInserter : public LinearDiskCacheReader<GeometryShaderUid, u8>
{
public:
  void Read(const GeometryShaderUid& key, const u8* value, u32 value_size)
  {
    // InsertByteCode takes the length as size_t; make the widening explicit.
    const size_t length = value_size;
    GeometryShaderCache::InsertByteCode(key, value, length);
  }
};

const char clear_shader_code[] = {
"struct VSOUTPUT\n"
"{\n"
@@ -155,44 +137,6 @@ void GeometryShaderCache::Init()
CopyGeometryShader = D3D::CompileAndCreateGeometryShader(copy_shader_code);
CHECK(CopyGeometryShader != nullptr, "Create copy geometry shader");
D3D::SetDebugObjectName(CopyGeometryShader, "copy geometry shader");

Clear();

if (g_ActiveConfig.bShaderCache)
LoadShaderCache();

if (g_ActiveConfig.CanPrecompileUberShaders())
PrecompileShaders();
}

void GeometryShaderCache::LoadShaderCache()
{
GeometryShaderCacheInserter inserter;
g_gs_disk_cache.OpenAndRead(GetDiskShaderCacheFileName(APIType::D3D, "GS", true, true), inserter);
}

// Flushes and closes the disk cache, drops all in-memory shaders, then rebuilds the cache
// according to the active configuration.
void GeometryShaderCache::Reload()
{
  // Persist anything still buffered before detaching from the cache file.
  g_gs_disk_cache.Sync();
  g_gs_disk_cache.Close();

  Clear();

  if (g_ActiveConfig.bShaderCache)
  {
    LoadShaderCache();
  }

  if (g_ActiveConfig.CanPrecompileUberShaders())
  {
    PrecompileShaders();
  }
}

// Releases every cached geometry shader and forgets the last-bound entry.
// Called from Init(), Reload() and Shutdown().
void GeometryShaderCache::Clear()
{
  for (auto it = GeometryShaders.begin(); it != GeometryShaders.end(); ++it)
    it->second.Destroy();
  GeometryShaders.clear();

  // last_entry pointed into the map that was just emptied, so it must be reset as well.
  last_uid = {};
  last_entry = nullptr;
}

void GeometryShaderCache::Shutdown()
@@ -201,83 +145,5 @@ void GeometryShaderCache::Shutdown()

SAFE_RELEASE(ClearGeometryShader);
SAFE_RELEASE(CopyGeometryShader);

Clear();
g_gs_disk_cache.Sync();
g_gs_disk_cache.Close();
}

// Binds the geometry shader that matches the current state for |primitive_type|, compiling it on
// demand. Returns false when no usable shader could be obtained.
bool GeometryShaderCache::SetShader(PrimitiveType primitive_type)
{
  GeometryShaderUid uid = GetGeometryShaderUid(primitive_type);

  // Fast path: same UID as the previous call, so rebind the entry recorded last time.
  const bool same_as_last = last_entry != nullptr && uid == last_uid;
  if (same_as_last)
  {
    GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
    D3D::stateman->SetGeometryShader(last_entry->shader);
    return true;
  }

  // Pass-through UIDs bind the shared default entry, whose shader is null.
  if (uid.GetUidData()->IsPassthrough())
  {
    last_entry = &pass_entry;
    last_uid = uid;
    D3D::stateman->SetGeometryShader(pass_entry.shader);
    return true;
  }

  // Already compiled (or previously failed and cached as a null shader)?
  const auto cached = GeometryShaders.find(uid);
  if (cached != GeometryShaders.end())
  {
    last_uid = uid;
    last_entry = &cached->second;
    D3D::stateman->SetGeometryShader(cached->second.shader);
    return cached->second.shader != nullptr;
  }

  // Not cached yet: compile it, then re-enter so the lookup above binds the new entry.
  if (!CompileShader(uid))
    return false;

  return SetShader(primitive_type);
}

// Generates, compiles and registers the geometry shader for |uid|, then appends its bytecode to
// the on-disk cache.
// Returns false if compilation or shader-object creation fails.
bool GeometryShaderCache::CompileShader(const GeometryShaderUid& uid)
{
  // Must start out null: the failure path below releases it, and nothing visible here guarantees
  // that the compiler writes the out-parameter when it fails.
  D3DBlob* bytecode = nullptr;
  ShaderCode code =
      GenerateGeometryShaderCode(APIType::D3D, ShaderHostConfig::GetCurrent(), uid.GetUidData());
  if (!D3D::CompileGeometryShader(code.GetBuffer(), &bytecode) ||
      !InsertByteCode(uid, bytecode ? bytecode->Data() : nullptr, bytecode ? bytecode->Size() : 0))
  {
    SAFE_RELEASE(bytecode);
    return false;
  }

  // InsertByteCode only succeeds with non-null bytecode, so the blob is valid here.
  // Persist it, then drop our reference: the shader object was created from a raw pointer/length
  // pair and the blob is not used again, so keeping it would leak it.
  g_gs_disk_cache.Append(uid, bytecode->Data(), bytecode->Size());
  bytecode->Release();
  return true;
}

bool GeometryShaderCache::InsertByteCode(const GeometryShaderUid& uid, const u8* bytecode,
size_t len)
{
GSCacheEntry& newentry = GeometryShaders[uid];
newentry.shader = bytecode ? D3D::CreateGeometryShaderFromByteCode(bytecode, len) : nullptr;
return newentry.shader != nullptr;
}

// Compiles every enumerated geometry shader UID that is not already present in the cache.
void GeometryShaderCache::PrecompileShaders()
{
  EnumerateGeometryShaderUids([](const GeometryShaderUid& uid) {
    const bool already_cached = GeometryShaders.count(uid) != 0;
    if (!already_cached)
      CompileShader(uid);
  });
}

} // DX11
@@ -15,36 +15,12 @@ class GeometryShaderCache
{
public:
static void Init();
static void Reload();
static void Clear();
static void Shutdown();
static bool SetShader(PrimitiveType primitive_type);
static bool CompileShader(const GeometryShaderUid& uid);
static bool InsertByteCode(const GeometryShaderUid& uid, const u8* bytecode, size_t len);
static void PrecompileShaders();

static ID3D11GeometryShader* GetClearGeometryShader();
static ID3D11GeometryShader* GetCopyGeometryShader();

static ID3D11Buffer*& GetConstantBuffer();

private:
// One slot of the geometry shader cache. |shader| stays null until a shader object is stored.
struct GSCacheEntry
{
  ID3D11GeometryShader* shader = nullptr;

  GSCacheEntry() {}

  // Releases the shader object held by this entry.
  void Destroy() { SAFE_RELEASE(shader); }
};

typedef std::map<GeometryShaderUid, GSCacheEntry> GSCache;

static void LoadShaderCache();

static GSCache GeometryShaders;
static const GSCacheEntry* last_entry;
static GeometryShaderUid last_uid;
static const GSCacheEntry pass_entry;
};

} // namespace DX11
@@ -13,6 +13,8 @@

namespace DX11
{
// NOTE(review): no lock/unlock of this mutex is visible in this excerpt (GetInputLayout uses an
// atomic compare-exchange instead) — confirm where, if anywhere, it is used.
std::mutex s_input_layout_lock;

std::unique_ptr<NativeVertexFormat>
VertexManager::CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl)
{
@@ -116,23 +118,34 @@ D3DVertexFormat::D3DVertexFormat(const PortableVertexDeclaration& _vtx_decl)

// Releases the cached input layout, if one was created.
// NOTE(review): this excerpt appears to contain both the pre-change line (releasing m_layout
// directly) and the post-change lines (load() the atomic, then release the loaded pointer) of a
// patch — confirm which form is in the real file.
D3DVertexFormat::~D3DVertexFormat()
{
SAFE_RELEASE(m_layout);
ID3D11InputLayout* layout = m_layout.load();
SAFE_RELEASE(layout);
}

// Returns the input layout for this vertex format, creating it from |vs_bytecode| on first use
// and caching it in m_layout afterwards.
// NOTE(review): this excerpt appears to interleave pre-change and post-change lines of a patch
// (both `if (m_layout)` and the `m_layout.load()` check, two argument lists for
// CreateInputLayout, two return paths) — confirm against the real file before relying on it.
ID3D11InputLayout* D3DVertexFormat::GetInputLayout(D3DBlob* vs_bytecode)
{
if (m_layout)
return m_layout;
// CreateInputLayout requires a shader input, but it only looks at the signature of the shader,
// so we don't need to recompute it if the shader changes.
// Fast path: a layout has already been published, so reuse it.
ID3D11InputLayout* layout = m_layout.load();
if (layout)
return layout;

// CreateInputLayout requires a shader input, but it only looks at the
// signature of the shader, so we don't need to recompute it if the shader
// changes.
HRESULT hr = DX11::D3D::device->CreateInputLayout(
m_elems.data(), m_num_elems, vs_bytecode->Data(), vs_bytecode->Size(), &m_layout);
m_elems.data(), m_num_elems, vs_bytecode->Data(), vs_bytecode->Size(), &layout);
if (FAILED(hr))
PanicAlert("Failed to create input layout, %s %d\n", __FILE__, __LINE__);
DX11::D3D::SetDebugObjectName(m_layout, "input layout used to emulate the GX pipeline");
return m_layout;

// This method can be called from multiple threads, so ensure that only one thread sets the
// cached input layout pointer. If another thread beats this thread, use the existing layout.
// On a lost race compare_exchange_strong writes the winner's pointer into |expected|; the layout
// created by this call is then released and the winner's is returned instead.
ID3D11InputLayout* expected = nullptr;
if (!m_layout.compare_exchange_strong(expected, layout))
{
SAFE_RELEASE(layout);
layout = expected;
}

return layout;
}

} // namespace DX11
@@ -7,7 +7,6 @@
#include "Common/Align.h"
#include "Common/CommonTypes.h"
#include "Common/FileUtil.h"
#include "Common/LinearDiskCache.h"
#include "Common/MsgHandler.h"
#include "Common/StringUtil.h"

@@ -27,17 +26,6 @@

namespace DX11
{
// Specialized and uber pixel shaders, each keyed by the UID they were generated from.
PixelShaderCache::PSCache PixelShaderCache::PixelShaders;
PixelShaderCache::UberPSCache PixelShaderCache::UberPixelShaders;
// Entries and UIDs recorded by the most recent SetShader()/SetUberShader() lookups; they let
// those functions skip the map search when the UID has not changed.
const PixelShaderCache::PSCacheEntry* PixelShaderCache::last_entry;
const PixelShaderCache::PSCacheEntry* PixelShaderCache::last_uber_entry;
PixelShaderUid PixelShaderCache::last_uid;
UberShader::PixelShaderUid PixelShaderCache::last_uber_uid;

// On-disk stores of compiled pixel shader bytecode (specialized and uber).
LinearDiskCache<PixelShaderUid, u8> g_ps_disk_cache;
LinearDiskCache<UberShader::PixelShaderUid, u8> g_uber_ps_disk_cache;
// Background shader compiler used to queue pixel shader work items; only declared here.
extern std::unique_ptr<VideoCommon::AsyncShaderCompiler> g_async_compiler;

// Utility pixel shaders. s_ColorCopyProgram[0] is created in Init().
// NOTE(review): creation of the remaining programs is not visible in this excerpt.
ID3D11PixelShader* s_ColorCopyProgram[2] = {nullptr};
ID3D11PixelShader* s_ClearProgram = nullptr;
ID3D11PixelShader* s_AnaglyphProgram = nullptr;
@@ -309,17 +297,6 @@ ID3D11Buffer* PixelShaderCache::GetConstantBuffer()
return pscbuf;
}

// Disk-cache reader callback, templated on the UID type: each (uid, bytecode) record handed to
// Read() is forwarded to the matching PixelShaderCache::InsertByteCode overload.
template <typename UidType>
class PixelShaderCacheInserter : public LinearDiskCacheReader<UidType, u8>
{
public:
  void Read(const UidType& key, const u8* value, u32 value_size)
  {
    // InsertByteCode takes the length as size_t; make the widening explicit.
    const size_t length = value_size;
    PixelShaderCache::InsertByteCode(key, value, length);
  }
};

void PixelShaderCache::Init()
{
unsigned int cbsize = Common::AlignUp(static_cast<unsigned int>(sizeof(PixelShaderConstants)),
@@ -344,58 +321,6 @@ void PixelShaderCache::Init()
s_ColorCopyProgram[0] = D3D::CompileAndCreatePixelShader(color_copy_program_code);
CHECK(s_ColorCopyProgram[0] != nullptr, "Create color copy pixel shader");
D3D::SetDebugObjectName(s_ColorCopyProgram[0], "color copy pixel shader");

Clear();

SETSTAT(stats.numPixelShadersCreated, 0);
SETSTAT(stats.numPixelShadersAlive, 0);

if (g_ActiveConfig.bShaderCache)
LoadShaderCache();

if (g_ActiveConfig.CanPrecompileUberShaders())
QueueUberShaderCompiles();
}

void PixelShaderCache::LoadShaderCache()
{
PixelShaderCacheInserter<PixelShaderUid> inserter;
g_ps_disk_cache.OpenAndRead(GetDiskShaderCacheFileName(APIType::D3D, "PS", true, true), inserter);

PixelShaderCacheInserter<UberShader::PixelShaderUid> uber_inserter;
g_uber_ps_disk_cache.OpenAndRead(GetDiskShaderCacheFileName(APIType::D3D, "UberPS", false, true),
uber_inserter);
}

// Flushes and closes both disk caches, drops all in-memory pixel shaders, then rebuilds the
// caches according to the active configuration.
void PixelShaderCache::Reload()
{
  // Specialized shader cache file.
  g_ps_disk_cache.Sync();
  g_ps_disk_cache.Close();

  // Uber shader cache file.
  g_uber_ps_disk_cache.Sync();
  g_uber_ps_disk_cache.Close();

  Clear();

  if (g_ActiveConfig.bShaderCache)
  {
    LoadShaderCache();
  }

  if (g_ActiveConfig.CanPrecompileUberShaders())
  {
    QueueUberShaderCompiles();
  }
}

// Releases every cached specialized and uber pixel shader and forgets the last-bound entries.
// Called from Reload() as well as during startup/shutdown.
void PixelShaderCache::Clear()
{
  for (auto it = PixelShaders.begin(); it != PixelShaders.end(); ++it)
    it->second.Destroy();
  for (auto it = UberPixelShaders.begin(); it != UberPixelShaders.end(); ++it)
    it->second.Destroy();

  PixelShaders.clear();
  UberPixelShaders.clear();

  // The last_* pointers referred to entries of the maps that were just emptied.
  last_uid = {};
  last_entry = nullptr;
  last_uber_uid = {};
  last_uber_entry = nullptr;
}

// Used in Swap() when AA mode has changed
@@ -420,255 +345,5 @@ void PixelShaderCache::Shutdown()
SAFE_RELEASE(s_rgba6_to_rgb8[i]);
SAFE_RELEASE(s_rgb8_to_rgba6[i]);
}

Clear();
g_ps_disk_cache.Sync();
g_ps_disk_cache.Close();
g_uber_ps_disk_cache.Sync();
g_uber_ps_disk_cache.Close();
}

// Binds the specialized pixel shader for the current state, compiling it on demand.
// Falls back to SetUberShader() when specialized shaders are disabled, when the matching entry is
// still pending, or when a background compile has just been queued.
// Returns false if the matching shader is unusable (null) or could not be created.
bool PixelShaderCache::SetShader()
{
  if (g_ActiveConfig.bDisableSpecializedShaders)
    return SetUberShader();

  PixelShaderUid uid = GetPixelShaderUid();
  ClearUnusedPixelShaderUidBits(APIType::D3D, &uid);

  // Fast path: same UID as the previous call, so reuse the entry recorded last time.
  if (last_entry && uid == last_uid)
  {
    if (last_entry->pending)
      return SetUberShader();

    if (!last_entry->shader)
      return false;

    D3D::stateman->SetPixelShader(last_entry->shader);
    return true;
  }

  // Check if the shader is already in the cache
  auto iter = PixelShaders.find(uid);
  if (iter != PixelShaders.end())
  {
    const PSCacheEntry& entry = iter->second;
    // A pending entry is still being compiled; it is not recorded as the last entry.
    if (entry.pending)
      return SetUberShader();

    last_uid = uid;
    last_entry = &entry;

    GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
    if (!last_entry->shader)
      return false;

    D3D::stateman->SetPixelShader(last_entry->shader);
    return true;
  }

  // Background compiling?
  if (g_ActiveConfig.CanBackgroundCompileShaders())
  {
    // Create a pending entry
    PSCacheEntry entry;
    entry.pending = true;
    PixelShaders[uid] = entry;

    // Queue normal shader compiling and use ubershader
    g_async_compiler->QueueWorkItem(
        g_async_compiler->CreateWorkItem<PixelShaderCompilerWorkItem>(uid));
    return SetUberShader();
  }

  // Need to compile a new shader
  D3DBlob* bytecode = nullptr;
  ShaderCode code =
      GeneratePixelShaderCode(APIType::D3D, ShaderHostConfig::GetCurrent(), uid.GetUidData());
  D3D::CompilePixelShader(code.GetBuffer(), &bytecode);
  if (!InsertByteCode(uid, bytecode ? bytecode->Data() : nullptr, bytecode ? bytecode->Size() : 0))
  {
    SAFE_RELEASE(bytecode);
    return false;
  }

  // InsertByteCode only succeeds with non-null bytecode, so the blob is valid here.
  // Persist it, then drop our reference (as SetUberShader does); the shader object was created
  // from a raw pointer/length pair, so keeping the blob would leak it.
  g_ps_disk_cache.Append(uid, bytecode->Data(), bytecode->Size());
  bytecode->Release();

  // Re-enter so the cache lookup above binds the freshly inserted entry.
  return SetShader();
}

bool PixelShaderCache::SetUberShader()
{
UberShader::PixelShaderUid uid = UberShader::GetPixelShaderUid();
UberShader::ClearUnusedPixelShaderUidBits(APIType::D3D, &uid);

if (last_uber_entry && last_uber_uid == uid)
{
if (!last_uber_entry->shader)
return false;

D3D::stateman->SetPixelShader(last_uber_entry->shader);
return true;
}

auto iter = UberPixelShaders.find(uid);
if (iter != UberPixelShaders.end())
{
const PSCacheEntry& entry = iter->second;
last_uber_uid = uid;
last_uber_entry = &entry;

GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
if (!last_uber_entry->shader)
return false;

D3D::stateman->SetPixelShader(last_uber_entry->shader);
return true;
}

D3DBlob* bytecode = nullptr;
ShaderCode code =
UberShader::GenPixelShader(APIType::D3D, ShaderHostConfig::GetCurrent(), uid.GetUidData());
D3D::CompilePixelShader(code.GetBuffer(), &bytecode);
if (!InsertByteCode(uid, bytecode ? bytecode->Data() : nullptr, bytecode ? bytecode->Size() : 0))
{
SAFE_RELEASE(bytecode);
return false;
}

// Lookup map again.
g_uber_ps_disk_cache.Append(uid, bytecode->Data(), bytecode->Size());
bytecode->Release();
return SetUberShader();
}

bool PixelShaderCache::InsertByteCode(const PixelShaderUid& uid, const u8* data, size_t len)
{
ID3D11PixelShader* shader = data ? D3D::CreatePixelShaderFromByteCode(data, len) : nullptr;
if (!InsertShader(uid, shader))
{
SAFE_RELEASE(shader);
return false;
}

return true;
}

// Uber-shader overload: creates a pixel shader from |len| bytes at |data| and hands it to
// InsertShader for |uid|. If InsertShader reports failure, the shader created here is released.
// Returns InsertShader's result.
bool PixelShaderCache::InsertByteCode(const UberShader::PixelShaderUid& uid, const u8* data,
                                      size_t len)
{
  ID3D11PixelShader* created = nullptr;
  if (data)
    created = D3D::CreatePixelShaderFromByteCode(data, len);

  const bool inserted = InsertShader(uid, created);
  if (!inserted)
    SAFE_RELEASE(created);

  return inserted;
}

bool PixelShaderCache::InsertShader(const PixelShaderUid& uid, ID3D11PixelShader* shader)
{
auto iter = PixelShaders.find(uid);
if (iter != PixelShaders.end() && !iter->second.pending)
return false;

PSCacheEntry& newentry = PixelShaders[uid];
newentry.pending = false;
newentry.shader = shader;

INCSTAT(stats.numPixelShadersCreated);
SETSTAT(stats.numPixelShadersAlive, PixelShaders.size());
return (shader != nullptr);
}

// Stores |shader| in the uber cache slot for |uid|.
// A slot that already holds a finished (non-pending) entry is left untouched and false is
// returned. Otherwise the slot is filled and the result is true only when |shader| is non-null.
bool PixelShaderCache::InsertShader(const UberShader::PixelShaderUid& uid,
                                    ID3D11PixelShader* shader)
{
  const auto existing = UberPixelShaders.find(uid);
  const bool already_finished = existing != UberPixelShaders.end() && !existing->second.pending;
  if (already_finished)
    return false;

  PSCacheEntry& slot = UberPixelShaders[uid];
  slot.shader = shader;
  slot.pending = false;
  return shader != nullptr;
}

// Queues a compile work item for every enumerated uber pixel shader UID that is not cached yet,
// waits for all of them while updating the host progress dialog, then retrieves the results.
void PixelShaderCache::QueueUberShaderCompiles()
{
  UberShader::EnumeratePixelShaderUids([&](const UberShader::PixelShaderUid& uid) {
    const bool already_cached = UberPixelShaders.count(uid) != 0;
    if (already_cached)
      return;

    g_async_compiler->QueueWorkItem(
        g_async_compiler->CreateWorkItem<UberPixelShaderCompilerWorkItem>(uid));
  });

  // Progress callback invoked by the compiler while we wait.
  const auto report_progress = [](size_t completed, size_t total) {
    Host_UpdateProgressDialog(GetStringT("Compiling shaders...").c_str(),
                              static_cast<int>(completed), static_cast<int>(total));
  };
  g_async_compiler->WaitUntilCompletion(report_progress);
  g_async_compiler->RetrieveWorkItems();

  // Final call with an empty title and -1/-1 once all work has been retrieved.
  Host_UpdateProgressDialog("", -1, -1);
}

// Captures the UID to compile by copying its bytes into the work item's own storage.
PixelShaderCache::PixelShaderCompilerWorkItem::PixelShaderCompilerWorkItem(
    const PixelShaderUid& uid)
{
  // m_uid and uid have the same type, so sizeof(m_uid) covers the whole object.
  std::memcpy(&m_uid, &uid, sizeof(m_uid));
}

// Releases the compiled bytecode blob, if any. The shader object is not released here; Retrieve()
// either hands it to the cache or releases it.
PixelShaderCache::PixelShaderCompilerWorkItem::~PixelShaderCompilerWorkItem()
{
  SAFE_RELEASE(m_bytecode);
}

bool PixelShaderCache::PixelShaderCompilerWorkItem::Compile()
{
ShaderCode code =
GeneratePixelShaderCode(APIType::D3D, ShaderHostConfig::GetCurrent(), m_uid.GetUidData());

if (D3D::CompilePixelShader(code.GetBuffer(), &m_bytecode))
m_shader = D3D::CreatePixelShaderFromByteCode(m_bytecode);

return true;
}

// Hands the compiled shader to the cache. On success the bytecode is appended to the disk cache;
// otherwise the shader object (if any) is released.
void PixelShaderCache::PixelShaderCompilerWorkItem::Retrieve()
{
  const bool inserted = InsertShader(m_uid, m_shader);
  if (!inserted)
  {
    SAFE_RELEASE(m_shader);
    return;
  }

  g_ps_disk_cache.Append(m_uid, m_bytecode->Data(), m_bytecode->Size());
}

// Captures the uber shader UID to compile by copying its bytes into the work item's own storage.
PixelShaderCache::UberPixelShaderCompilerWorkItem::UberPixelShaderCompilerWorkItem(
    const UberShader::PixelShaderUid& uid)
{
  // m_uid and uid have the same type, so sizeof(m_uid) covers the whole object.
  std::memcpy(&m_uid, &uid, sizeof(m_uid));
}

// Releases the compiled bytecode blob, if any. The shader object is not released here; Retrieve()
// either hands it to the cache or releases it.
PixelShaderCache::UberPixelShaderCompilerWorkItem::~UberPixelShaderCompilerWorkItem()
{
  SAFE_RELEASE(m_bytecode);
}

bool PixelShaderCache::UberPixelShaderCompilerWorkItem::Compile()
{
ShaderCode code =
UberShader::GenPixelShader(APIType::D3D, ShaderHostConfig::GetCurrent(), m_uid.GetUidData());

if (D3D::CompilePixelShader(code.GetBuffer(), &m_bytecode))
m_shader = D3D::CreatePixelShaderFromByteCode(m_bytecode);

return true;
}

// Hands the compiled uber shader to the cache. On success the bytecode is appended to the uber
// disk cache; otherwise the shader object (if any) is released.
void PixelShaderCache::UberPixelShaderCompilerWorkItem::Retrieve()
{
  const bool inserted = InsertShader(m_uid, m_shader);
  if (!inserted)
  {
    SAFE_RELEASE(m_shader);
    return;
  }

  g_uber_ps_disk_cache.Append(m_uid, m_bytecode->Data(), m_bytecode->Size());
}

} // DX11
@@ -19,16 +19,7 @@ class PixelShaderCache
{
public:
static void Init();
static void Reload();
static void Clear();
static void Shutdown();
static bool SetShader();
static bool SetUberShader();
static bool InsertByteCode(const PixelShaderUid& uid, const u8* data, size_t len);
static bool InsertByteCode(const UberShader::PixelShaderUid& uid, const u8* data, size_t len);
static bool InsertShader(const PixelShaderUid& uid, ID3D11PixelShader* shader);
static bool InsertShader(const UberShader::PixelShaderUid& uid, ID3D11PixelShader* shader);
static void QueueUberShaderCompiles();

static ID3D11Buffer* GetConstantBuffer();

@@ -40,58 +31,6 @@ class PixelShaderCache
static ID3D11PixelShader* ReinterpRGB8ToRGBA6(bool multisampled);

static void InvalidateMSAAShaders();

private:
// One slot of a pixel shader cache. |pending| marks a slot whose shader has been queued for
// background compilation but not stored yet; |shader| stays null until a shader is stored.
struct PSCacheEntry
{
  ID3D11PixelShader* shader = nullptr;
  bool pending = false;

  PSCacheEntry() {}

  // Releases the shader object held by this entry.
  void Destroy() { SAFE_RELEASE(shader); }
};

// Async work item that compiles one specialized pixel shader.
// Compile() generates and compiles the shader for m_uid; Retrieve() inserts the result into the
// cache and appends the bytecode to the disk cache.
class PixelShaderCompilerWorkItem : public VideoCommon::AsyncShaderCompiler::WorkItem
{
public:
explicit PixelShaderCompilerWorkItem(const PixelShaderUid& uid);
// Releases m_bytecode.
~PixelShaderCompilerWorkItem() override;

bool Compile() override;
void Retrieve() override;

private:
// UID of the shader to compile (copied in the constructor).
PixelShaderUid m_uid;
// Results of Compile(); both stay null if compilation fails.
ID3D11PixelShader* m_shader = nullptr;
D3DBlob* m_bytecode = nullptr;
};

// Async work item that compiles one uber pixel shader.
// Compile() generates and compiles the shader for m_uid; Retrieve() inserts the result into the
// uber cache and appends the bytecode to the uber disk cache.
class UberPixelShaderCompilerWorkItem : public VideoCommon::AsyncShaderCompiler::WorkItem
{
public:
explicit UberPixelShaderCompilerWorkItem(const UberShader::PixelShaderUid& uid);
// Releases m_bytecode.
~UberPixelShaderCompilerWorkItem() override;

bool Compile() override;
void Retrieve() override;

private:
// UID of the uber shader to compile (copied in the constructor).
UberShader::PixelShaderUid m_uid;
// Results of Compile(); both stay null if compilation fails.
ID3D11PixelShader* m_shader = nullptr;
D3DBlob* m_bytecode = nullptr;
};

typedef std::map<PixelShaderUid, PSCacheEntry> PSCache;
typedef std::map<UberShader::PixelShaderUid, PSCacheEntry> UberPSCache;

static void LoadShaderCache();

static PSCache PixelShaders;
static UberPSCache UberPixelShaders;
static const PSCacheEntry* last_entry;
static const PSCacheEntry* last_uber_entry;
static PixelShaderUid last_uid;
static UberShader::PixelShaderUid last_uber_uid;
};

} // namespace DX11
@@ -71,15 +71,6 @@ Renderer::Renderer(int backbuffer_width, int backbuffer_height)
g_framebuffer_manager = std::make_unique<FramebufferManager>(m_target_width, m_target_height);
SetupDeviceObjects();

// Setup GX pipeline state
for (auto& sampler : m_gx_state.samplers)
sampler.hex = RenderState::GetPointSamplerState().hex;

m_gx_state.zmode.testenable = false;
m_gx_state.zmode.updateenable = false;
m_gx_state.zmode.func = ZMode::NEVER;
m_gx_state.raster.cullmode = GenMode::CULL_NONE;

// Clear EFB textures
constexpr std::array<float, 4> clear_color{{0.f, 0.f, 0.f, 1.f}};
D3D::context->ClearRenderTargetView(FramebufferManager::GetEFBColorTexture()->GetRTV(),
@@ -299,6 +290,8 @@ void Renderer::UpdateUtilityVertexBuffer(const void* vertices, u32 vertex_stride
void Renderer::SetPipeline(const AbstractPipeline* pipeline)
{
const DXPipeline* dx_pipeline = static_cast<const DXPipeline*>(pipeline);
if (!dx_pipeline)
return;

D3D::stateman->SetRasterizerState(dx_pipeline->GetRasterizerState());
D3D::stateman->SetDepthState(dx_pipeline->GetDepthState());
@@ -313,11 +306,6 @@ void Renderer::SetPipeline(const AbstractPipeline* pipeline)
void Renderer::DrawUtilityPipeline(const void* uniforms, u32 uniforms_size, const void* vertices,
u32 vertex_stride, u32 num_vertices)
{
// Textures are fine, they're set directly via SetTexture.
// Since samplers are set via gx_state, we need to fix this up here.
for (size_t stage = 0; stage < m_gx_state.samplers.size(); stage++)
D3D::stateman->SetSampler(stage, m_state_cache.Get(m_gx_state.samplers[stage]));

// Copy in uniforms.
if (uniforms_size > 0)
{
@@ -638,11 +626,6 @@ void Renderer::ReinterpretPixelData(unsigned int convtype)
RestoreAPIState();
}

// Records |state| as the blend state of the tracked GX pipeline state; ApplyState() reads it from
// there.
void Renderer::SetBlendingState(const BlendingState& state)
{
  auto& tracked_blend = m_gx_state.blend;
  tracked_blend.hex = state.hex;
}

// This function has the final picture. We adjust the aspect ratio here.
void Renderer::SwapImpl(AbstractTexture* texture, const EFBRectangle& xfb_region, u64 ticks,
float Gamma)
@@ -683,7 +666,6 @@ void Renderer::SwapImpl(AbstractTexture* texture, const EFBRectangle& xfb_region
// Enable configuration changes
UpdateActiveConfig();
g_texture_cache->OnConfigChanged(g_ActiveConfig);
VertexShaderCache::RetreiveAsyncShaders();

// Flip/present backbuffer to frontbuffer here
if (D3D::swapchain)
@@ -706,12 +688,7 @@ void Renderer::SwapImpl(AbstractTexture* texture, const EFBRectangle& xfb_region
D3D11_CLEAR_DEPTH, 0.f, 0);
}

if (CheckForHostConfigChanges())
{
VertexShaderCache::Reload();
GeometryShaderCache::Reload();
PixelShaderCache::Reload();
}
CheckForHostConfigChanges();

// begin next frame
RestoreAPIState();
@@ -784,30 +761,6 @@ void Renderer::RestoreAPIState()
BPFunctions::SetScissor();
}

void Renderer::ApplyState()
{
D3D::stateman->SetBlendState(m_state_cache.Get(m_gx_state.blend));
D3D::stateman->SetDepthState(m_state_cache.Get(m_gx_state.zmode));
D3D::stateman->SetRasterizerState(m_state_cache.Get(m_gx_state.raster));
D3D::stateman->SetPrimitiveTopology(
StateCache::GetPrimitiveTopology(m_gx_state.raster.primitive));
FramebufferManager::SetIntegerEFBRenderTarget(m_gx_state.blend.logicopenable);

for (u32 stage = 0; stage < static_cast<u32>(m_gx_state.samplers.size()); stage++)
D3D::stateman->SetSampler(stage, m_state_cache.Get(m_gx_state.samplers[stage]));

ID3D11Buffer* vertexConstants = VertexShaderCache::GetConstantBuffer();

D3D::stateman->SetPixelConstants(PixelShaderCache::GetConstantBuffer(),
g_ActiveConfig.bEnablePixelLighting ? vertexConstants : nullptr);
D3D::stateman->SetVertexConstants(vertexConstants);
D3D::stateman->SetGeometryConstants(GeometryShaderCache::GetConstantBuffer());
}

// Intentionally a no-op in this backend.
void Renderer::RestoreState()
{
  // Nothing to restore.
}

void Renderer::SetFramebuffer(const AbstractFramebuffer* framebuffer)
{
const DXFramebuffer* fb = static_cast<const DXFramebuffer*>(framebuffer);
@@ -838,16 +791,6 @@ void Renderer::SetAndClearFramebuffer(const AbstractFramebuffer* framebuffer,
}
}

// Records |state| as the rasterizer state of the tracked GX pipeline state; ApplyState() reads it
// from there.
void Renderer::SetRasterizationState(const RasterizationState& state)
{
  auto& tracked_raster = m_gx_state.raster;
  tracked_raster.hex = state.hex;
}

// Records |state| as the depth state of the tracked GX pipeline state; ApplyState() reads it from
// there.
void Renderer::SetDepthState(const DepthState& state)
{
  auto& tracked_depth = m_gx_state.zmode;
  tracked_depth.hex = state.hex;
}

void Renderer::SetTexture(u32 index, const AbstractTexture* texture)
{
D3D::stateman->SetTexture(
@@ -857,7 +800,7 @@ void Renderer::SetTexture(u32 index, const AbstractTexture* texture)

// Sets the sampler state for sampler slot |index|.
// NOTE(review): the two statements below look like the pre-change line (store into m_gx_state)
// and the post-change line (bind immediately through the state manager) of a patch — confirm
// which one the real file contains.
void Renderer::SetSamplerState(u32 index, const SamplerState& state)
{
m_gx_state.samplers[index].hex = state.hex;
D3D::stateman->SetSampler(index, m_state_cache.Get(state));
}

void Renderer::UnbindTexture(const AbstractTexture* texture)
@@ -4,7 +4,6 @@

#pragma once

#include <array>
#include <d3d11.h>
#include <string>
#include "VideoBackends/D3D/D3DState.h"
@@ -41,10 +40,7 @@ class Renderer : public ::Renderer
void SetAndClearFramebuffer(const AbstractFramebuffer* framebuffer,
const ClearColor& color_value = {},
float depth_value = 0.0f) override;
void SetBlendingState(const BlendingState& state) override;
void SetScissorRect(const MathUtil::Rectangle<int>& rc) override;
void SetRasterizationState(const RasterizationState& state) override;
void SetDepthState(const DepthState& state) override;
void SetTexture(u32 index, const AbstractTexture* texture) override;
void SetSamplerState(u32 index, const SamplerState& state) override;
void UnbindTexture(const AbstractTexture* texture) override;
@@ -54,10 +50,6 @@ class Renderer : public ::Renderer
void SetFullscreen(bool enable_fullscreen) override;
bool IsFullscreen() const override;

// TODO: Fix confusing names (see ResetAPIState and RestoreAPIState)
void ApplyState() override;
void RestoreState() override;

void RenderText(const std::string& text, int left, int top, u32 color) override;

u32 AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data) override;
@@ -84,14 +76,6 @@ class Renderer : public ::Renderer
u32 groups_x, u32 groups_y, u32 groups_z) override;

private:
// State tracked by the renderer for the emulated GX pipeline; written by the Set*State()
// functions and applied to the device in ApplyState().
struct GXPipelineState
{
// One sampler state per sampler slot.
std::array<SamplerState, 8> samplers;
BlendingState blend;
DepthState zmode;
RasterizationState raster;
};

void SetupDeviceObjects();
void TeardownDeviceObjects();
void Create3DVisionTexture(int width, int height);
@@ -106,7 +90,6 @@ class Renderer : public ::Renderer
void UpdateUtilityVertexBuffer(const void* vertices, u32 vertex_stride, u32 num_vertices);

StateCache m_state_cache;
GXPipelineState m_gx_state;

std::array<ID3D11BlendState*, 4> m_clear_blend_states{};
std::array<ID3D11DepthStencilState*, 3> m_clear_depth_states{};
@@ -11,6 +11,7 @@
#include "VideoBackends/D3D/BoundingBox.h"
#include "VideoBackends/D3D/D3DBase.h"
#include "VideoBackends/D3D/D3DState.h"
#include "VideoBackends/D3D/FramebufferManager.h"
#include "VideoBackends/D3D/GeometryShaderCache.h"
#include "VideoBackends/D3D/PixelShaderCache.h"
#include "VideoBackends/D3D/Render.h"
@@ -135,42 +136,23 @@ void VertexManager::Draw(u32 stride)

// Flushes the pending vertices: binds shaders/pipeline state and constant buffers, then draws.
// NOTE(review): this excerpt appears to interleave pre-change and post-change lines of a patch
// (per-cache SetShader calls next to SetPipeline, `stride` declared twice, ApplyState/RestoreState
// next to direct constant-buffer binding). Confirm against the real file before relying on it.
void VertexManager::vFlush()
{
if (!PixelShaderCache::SetShader())
{
GFX_DEBUGGER_PAUSE_LOG_AT(NEXT_ERROR, true, { printf("Fail to set pixel shader\n"); });
return;
}

D3DVertexFormat* vertex_format =
static_cast<D3DVertexFormat*>(VertexLoaderManager::GetCurrentVertexFormat());
if (!VertexShaderCache::SetShader(vertex_format))
{
// NOTE(review): the message says "pixel shader" although this is the vertex shader path.
GFX_DEBUGGER_PAUSE_LOG_AT(NEXT_ERROR, true, { printf("Fail to set pixel shader\n"); });
return;
}
u32 stride = VertexLoaderManager::GetCurrentVertexFormat()->GetVertexStride();
PrepareDrawBuffers(stride);

if (!GeometryShaderCache::SetShader(m_current_primitive_type))
{
// NOTE(review): the message says "pixel shader" although this is the geometry shader path.
GFX_DEBUGGER_PAUSE_LOG_AT(NEXT_ERROR, true, { printf("Fail to set pixel shader\n"); });
if (!m_current_pipeline_object)
return;
}

// Bind the bounding box UAV while keeping the current render targets.
if (g_ActiveConfig.backend_info.bSupportsBBox && BoundingBox::active)
{
D3D::context->OMSetRenderTargetsAndUnorderedAccessViews(
D3D11_KEEP_RENDER_TARGETS_AND_DEPTH_STENCIL, nullptr, nullptr, 2, 1, &BBox::GetUAV(),
nullptr);
}
FramebufferManager::SetIntegerEFBRenderTarget(
m_current_pipeline_config.blending_state.logicopenable);
g_renderer->SetPipeline(m_current_pipeline_object);

u32 stride = VertexLoaderManager::GetCurrentVertexFormat()->GetVertexStride();

PrepareDrawBuffers(stride);

g_renderer->ApplyState();
// The vertex constants are also bound as the second pixel constant buffer when per-pixel lighting
// is enabled.
ID3D11Buffer* vertexConstants = VertexShaderCache::GetConstantBuffer();
D3D::stateman->SetPixelConstants(PixelShaderCache::GetConstantBuffer(),
g_ActiveConfig.bEnablePixelLighting ? vertexConstants : nullptr);
D3D::stateman->SetVertexConstants(vertexConstants);
D3D::stateman->SetGeometryConstants(GeometryShaderCache::GetConstantBuffer());

Draw(stride);

g_renderer->RestoreState();
}

void VertexManager::ResetBuffer(u32 stride)
@@ -7,6 +7,7 @@
#include <d3d11.h>

#include <array>
#include <atomic>
#include <memory>
#include <vector>

@@ -29,7 +30,7 @@ class D3DVertexFormat : public NativeVertexFormat
std::array<D3D11_INPUT_ELEMENT_DESC, 32> m_elems{};
UINT m_num_elems = 0;

ID3D11InputLayout* m_layout = nullptr;
std::atomic<ID3D11InputLayout*> m_layout{nullptr};
};

class VertexManager : public VertexManagerBase
@@ -7,7 +7,6 @@
#include "Common/Align.h"
#include "Common/CommonTypes.h"
#include "Common/FileUtil.h"
#include "Common/LinearDiskCache.h"
#include "Common/MsgHandler.h"
#include "Common/StringUtil.h"

@@ -28,22 +27,11 @@

namespace DX11
{
// Specialized and uber vertex shader caches, searched by UID in SetShader()/SetUberShader().
VertexShaderCache::VSCache VertexShaderCache::vshaders;
VertexShaderCache::UberVSCache VertexShaderCache::ubervshaders;
// Entries and UIDs recorded by the most recent SetShader()/SetUberShader() lookups; they let
// those functions skip the cache search when the UID has not changed.
const VertexShaderCache::VSCacheEntry* VertexShaderCache::last_entry;
const VertexShaderCache::VSCacheEntry* VertexShaderCache::last_uber_entry;
VertexShaderUid VertexShaderCache::last_uid;
UberShader::VertexShaderUid VertexShaderCache::last_uber_uid;

// Utility vertex shaders and their input layouts; all four are released in Shutdown().
static ID3D11VertexShader* SimpleVertexShader = nullptr;
static ID3D11VertexShader* ClearVertexShader = nullptr;
static ID3D11InputLayout* SimpleLayout = nullptr;
static ID3D11InputLayout* ClearLayout = nullptr;

// On-disk stores of compiled vertex shader bytecode (specialized and uber).
LinearDiskCache<VertexShaderUid, u8> g_vs_disk_cache;
LinearDiskCache<UberShader::VertexShaderUid, u8> g_uber_vs_disk_cache;
// Background shader compiler; created in Init() and stopped in Shutdown(). The pixel shader
// cache declares it extern and uses it as well.
std::unique_ptr<VideoCommon::AsyncShaderCompiler> g_async_compiler;

ID3D11VertexShader* VertexShaderCache::GetSimpleVertexShader()
{
return SimpleVertexShader;
@@ -164,345 +152,18 @@ void VertexShaderCache::Init()
D3D::SetDebugObjectName(ClearVertexShader, "clear vertex shader");
D3D::SetDebugObjectName(ClearLayout, "clear input layout");

Clear();

SETSTAT(stats.numVertexShadersCreated, 0);
SETSTAT(stats.numVertexShadersAlive, 0);

if (g_ActiveConfig.bShaderCache)
LoadShaderCache();

g_async_compiler = std::make_unique<VideoCommon::AsyncShaderCompiler>();
g_async_compiler->ResizeWorkerThreads(g_ActiveConfig.CanPrecompileUberShaders() ?
g_ActiveConfig.GetShaderPrecompilerThreads() :
g_ActiveConfig.GetShaderCompilerThreads());

if (g_ActiveConfig.CanPrecompileUberShaders())
QueueUberShaderCompiles();
}

void VertexShaderCache::LoadShaderCache()
{
VertexShaderCacheInserter<VertexShaderUid> inserter;
g_vs_disk_cache.OpenAndRead(GetDiskShaderCacheFileName(APIType::D3D, "VS", true, true), inserter);

VertexShaderCacheInserter<UberShader::VertexShaderUid> uber_inserter;
g_uber_vs_disk_cache.OpenAndRead(GetDiskShaderCacheFileName(APIType::D3D, "UberVS", false, true),
uber_inserter);
}

// Drains the async compiler, flushes and closes both disk caches, drops all in-memory vertex
// shaders, then rebuilds the caches according to the active configuration.
void VertexShaderCache::Reload()
{
  // Let outstanding compiles finish and collect their results before anything is torn down.
  g_async_compiler->WaitUntilCompletion();
  g_async_compiler->RetrieveWorkItems();

  // Specialized shader cache file.
  g_vs_disk_cache.Sync();
  g_vs_disk_cache.Close();

  // Uber shader cache file.
  g_uber_vs_disk_cache.Sync();
  g_uber_vs_disk_cache.Close();

  Clear();

  if (g_ActiveConfig.bShaderCache)
  {
    LoadShaderCache();
  }

  if (g_ActiveConfig.CanPrecompileUberShaders())
  {
    QueueUberShaderCompiles();
  }
}

// Releases every cached specialized and uber vertex shader and forgets the last-bound entries.
void VertexShaderCache::Clear()
{
  for (auto& iter : vshaders)
    iter.second.Destroy();
  for (auto& iter : ubervshaders)
    iter.second.Destroy();
  vshaders.clear();
  ubervshaders.clear();

  // The last_* pointers referred to entries of the caches that were just emptied.
  // (Each value is reset exactly once; the original repeated the two UID resets.)
  last_entry = nullptr;
  last_uber_entry = nullptr;
  last_uid = {};
  last_uber_uid = {};
}

// Stops the async compiler, releases the constant buffer and utility shaders/layouts, clears the
// in-memory caches and finally flushes and closes both disk caches.
void VertexShaderCache::Shutdown()
{
  // Stop the worker threads, then collect whatever they had already finished.
  g_async_compiler->StopWorkerThreads();
  g_async_compiler->RetrieveWorkItems();

  // Constant buffer.
  SAFE_RELEASE(vscbuf);

  // Utility shaders.
  SAFE_RELEASE(SimpleVertexShader);
  SAFE_RELEASE(ClearVertexShader);

  // Utility input layouts.
  SAFE_RELEASE(SimpleLayout);
  SAFE_RELEASE(ClearLayout);

  Clear();

  // Specialized shader cache file.
  g_vs_disk_cache.Sync();
  g_vs_disk_cache.Close();

  // Uber shader cache file.
  g_uber_vs_disk_cache.Sync();
  g_uber_vs_disk_cache.Close();
}

// Binds the specialized vertex shader and matching input layout for the current state, compiling
// the shader on demand. Falls back to SetUberShader() when specialized shaders are disabled, when
// the matching entry is still pending, or when a background compile has just been queued.
// Returns false if the matching shader is unusable (null) or could not be created.
bool VertexShaderCache::SetShader(D3DVertexFormat* vertex_format)
{
  if (g_ActiveConfig.bDisableSpecializedShaders)
    return SetUberShader(vertex_format);

  VertexShaderUid uid = GetVertexShaderUid();

  const bool reuse_last = last_entry != nullptr && uid == last_uid;
  if (reuse_last)
  {
    // Same UID as the previous call; only the pending check is needed before binding.
    if (last_entry->pending)
      return SetUberShader(vertex_format);
  }
  else
  {
    const auto cached = vshaders.find(uid);
    if (cached == vshaders.end())
    {
      if (g_ActiveConfig.CanBackgroundCompileShaders())
      {
        // Reserve a pending slot, queue the real compile and use the uber shader meanwhile.
        VSCacheEntry placeholder;
        placeholder.pending = true;
        vshaders[uid] = placeholder;

        g_async_compiler->QueueWorkItem(
            g_async_compiler->CreateWorkItem<VertexShaderCompilerWorkItem>(uid));
        return SetUberShader(vertex_format);
      }

      // Synchronous compile: store the result, persist it, then look the entry up again.
      D3DBlob* blob = nullptr;
      ShaderCode code =
          GenerateVertexShaderCode(APIType::D3D, ShaderHostConfig::GetCurrent(), uid.GetUidData());
      D3D::CompileVertexShader(code.GetBuffer(), &blob);
      if (!InsertByteCode(uid, blob))
      {
        SAFE_RELEASE(blob);
        return false;
      }

      g_vs_disk_cache.Append(uid, blob->Data(), blob->Size());
      blob->Release();
      return SetShader(vertex_format);
    }

    // A pending entry is still being compiled; it is not recorded as the last entry.
    if (cached->second.pending)
      return SetUberShader(vertex_format);

    last_uid = uid;
    last_entry = &cached->second;

    GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true);
  }

  if (!last_entry->shader)
    return false;

  D3D::stateman->SetInputLayout(vertex_format->GetInputLayout(last_entry->bytecode));
  D3D::stateman->SetVertexShader(last_entry->shader);
  return true;
}

// Binds the uber vertex shader for the current state, compiling it
// synchronously on a cache miss. The input layout comes from the uber vertex
// format derived from vertex_format's declaration. Returns false when the
// matching cache entry holds no shader (i.e. compilation failed).
bool VertexShaderCache::SetUberShader(D3DVertexFormat* vertex_format)
{
  D3DVertexFormat* uber_vertex_format = static_cast<D3DVertexFormat*>(
      VertexLoaderManager::GetUberVertexFormat(vertex_format->GetVertexDeclaration()));

  // Binds the input layout and shader of a cached uber entry.
  const auto bind_entry = [uber_vertex_format](const VSCacheEntry* cached) {
    if (!cached->shader)
      return false;

    D3D::stateman->SetInputLayout(uber_vertex_format->GetInputLayout(cached->bytecode));
    D3D::stateman->SetVertexShader(cached->shader);
    return true;
  };

  UberShader::VertexShaderUid uid = UberShader::GetVertexShaderUid();

  // Fast path: same UID as the previous call.
  if (last_uber_entry && last_uber_uid == uid)
    return bind_entry(last_uber_entry);

  // Slow path: look the UID up in the uber cache.
  const auto found = ubervshaders.find(uid);
  if (found != ubervshaders.end())
  {
    const VSCacheEntry& cached = found->second;
    last_uber_uid = uid;
    last_uber_entry = &cached;

    GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true);
    return bind_entry(last_uber_entry);
  }

  // Cache miss: compile now.
  D3DBlob* bytecode = nullptr;
  ShaderCode code =
      UberShader::GenVertexShader(APIType::D3D, ShaderHostConfig::GetCurrent(), uid.GetUidData());
  D3D::CompileVertexShader(code.GetBuffer(), &bytecode);
  if (InsertByteCode(uid, bytecode))
  {
    // Persist the new shader, then re-enter to bind it through the lookup path.
    g_uber_vs_disk_cache.Append(uid, bytecode->Data(), bytecode->Size());
    bytecode->Release();
    return SetUberShader(vertex_format);
  }

  SAFE_RELEASE(bytecode);
  return false;
}

bool VertexShaderCache::InsertByteCode(const VertexShaderUid& uid, D3DBlob* blob)
{
ID3D11VertexShader* shader =
blob ? D3D::CreateVertexShaderFromByteCode(blob->Data(), blob->Size()) : nullptr;
bool result = InsertShader(uid, shader, blob);
SAFE_RELEASE(shader);
return result;
}

// Creates a D3D vertex shader from blob (when non-null) and inserts it into the
// uber shader cache under uid. Returns the result of InsertShader().
bool VertexShaderCache::InsertByteCode(const UberShader::VertexShaderUid& uid, D3DBlob* blob)
{
  ID3D11VertexShader* created = nullptr;
  if (blob)
    created = D3D::CreateVertexShaderFromByteCode(blob->Data(), blob->Size());

  const bool inserted = InsertShader(uid, created, blob);

  // InsertShader() takes its own reference on success; drop the local one.
  SAFE_RELEASE(created);
  return inserted;
}

// Stores a compiled specialized vertex shader in the cache under uid.
//
// An existing entry is only overwritten while it is still marked pending; a
// resolved entry is left untouched and false is returned. When shader or blob
// is null the entry is still created/marked as no longer pending (with no
// shader attached) and false is returned. On success the cache takes its own
// reference on both shader and blob, so the caller keeps ownership of its
// references.
bool VertexShaderCache::InsertShader(const VertexShaderUid& uid, ID3D11VertexShader* shader,
                                     D3DBlob* blob)
{
  auto iter = vshaders.find(uid);
  if (iter != vshaders.end() && !iter->second.pending)
    return false;

  VSCacheEntry& newentry = vshaders[uid];
  newentry.pending = false;
  if (!shader || !blob)
    return false;

  shader->AddRef();
  newentry.SetByteCode(blob);
  newentry.shader = shader;

  // This is the vertex shader cache, so account against the vertex shader
  // counters rather than the pixel shader ones.
  INCSTAT(stats.numVertexShadersCreated);
  SETSTAT(stats.numVertexShadersAlive, static_cast<int>(vshaders.size()));
  return true;
}

// Stores a compiled uber vertex shader in the cache under uid.
//
// A resolved (non-pending) entry is never overwritten. When shader or blob is
// null the entry is created/marked as no longer pending without a shader and
// false is returned. On success the cache holds its own reference on both
// objects.
bool VertexShaderCache::InsertShader(const UberShader::VertexShaderUid& uid,
                                     ID3D11VertexShader* shader, D3DBlob* blob)
{
  const auto existing = ubervshaders.find(uid);
  const bool already_resolved = existing != ubervshaders.end() && !existing->second.pending;
  if (already_resolved)
    return false;

  VSCacheEntry& slot = ubervshaders[uid];
  slot.pending = false;

  const bool have_shader = shader && blob;
  if (!have_shader)
    return false;

  shader->AddRef();
  slot.SetByteCode(blob);
  slot.shader = shader;
  return true;
}

// Asks the shared async compiler to retrieve its work items.
// NOTE(review): presumably this invokes Retrieve() on each finished work item
// on the calling thread - confirm against AsyncShaderCompiler.
void VertexShaderCache::RetreiveAsyncShaders()
{
g_async_compiler->RetrieveWorkItems();
}

// Queues an asynchronous compile for every enumerated uber vertex shader UID
// that does not yet have an entry in the uber cache.
void VertexShaderCache::QueueUberShaderCompiles()
{
  UberShader::EnumerateVertexShaderUids([&](const UberShader::VertexShaderUid& uid) {
    const bool already_cached = ubervshaders.count(uid) != 0;
    if (!already_cached)
    {
      g_async_compiler->QueueWorkItem(
          g_async_compiler->CreateWorkItem<UberVertexShaderCompilerWorkItem>(uid));
    }
  });
}

// Blocks until the async compiler reports completion, updating the host
// progress dialog along the way, then retrieves the results and resizes the
// worker pool to the configured runtime thread count.
void VertexShaderCache::WaitForBackgroundCompilesToComplete()
{
  const auto report_progress = [](size_t completed, size_t total) {
    Host_UpdateProgressDialog(GetStringT("Compiling shaders...").c_str(),
                              static_cast<int>(completed), static_cast<int>(total));
  };

  g_async_compiler->WaitUntilCompletion(report_progress);
  g_async_compiler->RetrieveWorkItems();

  // Final update with an empty title and -1/-1 progress values.
  Host_UpdateProgressDialog("", -1, -1);

  // Switch from precompile -> runtime compiler threads.
  g_async_compiler->ResizeWorkerThreads(g_ActiveConfig.GetShaderCompilerThreads());
}

// Captures the UID of the specialized shader to compile.
// The UID is copied bytewise with memcpy rather than through assignment.
// NOTE(review): presumably so every byte of the UID (including any padding) is
// preserved for later comparisons - confirm against the ShaderUid definition.
VertexShaderCache::VertexShaderCompilerWorkItem::VertexShaderCompilerWorkItem(
const VertexShaderUid& uid)
{
std::memcpy(&m_uid, &uid, sizeof(uid));
}

// Releases the work item's references to the compiled bytecode and shader.
// On a successful Retrieve() the cache has taken its own references via
// InsertShader(), so this only drops the work item's share.
VertexShaderCache::VertexShaderCompilerWorkItem::~VertexShaderCompilerWorkItem()
{
SAFE_RELEASE(m_bytecode);
SAFE_RELEASE(m_vs);
}

// Generates and compiles the specialized vertex shader for m_uid, creating the
// D3D shader object when compilation succeeds.
//
// Always returns true, even if compilation failed; Retrieve() then sees null
// members. NOTE(review): presumably this is so Retrieve() still runs and can
// clear the entry's pending flag - confirm against AsyncShaderCompiler.
bool VertexShaderCache::VertexShaderCompilerWorkItem::Compile()
{
  ShaderCode code =
      GenerateVertexShaderCode(APIType::D3D, ShaderHostConfig::GetCurrent(), m_uid.GetUidData());

  const bool compiled = D3D::CompileVertexShader(code.GetBuffer(), &m_bytecode);
  if (compiled)
  {
    m_vs = D3D::CreateVertexShaderFromByteCode(m_bytecode);
  }

  return true;
}

// Publishes the compile result into the specialized cache and, when the insert
// succeeds, appends the bytecode to the on-disk cache.
void VertexShaderCache::VertexShaderCompilerWorkItem::Retrieve()
{
  const bool inserted = InsertShader(m_uid, m_vs, m_bytecode);
  if (!inserted)
    return;

  g_vs_disk_cache.Append(m_uid, m_bytecode->Data(), m_bytecode->Size());
}

// Captures the UID of the uber shader to compile.
// The UID is copied bytewise with memcpy rather than through assignment.
// NOTE(review): presumably so every byte of the UID (including any padding) is
// preserved for later comparisons - confirm against the ShaderUid definition.
VertexShaderCache::UberVertexShaderCompilerWorkItem::UberVertexShaderCompilerWorkItem(
const UberShader::VertexShaderUid& uid)
{
std::memcpy(&m_uid, &uid, sizeof(uid));
}

// Releases the work item's references to the compiled bytecode and shader.
// On a successful Retrieve() the cache has taken its own references via
// InsertShader(), so this only drops the work item's share.
VertexShaderCache::UberVertexShaderCompilerWorkItem::~UberVertexShaderCompilerWorkItem()
{
SAFE_RELEASE(m_bytecode);
SAFE_RELEASE(m_vs);
}

// Generates and compiles the uber vertex shader for m_uid, creating the D3D
// shader object when compilation succeeds.
//
// Always returns true, even if compilation failed; Retrieve() then sees null
// members. NOTE(review): presumably this is so Retrieve() still runs and can
// clear the entry's pending flag - confirm against AsyncShaderCompiler.
bool VertexShaderCache::UberVertexShaderCompilerWorkItem::Compile()
{
  ShaderCode code =
      UberShader::GenVertexShader(APIType::D3D, ShaderHostConfig::GetCurrent(), m_uid.GetUidData());

  const bool compiled = D3D::CompileVertexShader(code.GetBuffer(), &m_bytecode);
  if (compiled)
  {
    m_vs = D3D::CreateVertexShaderFromByteCode(m_bytecode);
  }

  return true;
}

// Publishes the compile result into the uber shader cache and, when the insert
// succeeds, appends the bytecode to the on-disk uber cache.
void VertexShaderCache::UberVertexShaderCompilerWorkItem::Retrieve()
{
  const bool inserted = InsertShader(m_uid, m_vs, m_bytecode);
  if (!inserted)
    return;

  g_uber_vs_disk_cache.Append(m_uid, m_bytecode->Data(), m_bytecode->Size());
}

} // namespace DX11
@@ -21,91 +21,14 @@ class VertexShaderCache
{
public:
// Lifetime management of the cache.
static void Init();
static void Reload();
static void Clear();
static void Shutdown();
// Bind the specialized / uber vertex shader for the given vertex format.
// Both return false when the matching cache entry has no shader.
static bool SetShader(D3DVertexFormat* vertex_format);
static bool SetUberShader(D3DVertexFormat* vertex_format);
// Background compilation control.
static void RetreiveAsyncShaders();
static void QueueUberShaderCompiles();
static void WaitForBackgroundCompilesToComplete();

static ID3D11Buffer*& GetConstantBuffer();

static ID3D11VertexShader* GetSimpleVertexShader();
static ID3D11VertexShader* GetClearVertexShader();
static ID3D11InputLayout* GetSimpleInputLayout();
static ID3D11InputLayout* GetClearInputLayout();

// InsertByteCode creates a shader object from the blob and forwards to
// InsertShader, which stores the shader/bytecode pair under the given UID.
static bool InsertByteCode(const VertexShaderUid& uid, D3DBlob* blob);
static bool InsertByteCode(const UberShader::VertexShaderUid& uid, D3DBlob* blob);
static bool InsertShader(const VertexShaderUid& uid, ID3D11VertexShader* shader, D3DBlob* blob);
static bool InsertShader(const UberShader::VertexShaderUid& uid, ID3D11VertexShader* shader,
D3DBlob* blob);

private:
// One cached vertex shader together with the bytecode it was created from.
struct VSCacheEntry
{
ID3D11VertexShader* shader;
D3DBlob* bytecode;  // needed to initialize the input layout
// Set while a background compile has been queued for this entry.
bool pending;

VSCacheEntry() : shader(nullptr), bytecode(nullptr), pending(false) {}
// Replaces the stored bytecode, releasing the previous blob and taking a
// reference on the new one. blob must not be null (it is dereferenced).
void SetByteCode(D3DBlob* blob)
{
SAFE_RELEASE(bytecode);
bytecode = blob;
blob->AddRef();
}
// Releases the references held on the shader and the bytecode.
void Destroy()
{
SAFE_RELEASE(shader);
SAFE_RELEASE(bytecode);
}
};

// Async work item that compiles one specialized vertex shader.
class VertexShaderCompilerWorkItem : public VideoCommon::AsyncShaderCompiler::WorkItem
{
public:
explicit VertexShaderCompilerWorkItem(const VertexShaderUid& uid);
~VertexShaderCompilerWorkItem() override;

bool Compile() override;
void Retrieve() override;

private:
VertexShaderUid m_uid;
D3DBlob* m_bytecode = nullptr;
ID3D11VertexShader* m_vs = nullptr;
};

// Async work item that compiles one uber vertex shader.
class UberVertexShaderCompilerWorkItem : public VideoCommon::AsyncShaderCompiler::WorkItem
{
public:
explicit UberVertexShaderCompilerWorkItem(const UberShader::VertexShaderUid& uid);
~UberVertexShaderCompilerWorkItem() override;

bool Compile() override;
void Retrieve() override;

private:
UberShader::VertexShaderUid m_uid;
D3DBlob* m_bytecode = nullptr;
ID3D11VertexShader* m_vs = nullptr;
};

typedef std::map<VertexShaderUid, VSCacheEntry> VSCache;
typedef std::map<UberShader::VertexShaderUid, VSCacheEntry> UberVSCache;

static void LoadShaderCache();
static void SetInputLayout();

// Cache storage for specialized and uber shaders.
static VSCache vshaders;
static UberVSCache ubervshaders;
// Most recently used entry and its UID for each cache; lets SetShader() and
// SetUberShader() skip the map lookup when the UID has not changed.
static const VSCacheEntry* last_entry;
static const VSCacheEntry* last_uber_entry;
static VertexShaderUid last_uid;
static UberShader::VertexShaderUid last_uber_uid;
};

} // namespace DX11
@@ -21,6 +21,7 @@
#include "VideoBackends/D3D/VertexShaderCache.h"
#include "VideoBackends/D3D/VideoBackend.h"

#include "VideoCommon/ShaderCache.h"
#include "VideoCommon/VideoCommon.h"
#include "VideoCommon/VideoConfig.h"

@@ -69,6 +70,7 @@ void VideoBackend::InitBackendInfo()
g_Config.backend_info.bSupportsDynamicSamplerIndexing = false;
g_Config.backend_info.bSupportsBPTCTextures = false;
g_Config.backend_info.bSupportsFramebufferFetch = false;
g_Config.backend_info.bSupportsBackgroundCompiling = true;

IDXGIFactory2* factory;
IDXGIAdapter* ad;
@@ -148,21 +150,25 @@ bool VideoBackend::Initialize(void* window_handle)

// internal interfaces
g_renderer = std::make_unique<Renderer>(backbuffer_width, backbuffer_height);
g_shader_cache = std::make_unique<VideoCommon::ShaderCache>();
g_texture_cache = std::make_unique<TextureCache>();
g_vertex_manager = std::make_unique<VertexManager>();
g_perf_query = std::make_unique<PerfQuery>();

VertexShaderCache::Init();
PixelShaderCache::Init();
GeometryShaderCache::Init();
VertexShaderCache::WaitForBackgroundCompilesToComplete();
if (!g_shader_cache->Initialize())
return false;

D3D::InitUtils();
BBox::Init();
return true;
}

void VideoBackend::Shutdown()
{
g_shader_cache->Shutdown();
g_renderer->Shutdown();

D3D::ShutdownUtils();
@@ -174,6 +180,7 @@ void VideoBackend::Shutdown()
g_perf_query.reset();
g_vertex_manager.reset();
g_texture_cache.reset();
g_shader_cache.reset();
g_renderer.reset();

ShutdownShared();
@@ -3,7 +3,6 @@ set(SRCS
NullTexture.cpp
Render.cpp
VertexManager.cpp
ShaderCache.cpp
)

set(LIBS
@@ -39,14 +39,12 @@
<ClCompile Include="NullBackend.cpp" />
<ClCompile Include="NullTexture.cpp" />
<ClCompile Include="Render.cpp" />
<ClCompile Include="ShaderCache.cpp" />
<ClCompile Include="VertexManager.cpp" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="NullTexture.h" />
<ClInclude Include="PerfQuery.h" />
<ClInclude Include="Render.h" />
<ClInclude Include="ShaderCache.h" />
<ClInclude Include="TextureCache.h" />
<ClInclude Include="VertexManager.h" />
<ClInclude Include="VideoBackend.h" />
@@ -9,7 +9,6 @@

#include "VideoBackends/Null/PerfQuery.h"
#include "VideoBackends/Null/Render.h"
#include "VideoBackends/Null/ShaderCache.h"
#include "VideoBackends/Null/TextureCache.h"
#include "VideoBackends/Null/VertexManager.h"
#include "VideoBackends/Null/VideoBackend.h"
@@ -47,6 +46,7 @@ void VideoBackend::InitBackendInfo()
g_Config.backend_info.bSupportsST3CTextures = false;
g_Config.backend_info.bSupportsBPTCTextures = false;
g_Config.backend_info.bSupportsFramebufferFetch = false;
g_Config.backend_info.bSupportsBackgroundCompiling = false;

// aamodes: We only support 1 sample, so no MSAA
g_Config.backend_info.Adapters.clear();
@@ -63,21 +63,15 @@ bool VideoBackend::Initialize(void* window_handle)
g_perf_query = std::make_unique<PerfQuery>();
g_framebuffer_manager = std::make_unique<FramebufferManagerBase>();
g_texture_cache = std::make_unique<TextureCache>();

VertexShaderCache::s_instance = std::make_unique<VertexShaderCache>();
GeometryShaderCache::s_instance = std::make_unique<GeometryShaderCache>();
PixelShaderCache::s_instance = std::make_unique<PixelShaderCache>();
return true;
g_shader_cache = std::make_unique<VideoCommon::ShaderCache>();
return g_shader_cache->Initialize();
}

void VideoBackend::Shutdown()
{
g_shader_cache->Shutdown();
g_renderer->Shutdown();

PixelShaderCache::s_instance.reset();
VertexShaderCache::s_instance.reset();
GeometryShaderCache::s_instance.reset();

g_texture_cache.reset();
g_perf_query.reset();
g_vertex_manager.reset();

This file was deleted.

This file was deleted.

@@ -4,8 +4,6 @@

#include "VideoBackends/Null/VertexManager.h"

#include "VideoBackends/Null/ShaderCache.h"

#include "VideoCommon/IndexGenerator.h"
#include "VideoCommon/NativeVertexFormat.h"
#include "VideoCommon/VertexLoaderManager.h"
@@ -41,9 +39,6 @@ void VertexManager::ResetBuffer(u32 stride)

void VertexManager::vFlush()
{
VertexShaderCache::s_instance->SetShader(m_current_primitive_type);
GeometryShaderCache::s_instance->SetShader(m_current_primitive_type);
PixelShaderCache::s_instance->SetShader(m_current_primitive_type);
}

} // namespace
@@ -46,10 +46,11 @@ OGLPipeline::~OGLPipeline()

std::unique_ptr<OGLPipeline> OGLPipeline::Create(const AbstractPipelineConfig& config)
{
const PipelineProgram* program =
ProgramShaderCache::GetPipelineProgram(static_cast<const OGLShader*>(config.vertex_shader),
static_cast<const OGLShader*>(config.geometry_shader),
static_cast<const OGLShader*>(config.pixel_shader));
const PipelineProgram* program = ProgramShaderCache::GetPipelineProgram(
static_cast<const GLVertexFormat*>(config.vertex_format),
static_cast<const OGLShader*>(config.vertex_shader),
static_cast<const OGLShader*>(config.geometry_shader),
static_cast<const OGLShader*>(config.pixel_shader));
if (!program)
return nullptr;

Large diffs are not rendered by default.

@@ -6,62 +6,19 @@

#include <atomic>
#include <memory>
#include <mutex>
#include <tuple>
#include <unordered_map>

#include "Common/GL/GLUtil.h"
#include "Common/LinearDiskCache.h"

#include "VideoCommon/AsyncShaderCompiler.h"
#include "VideoCommon/GeometryShaderGen.h"
#include "VideoCommon/PixelShaderGen.h"
#include "VideoCommon/UberShaderPixel.h"
#include "VideoCommon/UberShaderVertex.h"
#include "VideoCommon/VertexShaderGen.h"

class cInterfaceBase;

namespace OGL
{
class OGLShader;
class GLVertexFormat;
class StreamBuffer;

// Composite key made of the vertex, pixel and geometry shader UIDs of one
// specialized shader program.
class SHADERUID
{
public:
  VertexShaderUid vuid;
  PixelShaderUid puid;
  GeometryShaderUid guid;

  // Lexicographic ordering over (vuid, puid, guid).
  bool operator<(const SHADERUID& rhs) const
  {
    const auto lhs_key = std::tie(vuid, puid, guid);
    const auto rhs_key = std::tie(rhs.vuid, rhs.puid, rhs.guid);
    return lhs_key < rhs_key;
  }

  // Equal only when all three UIDs compare equal.
  bool operator==(const SHADERUID& rhs) const
  {
    const auto lhs_key = std::tie(vuid, puid, guid);
    const auto rhs_key = std::tie(rhs.vuid, rhs.puid, rhs.guid);
    return lhs_key == rhs_key;
  }
};
// Composite key made of the uber vertex, uber pixel and geometry shader UIDs of
// one uber shader program.
class UBERSHADERUID
{
public:
  UberShader::VertexShaderUid vuid;
  UberShader::PixelShaderUid puid;
  GeometryShaderUid guid;

  // Lexicographic ordering over (vuid, puid, guid).
  bool operator<(const UBERSHADERUID& rhs) const
  {
    const auto lhs_key = std::tie(vuid, puid, guid);
    const auto rhs_key = std::tie(rhs.vuid, rhs.puid, rhs.guid);
    return lhs_key < rhs_key;
  }

  // Equal only when all three UIDs compare equal.
  bool operator==(const UBERSHADERUID& rhs) const
  {
    const auto lhs_key = std::tie(vuid, puid, guid);
    const auto rhs_key = std::tie(rhs.vuid, rhs.puid, rhs.guid);
    return lhs_key == rhs_key;
  }
};

struct SHADER
{
void Destroy()
@@ -111,18 +68,6 @@ struct PipelineProgram
class ProgramShaderCache
{
public:
struct PCacheEntry
{
SHADER shader;
bool in_cache;
bool pending;

void Destroy() { shader.Destroy(); }
};

static PCacheEntry GetShaderProgram();
static SHADER* SetShader(PrimitiveType primitive_type, const GLVertexFormat* vertex_format);
static SHADER* SetUberShader(PrimitiveType primitive_type, const GLVertexFormat* vertex_format);
static void BindVertexFormat(const GLVertexFormat* vertex_format);
static void InvalidateVertexFormat();
static void InvalidateLastProgram();
@@ -140,114 +85,25 @@ class ProgramShaderCache
static void UploadConstants();

static void Init();
static void Reload();
static void Shutdown();
static void CreateHeader();
static void RetrieveAsyncShaders();
static void PrecompileUberShaders();

static const PipelineProgram* GetPipelineProgram(const OGLShader* vertex_shader,
static const PipelineProgram* GetPipelineProgram(const GLVertexFormat* vertex_format,
const OGLShader* vertex_shader,
const OGLShader* geometry_shader,
const OGLShader* pixel_shader);
static void ReleasePipelineProgram(const PipelineProgram* prog);

private:
template <typename UIDType>
class ProgramShaderCacheInserter : public LinearDiskCacheReader<UIDType, u8>
{
public:
ProgramShaderCacheInserter(std::map<UIDType, PCacheEntry>& shader_map)
: m_shader_map(shader_map)
{
}

void Read(const UIDType& key, const u8* value, u32 value_size) override
{
if (m_shader_map.find(key) != m_shader_map.end())
return;

PCacheEntry& entry = m_shader_map[key];
if (!CreateCacheEntryFromBinary(&entry, value, value_size))
{
m_shader_map.erase(key);
return;
}
}

private:
std::map<UIDType, PCacheEntry>& m_shader_map;
};

class SharedContextAsyncShaderCompiler : public VideoCommon::AsyncShaderCompiler
{
protected:
bool WorkerThreadInitMainThread(void** param) override;
bool WorkerThreadInitWorkerThread(void* param) override;
void WorkerThreadExit(void* param) override;
};

struct SharedContextData
{
std::unique_ptr<cInterfaceBase> context;
GLuint prerender_FBO;
GLuint prerender_FBO_tex;
GLuint prerender_FBO_depth;
GLuint prerender_VBO;
GLuint prerender_VAO;
GLuint prerender_IBO;
};

class ShaderCompileWorkItem : public VideoCommon::AsyncShaderCompiler::WorkItem
{
public:
explicit ShaderCompileWorkItem(const SHADERUID& uid);

bool Compile() override;
void Retrieve() override;

private:
SHADERUID m_uid;
SHADER m_program;
};

class UberShaderCompileWorkItem : public VideoCommon::AsyncShaderCompiler::WorkItem
{
public:
explicit UberShaderCompileWorkItem(const UBERSHADERUID& uid);

bool Compile() override;
void Retrieve() override;

private:
UBERSHADERUID m_uid;
SHADER m_program;
};

typedef std::map<SHADERUID, PCacheEntry> PCache;
typedef std::map<UBERSHADERUID, PCacheEntry> UberPCache;
typedef std::unordered_map<PipelineProgramKey, std::unique_ptr<PipelineProgram>,
PipelineProgramKeyHash>
PipelineProgramMap;

static void CreateAttributelessVAO();
static GLuint CreateProgramFromBinary(const u8* value, u32 value_size);
static bool CreateCacheEntryFromBinary(PCacheEntry* entry, const u8* value, u32 value_size);
static void LoadProgramBinaries();
static void SaveProgramBinaries();
static void DestroyShaders();
static void CreatePrerenderArrays(SharedContextData* data);
static void DestroyPrerenderArrays(SharedContextData* data);
static void DrawPrerenderArray(const SHADER& shader, PrimitiveType primitive_type);

static PCache pshaders;
static UberPCache ubershaders;
static PipelineProgramMap pipelineprograms;
static PCacheEntry* last_entry;
static PCacheEntry* last_uber_entry;
static SHADERUID last_uid;
static UBERSHADERUID last_uber_uid;
static PipelineProgramMap s_pipeline_programs;
static std::mutex s_pipeline_program_lock;

static std::unique_ptr<SharedContextAsyncShaderCompiler> s_async_compiler;
static u32 s_ubo_buffer_size;
static s32 s_ubo_align;

@@ -256,4 +112,12 @@ class ProgramShaderCache
static GLuint s_last_VAO;
};

// AsyncShaderCompiler specialization for the OpenGL backend; it overrides the
// three worker-thread lifecycle hooks of the base class.
// NOTE(review): going by the class name, the hooks presumably create, bind and
// destroy a shared GL context per worker - confirm in the implementation file.
class SharedContextAsyncShaderCompiler : public VideoCommon::AsyncShaderCompiler
{
protected:
bool WorkerThreadInitMainThread(void** param) override;
bool WorkerThreadInitWorkerThread(void* param) override;
void WorkerThreadExit(void* param) override;
};

} // namespace OGL
@@ -81,8 +81,8 @@ static bool s_efbCacheIsCleared = false;
static std::vector<u32>
s_efbCache[2][EFB_CACHE_WIDTH * EFB_CACHE_HEIGHT]; // 2 for PeekZ and PeekColor

static void APIENTRY ErrorCallback(GLenum source, GLenum type, GLuint id, GLenum severity,
GLsizei length, const char* message, const void* userParam)
void APIENTRY ErrorCallback(GLenum source, GLenum type, GLuint id, GLenum severity, GLsizei length,
const char* message, const void* userParam)
{
const char* s_source;
const char* s_type;
@@ -677,6 +677,10 @@ Renderer::Renderer()
g_Config.backend_info.bSupportsPaletteConversion &&
g_Config.backend_info.bSupportsComputeShaders && g_ogl_config.bSupportsImageLoadStore;

// Background compiling is supported only when shared contexts aren't broken.
g_Config.backend_info.bSupportsBackgroundCompiling =
!DriverDetails::HasBug(DriverDetails::BUG_SHARED_CONTEXT_SHADER_COMPILATION);

if (g_ogl_config.bSupportsDebug)
{
if (GLExtensions::Supports("GL_KHR_debug"))
@@ -784,25 +788,7 @@ Renderer::Renderer()
glClearDepthf(1.0f);

if (g_ActiveConfig.backend_info.bSupportsPrimitiveRestart)
{
if (GLInterface->GetMode() == GLInterfaceMode::MODE_OPENGLES3)
{
glEnable(GL_PRIMITIVE_RESTART_FIXED_INDEX);
}
else
{
if (GLExtensions::Version() >= 310)
{
glEnable(GL_PRIMITIVE_RESTART);
glPrimitiveRestartIndex(65535);
}
else
{
glEnableClientState(GL_PRIMITIVE_RESTART_NV);
glPrimitiveRestartIndexNV(65535);
}
}
}
GLUtil::EnablePrimitiveRestart();
IndexGenerator::Init();

UpdateActiveConfig();
@@ -1286,8 +1272,11 @@ void Renderer::SetAndClearFramebuffer(const AbstractFramebuffer* framebuffer,
glClear(clear_mask);
}

void Renderer::ApplyBlendingState(const BlendingState& state)
void Renderer::ApplyBlendingState(const BlendingState state, bool force)
{
if (!force && m_current_blend_state == state)
return;

bool useDualSource =
state.usedualsrc && g_ActiveConfig.backend_info.bSupportsDualSourceBlend &&
(!DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DUAL_SOURCE_BLENDING) || state.dstalpha);
@@ -1360,6 +1349,7 @@ void Renderer::ApplyBlendingState(const BlendingState& state)
}

glColorMask(state.colorupdate, state.colorupdate, state.colorupdate, state.alphaupdate);
m_current_blend_state = state;
}

// This function has the final picture. We adjust the aspect ratio here.
@@ -1465,7 +1455,6 @@ void Renderer::SwapImpl(AbstractTexture* texture, const EFBRectangle& xfb_region

// Clean out old stuff from caches. It's not worth it to clean out the shader caches.
g_texture_cache->Cleanup(frameCount);
ProgramShaderCache::RetrieveAsyncShaders();

RestoreAPIState();

@@ -1479,8 +1468,7 @@ void Renderer::SwapImpl(AbstractTexture* texture, const EFBRectangle& xfb_region
g_sampler_cache->Clear();

// Invalidate shader cache when the host config changes.
if (CheckForHostConfigChanges())
ProgramShaderCache::Reload();
CheckForHostConfigChanges();

// For testing zbuffer targets.
// Renderer::SetZBufferRender();
@@ -1559,15 +1547,19 @@ void Renderer::RestoreAPIState()
glEnable(GL_CLIP_DISTANCE0);
glEnable(GL_CLIP_DISTANCE1);
}
BPFunctions::SetGenerationMode();
BPFunctions::SetScissor();
BPFunctions::SetViewport();
BPFunctions::SetDepthMode();
BPFunctions::SetBlendMode();

ApplyRasterizationState(m_current_rasterization_state, true);
ApplyDepthState(m_current_depth_state, true);
ApplyBlendingState(m_current_blend_state, true);
}

void Renderer::ApplyRasterizationState(const RasterizationState& state)
void Renderer::ApplyRasterizationState(const RasterizationState state, bool force)
{
if (!force && m_current_rasterization_state == state)
return;

// none, ccw, cw, ccw
if (state.cullmode != GenMode::CULL_NONE)
{
@@ -1579,10 +1571,15 @@ void Renderer::ApplyRasterizationState(const RasterizationState& state)
{
glDisable(GL_CULL_FACE);
}

m_current_rasterization_state = state;
}

void Renderer::ApplyDepthState(const DepthState& state)
void Renderer::ApplyDepthState(const DepthState state, bool force)
{
if (!force && m_current_depth_state == state)
return;

const GLenum glCmpFuncs[8] = {GL_NEVER, GL_LESS, GL_EQUAL, GL_LEQUAL,
GL_GREATER, GL_NOTEQUAL, GL_GEQUAL, GL_ALWAYS};

@@ -1600,28 +1597,18 @@ void Renderer::ApplyDepthState(const DepthState& state)
glDisable(GL_DEPTH_TEST);
glDepthMask(GL_FALSE);
}
}

void Renderer::SetRasterizationState(const RasterizationState& state)
{
ApplyRasterizationState(state);
}

void Renderer::SetDepthState(const DepthState& state)
{
ApplyDepthState(state);
}

void Renderer::SetBlendingState(const BlendingState& state)
{
ApplyBlendingState(state);
m_current_depth_state = state;
}

void Renderer::SetPipeline(const AbstractPipeline* pipeline)
{
// Not all shader changes currently go through SetPipeline, so we can't
// test if the pipeline hasn't changed and skip these applications. Yet.
m_graphics_pipeline = static_cast<const OGLPipeline*>(pipeline);
if (!m_graphics_pipeline)
return;

ApplyRasterizationState(m_graphics_pipeline->GetRasterizationState());
ApplyDepthState(m_graphics_pipeline->GetDepthState());
ApplyBlendingState(m_graphics_pipeline->GetBlendingState());
@@ -1709,4 +1696,9 @@ void Renderer::DispatchComputeShader(const AbstractShader* shader, const void* u
glDispatchCompute(groups_x, groups_y, groups_z);
ProgramShaderCache::InvalidateLastProgram();
}

// Returns a newly created SharedContextAsyncShaderCompiler, handed back through
// the generic AsyncShaderCompiler interface; the caller owns the result.
std::unique_ptr<VideoCommon::AsyncShaderCompiler> Renderer::CreateAsyncShaderCompiler()
{
return std::make_unique<SharedContextAsyncShaderCompiler>();
}
}
@@ -105,10 +105,7 @@ class Renderer : public ::Renderer
void SetAndClearFramebuffer(const AbstractFramebuffer* framebuffer,
const ClearColor& color_value = {},
float depth_value = 0.0f) override;
void SetBlendingState(const BlendingState& state) override;
void SetScissorRect(const MathUtil::Rectangle<int>& rc) override;
void SetRasterizationState(const RasterizationState& state) override;
void SetDepthState(const DepthState& state) override;
void SetTexture(u32 index, const AbstractTexture* texture) override;
void SetSamplerState(u32 index, const SamplerState& state) override;
void UnbindTexture(const AbstractTexture* texture) override;
@@ -142,6 +139,8 @@ class Renderer : public ::Renderer
void DispatchComputeShader(const AbstractShader* shader, const void* uniforms, u32 uniforms_size,
u32 groups_x, u32 groups_y, u32 groups_z) override;

std::unique_ptr<VideoCommon::AsyncShaderCompiler> CreateAsyncShaderCompiler() override;

private:
void UpdateEFBCache(EFBAccessType type, u32 cacheRectIdx, const EFBRectangle& efbPixelRc,
const TargetRectangle& targetPixelRc, const void* data);
@@ -155,12 +154,15 @@ class Renderer : public ::Renderer
void CheckForSurfaceChange();
void CheckForSurfaceResize();

void ApplyBlendingState(const BlendingState& state);
void ApplyRasterizationState(const RasterizationState& state);
void ApplyDepthState(const DepthState& state);
void ApplyBlendingState(const BlendingState state, bool force = false);
void ApplyRasterizationState(const RasterizationState state, bool force = false);
void ApplyDepthState(const DepthState state, bool force = false);
void UploadUtilityUniforms(const void* uniforms, u32 uniforms_size);

std::array<const AbstractTexture*, 8> m_bound_textures{};
const OGLPipeline* m_graphics_pipeline = nullptr;
RasterizationState m_current_rasterization_state = {};
DepthState m_current_depth_state = {};
BlendingState m_current_blend_state = {};
};
}
@@ -162,8 +162,6 @@ void VertexManager::vFlush()
GLVertexFormat* nativeVertexFmt = (GLVertexFormat*)VertexLoaderManager::GetCurrentVertexFormat();
u32 stride = nativeVertexFmt->GetVertexStride();

ProgramShaderCache::SetShader(m_current_primitive_type, nativeVertexFmt);

PrepareDrawBuffers(stride);

// upload global constants
@@ -174,7 +172,11 @@ void VertexManager::vFlush()
glEnable(GL_STENCIL_TEST);
}

Draw(stride);
if (m_current_pipeline_object)
{
g_renderer->SetPipeline(m_current_pipeline_object);
Draw(stride);
}

if (::BoundingBox::active && !g_Config.BBoxUseFragmentShaderImplementation())
{
@@ -160,7 +160,7 @@ bool VideoBackend::Initialize(void* window_handle)
InitBackendInfo();
InitializeShared();

InitInterface();
GLUtil::InitInterface();
GLInterface->SetMode(GLInterfaceMode::MODE_DETECT);
if (!GLInterface->Create(window_handle, g_ActiveConfig.stereo_mode == StereoMode::QuadBuffer))
return false;
@@ -175,17 +175,20 @@ bool VideoBackend::Initialize(void* window_handle)
ProgramShaderCache::Init();
g_texture_cache = std::make_unique<TextureCache>();
g_sampler_cache = std::make_unique<SamplerCache>();
g_shader_cache = std::make_unique<VideoCommon::ShaderCache>();
static_cast<Renderer*>(g_renderer.get())->Init();
TextureConverter::Init();
BoundingBox::Init(g_renderer->GetTargetWidth(), g_renderer->GetTargetHeight());
return true;
return g_shader_cache->Initialize();
}

void VideoBackend::Shutdown()
{
g_shader_cache->Shutdown();
g_renderer->Shutdown();
BoundingBox::Shutdown();
TextureConverter::Shutdown();
g_shader_cache.reset();
g_sampler_cache.reset();
g_texture_cache.reset();
ProgramShaderCache::Shutdown();
@@ -15,7 +15,7 @@ std::unique_ptr<SWOGLWindow> SWOGLWindow::s_instance;

void SWOGLWindow::Init(void* window_handle)
{
InitInterface();
GLUtil::InitInterface();
GLInterface->SetMode(GLInterfaceMode::MODE_DETECT);
if (!GLInterface->Create(window_handle))
{
@@ -71,7 +71,7 @@ void SWOGLWindow::Prepare()
"#version 300 es\n"
"precision highp float;\n";

m_image_program = OpenGL_CompileProgram(header + vertex_shader, header + frag_shader);
m_image_program = GLUtil::CompileProgram(header + vertex_shader, header + frag_shader);

glUseProgram(m_image_program);

@@ -73,6 +73,7 @@ void VideoSoftware::InitBackendInfo()
g_Config.backend_info.bSupportsBPTCTextures = false;
g_Config.backend_info.bSupportsCopyToVram = false;
g_Config.backend_info.bSupportsFramebufferFetch = false;
g_Config.backend_info.bSupportsBackgroundCompiling = false;

// aamodes
g_Config.backend_info.AAModes = {1};
@@ -96,11 +97,15 @@ bool VideoSoftware::Initialize(void* window_handle)
g_vertex_manager = std::make_unique<SWVertexLoader>();
g_perf_query = std::make_unique<PerfQuery>();
g_texture_cache = std::make_unique<TextureCache>();
return true;
g_shader_cache = std::make_unique<VideoCommon::ShaderCache>();
return g_shader_cache->Initialize();
}

void VideoSoftware::Shutdown()
{
if (g_shader_cache)
g_shader_cache->Shutdown();

if (g_renderer)
g_renderer->Shutdown();

@@ -48,8 +48,7 @@ enum DESCRIPTOR_SET_BIND_POINT
// - Standard
// - Per-stage UBO (VS/GS/PS, VS constants accessible from PS)
// - 8 combined image samplers (accessible from PS)
// - BBox Enabled
// - Same as standard, plus a single SSBO accessible from PS
// - 1 SSBO accessible from PS if supported
// - Push Constant
// - Same as standard, plus 128 bytes of push constants, accessible from all stages.
// - Texture Decoding
@@ -67,7 +66,6 @@ enum DESCRIPTOR_SET_BIND_POINT
enum PIPELINE_LAYOUT
{
PIPELINE_LAYOUT_STANDARD,
PIPELINE_LAYOUT_BBOX,
PIPELINE_LAYOUT_PUSH_CONSTANT,
PIPELINE_LAYOUT_TEXTURE_CONVERSION,
PIPELINE_LAYOUT_UTILITY,
@@ -109,6 +109,9 @@ bool ObjectCache::CreateDescriptorSetLayouts()
static const VkDescriptorSetLayoutBinding single_ubo_set_bindings[] = {
0, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 1,
VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_GEOMETRY_BIT | VK_SHADER_STAGE_FRAGMENT_BIT};

// The geometry shader buffer must be last in this binding set, as we don't include it
// if geometry shaders are not supported by the device. See the decrement below.
static const VkDescriptorSetLayoutBinding per_stage_ubo_set_bindings[] = {
{UBO_DESCRIPTOR_SET_BINDING_PS, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 1,
VK_SHADER_STAGE_FRAGMENT_BIT},
@@ -139,7 +142,7 @@ bool ObjectCache::CreateDescriptorSetLayouts()
{7, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1, VK_SHADER_STAGE_COMPUTE_BIT},
};

static const VkDescriptorSetLayoutCreateInfo create_infos[NUM_DESCRIPTOR_SET_LAYOUTS] = {
VkDescriptorSetLayoutCreateInfo create_infos[NUM_DESCRIPTOR_SET_LAYOUTS] = {
{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, nullptr, 0,
static_cast<u32>(ArraySize(single_ubo_set_bindings)), single_ubo_set_bindings},
{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, nullptr, 0,
@@ -153,6 +156,10 @@ bool ObjectCache::CreateDescriptorSetLayouts()
{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, nullptr, 0,
static_cast<u32>(ArraySize(compute_set_bindings)), compute_set_bindings}};

// Don't set the GS bit if geometry shaders aren't available.
if (!g_vulkan_context->SupportsGeometryShaders())
create_infos[DESCRIPTOR_SET_LAYOUT_PER_STAGE_UNIFORM_BUFFERS].bindingCount--;

for (size_t i = 0; i < NUM_DESCRIPTOR_SET_LAYOUTS; i++)
{
VkResult res = vkCreateDescriptorSetLayout(g_vulkan_context->GetDevice(), &create_infos[i],
@@ -180,11 +187,10 @@ bool ObjectCache::CreatePipelineLayouts()
{
VkResult res;

// Descriptor sets for each pipeline layout
// Descriptor sets for each pipeline layout.
// In the standard set, the SSBO must be the last descriptor, as we do not include it
// when fragment stores and atomics are not supported by the device.
VkDescriptorSetLayout standard_sets[] = {
m_descriptor_set_layouts[DESCRIPTOR_SET_LAYOUT_PER_STAGE_UNIFORM_BUFFERS],
m_descriptor_set_layouts[DESCRIPTOR_SET_LAYOUT_PIXEL_SHADER_SAMPLERS]};
VkDescriptorSetLayout bbox_sets[] = {
m_descriptor_set_layouts[DESCRIPTOR_SET_LAYOUT_PER_STAGE_UNIFORM_BUFFERS],
m_descriptor_set_layouts[DESCRIPTOR_SET_LAYOUT_PIXEL_SHADER_SAMPLERS],
m_descriptor_set_layouts[DESCRIPTOR_SET_LAYOUT_SHADER_STORAGE_BUFFERS]};
@@ -207,10 +213,6 @@ bool ObjectCache::CreatePipelineLayouts()
{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, nullptr, 0,
static_cast<u32>(ArraySize(standard_sets)), standard_sets, 0, nullptr},

// BBox
{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, nullptr, 0,
static_cast<u32>(ArraySize(bbox_sets)), bbox_sets, 0, nullptr},

// Push Constant
{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, nullptr, 0,
static_cast<u32>(ArraySize(standard_sets)), standard_sets, 1, &push_constant_range},
@@ -228,6 +230,10 @@ bool ObjectCache::CreatePipelineLayouts()
{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, nullptr, 0,
static_cast<u32>(ArraySize(compute_sets)), compute_sets, 1, &compute_push_constant_range}};

// If bounding box is unsupported, don't bother with the SSBO descriptor set.
if (!g_vulkan_context->SupportsBoundingBox())
pipeline_layout_info[PIPELINE_LAYOUT_STANDARD].setLayoutCount--;

for (size_t i = 0; i < NUM_PIPELINE_LAYOUTS; i++)
{
if ((res = vkCreatePipelineLayout(g_vulkan_context->GetDevice(), &pipeline_layout_info[i],
@@ -224,7 +224,7 @@ std::tuple<VkBuffer, u32> Renderer::UpdateUtilityUniformBuffer(const void* unifo

void Renderer::SetPipeline(const AbstractPipeline* pipeline)
{
m_graphics_pipeline = static_cast<const VKPipeline*>(pipeline);
StateTracker::GetInstance()->SetPipeline(static_cast<const VKPipeline*>(pipeline));
}

void Renderer::DrawUtilityPipeline(const void* uniforms, u32 uniforms_size, const void* vertices,
@@ -305,7 +305,7 @@ void Renderer::DrawUtilityPipeline(const void* uniforms, u32 uniforms_size, cons
// Build commands.
VkCommandBuffer command_buffer = g_command_buffer_mgr->GetCurrentCommandBuffer();
vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
m_graphics_pipeline->GetPipeline());
StateTracker::GetInstance()->GetPipeline()->GetVkPipeline());
if (vertex_buffer != VK_NULL_HANDLE)
vkCmdBindVertexBuffers(command_buffer, 0, 1, &vertex_buffer, &vertex_buffer_offset);

@@ -759,9 +759,6 @@ void Renderer::SwapImpl(AbstractTexture* texture, const EFBRectangle& xfb_region

// Clean up stale textures.
TextureCache::GetInstance()->Cleanup(frameCount);

// Pull in now-ready async shaders.
g_shader_cache->RetrieveAsyncShaders();
}

void Renderer::DrawScreen(VKTexture* xfb_texture, const EFBRectangle& xfb_region)
@@ -975,10 +972,8 @@ void Renderer::CheckForConfigChanges()
RecreateEFBFramebuffer();
RecompileShaders();
FramebufferManager::GetInstance()->RecompileShaders();
g_shader_cache->ReloadShaderAndPipelineCaches();
g_shader_cache->ReloadPipelineCache();
g_shader_cache->RecompileSharedShaders();
StateTracker::GetInstance()->InvalidateShaderPointers();
StateTracker::GetInstance()->ReloadPipelineUIDCache();
}

// For vsync, we need to change the present mode, which means recreating the swap chain.
@@ -1021,8 +1016,6 @@ void Renderer::BindEFBToStateTracker()
FramebufferManager::GetInstance()->GetEFBClearRenderPass());
StateTracker::GetInstance()->SetFramebuffer(
FramebufferManager::GetInstance()->GetEFBFramebuffer(), framebuffer_size);
StateTracker::GetInstance()->SetMultisamplingstate(
FramebufferManager::GetInstance()->GetEFBMultisamplingState());
m_current_framebuffer = nullptr;
m_current_framebuffer_width = FramebufferManager::GetInstance()->GetEFBWidth();
m_current_framebuffer_height = FramebufferManager::GetInstance()->GetEFBHeight();
@@ -1125,21 +1118,6 @@ void Renderer::SetAndClearFramebuffer(const AbstractFramebuffer* framebuffer,
num_clear_values);
}

// Hands the requested rasterization state to the Vulkan state tracker.
void Renderer::SetRasterizationState(const RasterizationState& state)
{
  auto* const tracker = StateTracker::GetInstance();
  tracker->SetRasterizationState(state);
}

// Hands the requested depth state to the Vulkan state tracker.
void Renderer::SetDepthState(const DepthState& state)
{
  auto* const tracker = StateTracker::GetInstance();
  tracker->SetDepthState(state);
}

// Hands the requested blending state to the Vulkan state tracker.
// Note the tracker-side setter is named SetBlendState, not SetBlendingState.
void Renderer::SetBlendingState(const BlendingState& state)
{
  auto* const tracker = StateTracker::GetInstance();
  tracker->SetBlendState(state);
}

void Renderer::SetTexture(u32 index, const AbstractTexture* texture)
{
// Texture should always be in SHADER_READ_ONLY layout prior to use.
@@ -77,10 +77,7 @@ class Renderer : public ::Renderer
void SetAndClearFramebuffer(const AbstractFramebuffer* framebuffer,
const ClearColor& color_value = {},
float depth_value = 0.0f) override;
void SetBlendingState(const BlendingState& state) override;
void SetScissorRect(const MathUtil::Rectangle<int>& rc) override;
void SetRasterizationState(const RasterizationState& state) override;
void SetDepthState(const DepthState& state) override;
void SetTexture(u32 index, const AbstractTexture* texture) override;
void SetSamplerState(u32 index, const SamplerState& state) override;
void UnbindTexture(const AbstractTexture* texture) override;
@@ -135,6 +132,5 @@ class Renderer : public ::Renderer

// Shaders used for clear/blit.
VkShaderModule m_clear_fragment_shader = VK_NULL_HANDLE;
const VKPipeline* m_graphics_pipeline = nullptr;
};
}

Large diffs are not rendered by default.

@@ -19,24 +19,14 @@
#include "VideoBackends/Vulkan/ObjectCache.h"
#include "VideoBackends/Vulkan/ShaderCompiler.h"

#include "VideoCommon/AsyncShaderCompiler.h"
#include "VideoCommon/GeometryShaderGen.h"
#include "VideoCommon/PixelShaderGen.h"
#include "VideoCommon/RenderState.h"
#include "VideoCommon/UberShaderPixel.h"
#include "VideoCommon/UberShaderVertex.h"
#include "VideoCommon/VertexShaderGen.h"

namespace Vulkan
{
class CommandBufferManager;
class VertexFormat;
class StreamBuffer;

class CommandBufferManager;
class VertexFormat;
class StreamBuffer;

struct PipelineInfo
{
// These are packed in descending order of size, to avoid any padding so that the structure
@@ -88,19 +78,6 @@ class ShaderCache
// Get utility shader header based on current config.
std::string GetUtilityShaderHeader() const;

// Accesses ShaderGen shader caches
VkShaderModule GetVertexShaderForUid(const VertexShaderUid& uid);
VkShaderModule GetGeometryShaderForUid(const GeometryShaderUid& uid);
VkShaderModule GetPixelShaderForUid(const PixelShaderUid& uid);

// Ubershader caches
VkShaderModule GetVertexUberShaderForUid(const UberShader::VertexShaderUid& uid);
VkShaderModule GetPixelUberShaderForUid(const UberShader::PixelShaderUid& uid);

// Accesses ShaderGen shader caches asynchronously
std::pair<VkShaderModule, bool> GetVertexShaderForUidAsync(const VertexShaderUid& uid);
std::pair<VkShaderModule, bool> GetPixelShaderForUidAsync(const PixelShaderUid& uid);

// Perform at startup, create descriptor layouts, compiles all static shaders.
bool Initialize();
void Shutdown();
@@ -112,13 +89,6 @@ class ShaderCache
// Find a pipeline by the specified description, if not found, attempts to create it.
VkPipeline GetPipeline(const PipelineInfo& info);

// Find a pipeline by the specified description, if not found, attempts to create it. If this
// resulted in a pipeline being created, the second field of the return value will be false,
// otherwise for a cache hit it will be true.
std::pair<VkPipeline, bool> GetPipelineWithCacheResult(const PipelineInfo& info);
std::pair<std::pair<VkPipeline, bool>, bool>
GetPipelineWithCacheResultAsync(const PipelineInfo& info);

// Creates a compute pipeline, and does not track the handle.
VkPipeline CreateComputePipeline(const ComputePipelineInfo& info);

@@ -139,47 +109,22 @@ class ShaderCache
void RecompileSharedShaders();

// Reload pipeline cache. This will destroy all pipelines.
void ReloadShaderAndPipelineCaches();
void ReloadPipelineCache();

// Shared shader accessors
VkShaderModule GetScreenQuadVertexShader() const { return m_screen_quad_vertex_shader; }
VkShaderModule GetPassthroughVertexShader() const { return m_passthrough_vertex_shader; }
VkShaderModule GetScreenQuadGeometryShader() const { return m_screen_quad_geometry_shader; }
VkShaderModule GetPassthroughGeometryShader() const { return m_passthrough_geometry_shader; }
void PrecompileUberShaders();
void WaitForBackgroundCompilesToComplete();
void RetrieveAsyncShaders();

private:
bool CreatePipelineCache();
bool LoadPipelineCache();
bool ValidatePipelineCache(const u8* data, size_t data_length);
void DestroyPipelineCache();
void LoadShaderCaches();
void DestroyShaderCaches();
bool CompileSharedShaders();
void DestroySharedShaders();

// We generate a dummy pipeline with some defaults in the blend/depth states,
// that way the driver is forced to compile something (looking at you, NVIDIA).
// It can then hopefully re-use part of this pipeline for others in the future.
void CreateDummyPipeline(const UberShader::VertexShaderUid& vuid, const GeometryShaderUid& guid,
const UberShader::PixelShaderUid& puid);

// Bundles the two caches kept for one shader UID type: an in-memory map and a disk cache,
// both keyed by Uid.
template <typename Uid>
struct ShaderModuleCache
{
// Maps a UID to its shader module plus a flag.
// NOTE(review): the meaning of the bool is not visible here; presumably it tracks whether an
// async compile is still pending - confirm against the users of this map.
std::map<Uid, std::pair<VkShaderModule, bool>> shader_map;
// On-disk cache keyed by the same UID type, storing u32 values
// (presumably SPIR-V code words - confirm).
LinearDiskCache<Uid, u32> disk_cache;
};
ShaderModuleCache<VertexShaderUid> m_vs_cache;
ShaderModuleCache<GeometryShaderUid> m_gs_cache;
ShaderModuleCache<PixelShaderUid> m_ps_cache;
ShaderModuleCache<UberShader::VertexShaderUid> m_uber_vs_cache;
ShaderModuleCache<UberShader::PixelShaderUid> m_uber_ps_cache;

std::unordered_map<PipelineInfo, std::pair<VkPipeline, bool>, PipelineInfoHash>
m_pipeline_objects;
std::unordered_map<PipelineInfo, VkPipeline, PipelineInfoHash> m_pipeline_objects;
std::unordered_map<ComputePipelineInfo, VkPipeline, ComputePipelineInfoHash>
m_compute_pipeline_objects;
VkPipelineCache m_pipeline_cache = VK_NULL_HANDLE;
@@ -190,45 +135,6 @@ class ShaderCache
VkShaderModule m_passthrough_vertex_shader = VK_NULL_HANDLE;
VkShaderModule m_screen_quad_geometry_shader = VK_NULL_HANDLE;
VkShaderModule m_passthrough_geometry_shader = VK_NULL_HANDLE;

std::unique_ptr<VideoCommon::AsyncShaderCompiler> m_async_shader_compiler;

// TODO: Use templates to reduce the number of these classes.
// Async work item carrying the state needed to build the vertex shader for one UID.
// NOTE(review): Compile() and Retrieve() are defined out of line; presumably Compile() runs on a
// worker thread and Retrieve() on the submitting thread - confirm against AsyncShaderCompiler.
class VertexShaderCompilerWorkItem : public VideoCommon::AsyncShaderCompiler::WorkItem
{
public:
// Captures a copy of the UID this item will work on.
explicit VertexShaderCompilerWorkItem(const VertexShaderUid& uid) : m_uid(uid) {}
bool Compile() override;
void Retrieve() override;

private:
// UID the item was created for.
VertexShaderUid m_uid;
// SPIR-V code buffer (presumably filled by Compile() - confirm).
ShaderCompiler::SPIRVCodeVector m_spirv;
// Shader module handle; starts out as the null handle.
VkShaderModule m_module = VK_NULL_HANDLE;
};
// Async work item carrying the state needed to build the pixel shader for one UID.
// NOTE(review): Compile() and Retrieve() are defined out of line; presumably Compile() runs on a
// worker thread and Retrieve() on the submitting thread - confirm against AsyncShaderCompiler.
class PixelShaderCompilerWorkItem : public VideoCommon::AsyncShaderCompiler::WorkItem
{
public:
// Captures a copy of the UID this item will work on.
explicit PixelShaderCompilerWorkItem(const PixelShaderUid& uid) : m_uid(uid) {}
bool Compile() override;
void Retrieve() override;

private:
// UID the item was created for.
PixelShaderUid m_uid;
// SPIR-V code buffer (presumably filled by Compile() - confirm).
ShaderCompiler::SPIRVCodeVector m_spirv;
// Shader module handle; starts out as the null handle.
VkShaderModule m_module = VK_NULL_HANDLE;
};
// Async work item carrying the state needed to build one pipeline from a PipelineInfo.
// NOTE(review): Compile() and Retrieve() are defined out of line; presumably Compile() runs on a
// worker thread and Retrieve() on the submitting thread - confirm against AsyncShaderCompiler.
class PipelineCompilerWorkItem : public VideoCommon::AsyncShaderCompiler::WorkItem
{
public:
  // Captures a copy of the pipeline description this item will work on.
  explicit PipelineCompilerWorkItem(const PipelineInfo& info) : m_info(info) {}
  bool Compile() override;
  void Retrieve() override;

private:
  // Description of the pipeline to build.
  PipelineInfo m_info;
  // Resulting pipeline handle. Starts out as the null handle, matching the shader work items,
  // so the member never holds an indeterminate value if Compile() does not assign it.
  VkPipeline m_pipeline = VK_NULL_HANDLE;
};
};

extern std::unique_ptr<ShaderCache> g_shader_cache;

Large diffs are not rendered by default.

@@ -9,19 +9,14 @@
#include <memory>

#include "Common/CommonTypes.h"
#include "Common/LinearDiskCache.h"
#include "VideoBackends/Vulkan/Constants.h"
#include "VideoBackends/Vulkan/ShaderCache.h"
#include "VideoCommon/GeometryShaderGen.h"
#include "VideoCommon/NativeVertexFormat.h"
#include "VideoCommon/PixelShaderGen.h"
#include "VideoCommon/RenderBase.h"
#include "VideoCommon/UberShaderPixel.h"
#include "VideoCommon/UberShaderVertex.h"
#include "VideoCommon/VertexShaderGen.h"

namespace Vulkan
{
class VKPipeline;
class StreamBuffer;
class VertexFormat;

@@ -35,31 +30,18 @@ class StateTracker
static bool CreateInstance();
static void DestroyInstance();

const RasterizationState& GetRasterizationState() const
{
return m_pipeline_state.rasterization_state;
}
const DepthState& GetDepthStencilState() const { return m_pipeline_state.depth_state; }
const BlendingState& GetBlendState() const { return m_pipeline_state.blend_state; }
const std::array<VkDescriptorImageInfo, NUM_PIXEL_SHADER_SAMPLERS>& GetPSSamplerBindings() const
{
return m_bindings.ps_samplers;
}
VkFramebuffer GetFramebuffer() const { return m_framebuffer; }
const VKPipeline* GetPipeline() const { return m_pipeline; }
void SetVertexBuffer(VkBuffer buffer, VkDeviceSize offset);
void SetIndexBuffer(VkBuffer buffer, VkDeviceSize offset, VkIndexType type);

void SetRenderPass(VkRenderPass load_render_pass, VkRenderPass clear_render_pass);
void SetFramebuffer(VkFramebuffer framebuffer, const VkRect2D& render_area);
void SetVertexFormat(const VertexFormat* vertex_format);

void SetRasterizationState(const RasterizationState& state);
void SetMultisamplingstate(const MultisamplingState& state);
void SetDepthState(const DepthState& state);
void SetBlendState(const BlendingState& state);

bool CheckForShaderChanges();
void ClearShaders();
void SetPipeline(const VKPipeline* pipeline);

void UpdateVertexShaderConstants();
void UpdateGeometryShaderConstants();
@@ -68,7 +50,6 @@ class StateTracker
void SetTexture(size_t index, VkImageView view);
void SetSampler(size_t index, VkSampler sampler);

void SetBBoxEnable(bool enable);
void SetBBoxBuffer(VkBuffer buffer, VkDeviceSize offset, VkDeviceSize range);

void UnbindTexture(VkImageView view);
@@ -117,30 +98,11 @@ class StateTracker

bool IsWithinRenderArea(s32 x, s32 y, u32 width, u32 height) const;

// Reloads the UID cache, ensuring all pipelines used by the game so far have been created.
void ReloadPipelineUIDCache();

// Clears shader pointers, ensuring that now-deleted modules are not used.
void InvalidateShaderPointers();

private:
// Serialized version of PipelineInfo, used when loading/saving the pipeline UID cache.
struct SerializedPipelineUID
{
// Fixed-function state, each stored as a plain 32-bit value
// (presumably the raw bit representation of the matching state struct - confirm).
u32 rasterizer_state_bits;
u32 depth_state_bits;
u32 blend_state_bits;
// Vertex layout in its portable (backend-independent) declaration form.
PortableVertexDeclaration vertex_decl;
// Shader UIDs for each stage of the draw.
VertexShaderUid vs_uid;
GeometryShaderUid gs_uid;
PixelShaderUid ps_uid;
};

// Number of descriptor sets for game draws.
enum
{
NUM_GX_DRAW_DESCRIPTOR_SETS = DESCRIPTOR_SET_BIND_POINT_PIXEL_SHADER_SAMPLERS + 1,
NUM_GX_DRAW_WITH_BBOX_DESCRIPTOR_SETS = DESCRIPTOR_SET_BIND_POINT_STORAGE_OR_TEXEL_BUFFER + 1
NUM_GX_DRAW_DESCRIPTOR_SETS = DESCRIPTOR_SET_BIND_POINT_STORAGE_OR_TEXEL_BUFFER + 1
};

enum DITRY_FLAG : u32
@@ -157,36 +119,17 @@ class StateTracker
DIRTY_FLAG_SCISSOR = (1 << 9),
DIRTY_FLAG_PIPELINE = (1 << 10),
DIRTY_FLAG_DESCRIPTOR_SET_BINDING = (1 << 11),
DIRTY_FLAG_PIPELINE_BINDING = (1 << 12),

DIRTY_FLAG_ALL_DESCRIPTOR_SETS = DIRTY_FLAG_VS_UBO | DIRTY_FLAG_GS_UBO | DIRTY_FLAG_PS_UBO |
DIRTY_FLAG_PS_SAMPLERS | DIRTY_FLAG_PS_SSBO
};

bool Initialize();

// Appends the specified pipeline info, combined with the UIDs stored in the class.
// The info is here so that we can store variations of a UID, e.g. blend state.
void AppendToPipelineUIDCache(const PipelineInfo& info);

// Precaches a pipeline based on the UID information.
bool PrecachePipelineUID(const SerializedPipelineUID& uid);

// Check that the specified viewport is within the render area.
// If not, ends the render pass if it is a clear render pass.
bool IsViewportWithinRenderArea() const;

// Obtains a Vulkan pipeline object for the specified pipeline configuration.
// Also adds this pipeline configuration to the UID cache if it is not present already.
VkPipeline GetPipelineAndCacheUID();

// Are bounding box ubershaders enabled? If so, we need to ensure the SSBO is set up,
// since the bbox writes are determined by a uniform.
bool IsSSBODescriptorRequired() const;

bool UpdatePipeline();
void UpdatePipelineLayout();
void UpdatePipelineVertexFormat();
bool UpdateDescriptorSet();

// Allocates storage in the uniform buffer of the specified size. If this storage cannot be
@@ -205,18 +148,8 @@ class StateTracker
VkDeviceSize m_index_buffer_offset = 0;
VkIndexType m_index_type = VK_INDEX_TYPE_UINT16;

// shader state
VertexShaderUid m_vs_uid = {};
GeometryShaderUid m_gs_uid = {};
PixelShaderUid m_ps_uid = {};
UberShader::VertexShaderUid m_uber_vs_uid = {};
UberShader::PixelShaderUid m_uber_ps_uid = {};
bool m_using_ubershaders = false;

// pipeline state
PipelineInfo m_pipeline_state = {};
VkPipeline m_pipeline_object = VK_NULL_HANDLE;
const VertexFormat* m_vertex_format = nullptr;
const VKPipeline* m_pipeline = nullptr;

// shader bindings
std::array<VkDescriptorSet, NUM_DESCRIPTOR_SET_BIND_POINTS> m_descriptor_sets = {};
@@ -230,8 +163,8 @@ class StateTracker

VkDescriptorBufferInfo ps_ssbo = {};
} m_bindings;
u32 m_num_active_descriptor_sets = 0;
size_t m_uniform_buffer_reserve_size = 0;
u32 m_num_active_descriptor_sets = 0;

// rasterization
VkViewport m_viewport = {0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f};
@@ -246,18 +179,11 @@ class StateTracker
VkRenderPass m_current_render_pass = VK_NULL_HANDLE;
VkRect2D m_framebuffer_size = {};
VkRect2D m_framebuffer_render_area = {};
bool m_bbox_enabled = false;

// CPU access tracking
u32 m_draw_counter = 0;
std::vector<u32> m_cpu_accesses_this_frame;
std::vector<u32> m_scheduled_command_buffer_kicks;
bool m_allow_background_execution = true;

// Draw state cache on disk
// We don't actually use the value field here, instead we generate the shaders from the uid
// on-demand. If all goes well, it should hit the shader and Vulkan pipeline cache, therefore
// loading should be reasonably efficient.
LinearDiskCache<SerializedPipelineUID, u32> m_uid_cache;
};
}
@@ -592,16 +592,19 @@ void UtilityShaderDraw::BindDescriptors()
&dummy_uniform_buffer,
nullptr};

set_writes[num_set_writes++] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
nullptr,
set,
UBO_DESCRIPTOR_SET_BINDING_GS,
0,
1,
VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC,
nullptr,
&dummy_uniform_buffer,
nullptr};
if (g_vulkan_context->SupportsGeometryShaders())
{
set_writes[num_set_writes++] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
nullptr,
set,
UBO_DESCRIPTOR_SET_BINDING_GS,
0,
1,
VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC,
nullptr,
&dummy_uniform_buffer,
nullptr};
}

set_writes[num_set_writes++] = {
VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, nullptr, set, UBO_DESCRIPTOR_SET_BINDING_PS, 0, 1,
@@ -14,7 +14,9 @@

namespace Vulkan
{
VKPipeline::VKPipeline(VkPipeline pipeline) : m_pipeline(pipeline)
VKPipeline::VKPipeline(VkPipeline pipeline, VkPipelineLayout pipeline_layout,
AbstractPipelineUsage usage)
: m_pipeline(pipeline), m_pipeline_layout(pipeline_layout), m_usage(usage)
{
}

@@ -30,7 +32,8 @@ std::unique_ptr<VKPipeline> VKPipeline::Create(const AbstractPipelineConfig& con
// Get render pass for config.
VkRenderPass render_pass = g_object_cache->GetRenderPass(
Util::GetVkFormatForHostTextureFormat(config.framebuffer_state.color_texture_format),
VK_FORMAT_UNDEFINED, config.framebuffer_state.samples, VK_ATTACHMENT_LOAD_OP_LOAD);
Util::GetVkFormatForHostTextureFormat(config.framebuffer_state.depth_texture_format),
config.framebuffer_state.samples, VK_ATTACHMENT_LOAD_OP_LOAD);

// Get pipeline layout.
VkPipelineLayout pipeline_layout;
@@ -68,6 +71,6 @@ std::unique_ptr<VKPipeline> VKPipeline::Create(const AbstractPipelineConfig& con
if (pipeline == VK_NULL_HANDLE)
return nullptr;

return std::make_unique<VKPipeline>(pipeline);
return std::make_unique<VKPipeline>(pipeline, pipeline_layout, config.usage);
}
} // namespace Vulkan
@@ -14,14 +14,19 @@ namespace Vulkan
// Vulkan implementation of AbstractPipeline: holds a VkPipeline handle together with the
// pipeline layout and usage it is associated with.
class VKPipeline final : public AbstractPipeline
{
public:
// Constructs from a pipeline handle only.
explicit VKPipeline(VkPipeline pipeline);
// Constructs from a pipeline handle plus its layout and usage.
explicit VKPipeline(VkPipeline pipeline, VkPipelineLayout pipeline_layout,
AbstractPipelineUsage usage);
// NOTE(review): defined out of line; presumably releases the pipeline handle - confirm.
~VKPipeline() override;

// Both accessors return the same stored VkPipeline handle.
VkPipeline GetPipeline() const { return m_pipeline; }
VkPipeline GetVkPipeline() const { return m_pipeline; }
// Returns the stored pipeline layout handle.
VkPipelineLayout GetVkPipelineLayout() const { return m_pipeline_layout; }
// Returns the stored usage value.
AbstractPipelineUsage GetUsage() const { return m_usage; }
// Factory: builds a pipeline from an abstract config. The definition visible in this file
// returns nullptr when pipeline creation yields VK_NULL_HANDLE.
static std::unique_ptr<VKPipeline> Create(const AbstractPipelineConfig& config);

private:
// None of these have default member initializers; they are expected to be set by a constructor.
VkPipeline m_pipeline;
VkPipelineLayout m_pipeline_layout;
AbstractPipelineUsage m_usage;
};

} // namespace Vulkan
@@ -2,6 +2,7 @@
// Licensed under GPLv2+
// Refer to the license.txt file included.

#include "Common/Align.h"
#include "Common/Assert.h"

#include "VideoBackends/Vulkan/ShaderCompiler.h"
@@ -103,20 +104,11 @@ std::unique_ptr<VKShader> VKShader::CreateFromSource(ShaderStage stage, const ch
// Creates a shader object for `stage` from a pre-compiled binary blob of `length` bytes.
// The blob is copied into a SPIR-V code vector sized to `length` rounded up to a whole number of
// code words, then handed to CreateShaderObject().
std::unique_ptr<VKShader> VKShader::CreateFromBinary(ShaderStage stage, const void* data,
                                                     size_t length)
{
  // Round up so that non-word-aligned code sizes (unlikely, unless using VK_NV_glsl) still fit.
  // The word count must be derived from `length` itself, not from sizeof(length).
  const size_t size_in_words = Common::AlignUp(length, sizeof(ShaderCompiler::SPIRVCodeType)) /
                               sizeof(ShaderCompiler::SPIRVCodeType);

  // Sized construction; assuming SPIRVCodeVector is a std::vector, any padding bytes in the
  // final word are zero-initialized.
  ShaderCompiler::SPIRVCodeVector spv(size_in_words);

  // Skip the copy for an empty blob so memcpy is never called with a possibly-null pointer.
  if (length > 0)
    std::memcpy(spv.data(), data, length);

  return CreateShaderObject(stage, std::move(spv));
}
@@ -139,8 +139,6 @@ void VertexManager::vFlush()
u32 index_count = IndexGenerator::GetIndexLen();

// Update tracked state
StateTracker::GetInstance()->SetVertexFormat(vertex_format);
StateTracker::GetInstance()->CheckForShaderChanges();
StateTracker::GetInstance()->UpdateVertexShaderConstants();
StateTracker::GetInstance()->UpdateGeometryShaderConstants();
StateTracker::GetInstance()->UpdatePixelShaderConstants();
@@ -165,12 +163,10 @@ void VertexManager::vFlush()
bounding_box->Flush();
bounding_box->Invalidate();
}

// Update which descriptor set/pipeline layout to use.
StateTracker::GetInstance()->SetBBoxEnable(bounding_box_enabled);
}

// Bind all pending state to the command buffer
g_renderer->SetPipeline(m_current_pipeline_object);
if (!StateTracker::GetInstance()->Bind())
{
WARN_LOG(VIDEO, "Skipped draw of %u indices", index_count);
@@ -235,6 +235,7 @@ void VulkanContext::PopulateBackendInfo(VideoConfig* config)
config->backend_info.bSupportsBitfield = true; // Assumed support.
config->backend_info.bSupportsDynamicSamplerIndexing = true; // Assumed support.
config->backend_info.bSupportsPostProcessing = true; // Assumed support.
config->backend_info.bSupportsBackgroundCompiling = true; // Assumed support.
config->backend_info.bSupportsDualSourceBlend = false; // Dependent on features.
config->backend_info.bSupportsGeometryShaders = false; // Dependent on features.
config->backend_info.bSupportsGSInstancing = false; // Dependent on features.
@@ -223,42 +223,40 @@ bool VideoBackend::Initialize(void* window_handle)
g_renderer = std::make_unique<Renderer>(std::move(swap_chain));
g_vertex_manager = std::make_unique<VertexManager>();
g_texture_cache = std::make_unique<TextureCache>();
::g_shader_cache = std::make_unique<VideoCommon::ShaderCache>();
g_perf_query = std::make_unique<PerfQuery>();

// Invoke init methods on main wrapper classes.
// These have to be done before the others because the destructors
// for the remaining classes may call methods on these.
if (!StateTracker::CreateInstance() || !FramebufferManager::GetInstance()->Initialize() ||
!Renderer::GetInstance()->Initialize() || !VertexManager::GetInstance()->Initialize() ||
!TextureCache::GetInstance()->Initialize() || !PerfQuery::GetInstance()->Initialize())
!TextureCache::GetInstance()->Initialize() || !PerfQuery::GetInstance()->Initialize() ||
!::g_shader_cache->Initialize())
{
PanicAlert("Failed to initialize Vulkan classes.");
Shutdown();
return false;
}

// Ensure all pipelines previously used by the game have been created.
StateTracker::GetInstance()->ReloadPipelineUIDCache();

// Lastly, precompile ubershaders, if requested.
// This has to be done after the texture cache and shader cache are initialized.
if (g_ActiveConfig.CanPrecompileUberShaders())
g_shader_cache->PrecompileUberShaders();

// Display the name so the user knows which device was actually created.
INFO_LOG(VIDEO, "Vulkan Device: %s", g_vulkan_context->GetDeviceProperties().deviceName);
return true;
}

void VideoBackend::Shutdown()
{
if (g_renderer)
g_renderer->Shutdown();

if (g_command_buffer_mgr)
g_command_buffer_mgr->WaitForGPUIdle();

if (::g_shader_cache)
::g_shader_cache->Shutdown();

if (g_renderer)
g_renderer->Shutdown();

g_perf_query.reset();
::g_shader_cache.reset();
g_texture_cache.reset();
g_vertex_manager.reset();
g_renderer.reset();
@@ -18,7 +18,6 @@ AsyncShaderCompiler::~AsyncShaderCompiler()
// Pending work can be left at shutdown.
// The work item classes are expected to clean up after themselves.
_assert_(!HasWorkerThreads());
_assert_(m_completed_work.empty());
}

void AsyncShaderCompiler::QueueWorkItem(WorkItemPtr item)