@@ -312,7 +312,7 @@ bool ProgramShaderCache::CompileShader(SHADER& shader, const std::string& vcode,
if (shader.gsid)
glAttachShader(shader.glprogid, shader.gsid);

if (g_ogl_config.bSupportsGLSLCache)
if (g_ActiveConfig.backend_info.bSupportsPipelineCacheData)
glProgramParameteri(shader.glprogid, GL_PROGRAM_BINARY_RETRIEVABLE_HINT, GL_TRUE);

shader.SetProgramBindings(false);
@@ -562,10 +562,12 @@ void ProgramShaderCache::InvalidateLastProgram()
CurrentProgram = 0;
}

const PipelineProgram* ProgramShaderCache::GetPipelineProgram(const GLVertexFormat* vertex_format,
const OGLShader* vertex_shader,
const OGLShader* geometry_shader,
const OGLShader* pixel_shader)
PipelineProgram* ProgramShaderCache::GetPipelineProgram(const GLVertexFormat* vertex_format,
const OGLShader* vertex_shader,
const OGLShader* geometry_shader,
const OGLShader* pixel_shader,
const void* cache_data,
size_t cache_data_size)
{
PipelineProgramKey key = {vertex_shader ? vertex_shader->GetID() : 0,
geometry_shader ? geometry_shader->GetID() : 0,
@@ -580,39 +582,69 @@ const PipelineProgram* ProgramShaderCache::GetPipelineProgram(const GLVertexForm
}
}

// We temporarily change the vertex array to the pipeline's vertex format.
// This can prevent the NVIDIA OpenGL driver from recompiling on first use.
GLuint vao = vertex_format ? vertex_format->VAO : s_attributeless_VAO;
if (s_is_shared_context || vao != s_last_VAO)
glBindVertexArray(vao);

std::unique_ptr<PipelineProgram> prog = std::make_unique<PipelineProgram>();
prog->key = key;

// Attach shaders.
ASSERT(vertex_shader && vertex_shader->GetStage() == ShaderStage::Vertex);
ASSERT(pixel_shader && pixel_shader->GetStage() == ShaderStage::Pixel);
prog->shader.glprogid = glCreateProgram();
glAttachShader(prog->shader.glprogid, vertex_shader->GetGLShaderID());
glAttachShader(prog->shader.glprogid, pixel_shader->GetGLShaderID());
if (geometry_shader)

// Use the cache data, if present. If this fails, we want to return an error, so the shader cache
// doesn't attempt to use the same binary data in the future.
if (cache_data_size >= sizeof(u32))
{
ASSERT(geometry_shader->GetStage() == ShaderStage::Geometry);
glAttachShader(prog->shader.glprogid, geometry_shader->GetGLShaderID());
u32 program_binary_type;
std::memcpy(&program_binary_type, cache_data, sizeof(u32));
glProgramBinary(prog->shader.glprogid, static_cast<GLenum>(program_binary_type),
static_cast<const u8*>(cache_data) + sizeof(u32),
static_cast<GLsizei>(cache_data_size - sizeof(u32)));

// Check the link status. If this fails, it means the binary was invalid.
GLint link_status;
glGetProgramiv(prog->shader.glprogid, GL_LINK_STATUS, &link_status);
if (link_status != GL_TRUE)
{
WARN_LOG(VIDEO, "Failed to create GL program from program binary.");
prog->shader.Destroy();
return nullptr;
}

// We don't want to retrieve this binary and duplicate entries in the cache again.
// See the explanation in OGLPipeline.cpp.
prog->binary_retrieved = true;
}
else
{
// We temporarily change the vertex array to the pipeline's vertex format.
// This can prevent the NVIDIA OpenGL driver from recompiling on first use.
GLuint vao = vertex_format ? vertex_format->VAO : s_attributeless_VAO;
if (s_is_shared_context || vao != s_last_VAO)
glBindVertexArray(vao);

// Attach shaders.
ASSERT(vertex_shader && vertex_shader->GetStage() == ShaderStage::Vertex);
ASSERT(pixel_shader && pixel_shader->GetStage() == ShaderStage::Pixel);
glAttachShader(prog->shader.glprogid, vertex_shader->GetGLShaderID());
glAttachShader(prog->shader.glprogid, pixel_shader->GetGLShaderID());
if (geometry_shader)
{
ASSERT(geometry_shader->GetStage() == ShaderStage::Geometry);
glAttachShader(prog->shader.glprogid, geometry_shader->GetGLShaderID());
}

// Link program.
prog->shader.SetProgramBindings(false);
glLinkProgram(prog->shader.glprogid);
if (g_ActiveConfig.backend_info.bSupportsPipelineCacheData)
glProgramParameteri(prog->shader.glprogid, GL_PROGRAM_BINARY_RETRIEVABLE_HINT, GL_TRUE);

// Restore VAO binding after linking.
if (!s_is_shared_context && vao != s_last_VAO)
glBindVertexArray(s_last_VAO);
// Link program.
prog->shader.SetProgramBindings(false);
glLinkProgram(prog->shader.glprogid);

if (!ProgramShaderCache::CheckProgramLinkResult(prog->shader.glprogid, {}, {}, {}))
{
prog->shader.Destroy();
return nullptr;
// Restore VAO binding after linking.
if (!s_is_shared_context && vao != s_last_VAO)
glBindVertexArray(s_last_VAO);

if (!ProgramShaderCache::CheckProgramLinkResult(prog->shader.glprogid, {}, {}, {}))
{
prog->shader.Destroy();
return nullptr;
}
}

// Lock to insert. A duplicate program may have been created in the meantime.
@@ -639,16 +671,17 @@ const PipelineProgram* ProgramShaderCache::GetPipelineProgram(const GLVertexForm
return ip.first->second.get();
}

void ProgramShaderCache::ReleasePipelineProgram(const PipelineProgram* prog)
void ProgramShaderCache::ReleasePipelineProgram(PipelineProgram* prog)
{
if (--prog->reference_count > 0)
return;

prog->shader.Destroy();

std::lock_guard<std::mutex> guard(s_pipeline_program_lock);
auto iter = s_pipeline_programs.find(prog->key);
ASSERT(iter != s_pipeline_programs.end() && prog == iter->second.get());

if (--iter->second->reference_count == 0)
{
iter->second->shader.Destroy();
s_pipeline_programs.erase(iter);
}
s_pipeline_programs.erase(iter);
}

void ProgramShaderCache::CreateHeader()
@@ -63,6 +63,7 @@ struct PipelineProgram
PipelineProgramKey key;
SHADER shader;
std::atomic_size_t reference_count{1};
bool binary_retrieved = false;
};

class ProgramShaderCache
@@ -97,11 +98,12 @@ class ProgramShaderCache
// pipeline do not match the pipeline configuration.
static u64 GenerateShaderID();

static const PipelineProgram* GetPipelineProgram(const GLVertexFormat* vertex_format,
const OGLShader* vertex_shader,
const OGLShader* geometry_shader,
const OGLShader* pixel_shader);
static void ReleasePipelineProgram(const PipelineProgram* prog);
static PipelineProgram* GetPipelineProgram(const GLVertexFormat* vertex_format,
const OGLShader* vertex_shader,
const OGLShader* geometry_shader,
const OGLShader* pixel_shader, const void* cache_data,
size_t cache_data_size);
static void ReleasePipelineProgram(PipelineProgram* prog);

private:
typedef std::unordered_map<PipelineProgramKey, std::unique_ptr<PipelineProgram>,
@@ -350,6 +350,7 @@ Renderer::Renderer(std::unique_ptr<GLContext> main_gl_context, float backbuffer_
}

bool bSuccess = true;
bool supports_glsl_cache = false;

g_ogl_config.gl_vendor = (const char*)glGetString(GL_VENDOR);
g_ogl_config.gl_renderer = (const char*)glGetString(GL_RENDERER);
@@ -466,7 +467,7 @@ Renderer::Renderer(std::unique_ptr<GLContext> main_gl_context, float backbuffer_
GLExtensions::Supports("GL_ARB_gpu_shader5");

g_ogl_config.bIsES = m_main_gl_context->IsGLES();
g_ogl_config.bSupportsGLSLCache = GLExtensions::Supports("GL_ARB_get_program_binary");
supports_glsl_cache = GLExtensions::Supports("GL_ARB_get_program_binary");
g_ogl_config.bSupportsGLPinnedMemory = GLExtensions::Supports("GL_AMD_pinned_memory");
g_ogl_config.bSupportsGLSync = GLExtensions::Supports("GL_ARB_sync");
g_ogl_config.bSupportsGLBaseVertex = GLExtensions::Supports("GL_ARB_draw_elements_base_vertex") ||
@@ -507,7 +508,7 @@ Renderer::Renderer(std::unique_ptr<GLContext> main_gl_context, float backbuffer_
EsTexbufType::TexbufExt :
EsTexbufType::TexbufNone;

g_ogl_config.bSupportsGLSLCache = true;
supports_glsl_cache = true;
g_ogl_config.bSupportsGLSync = true;

// TODO: Implement support for GL_EXT_clip_cull_distance when there is an extension for
@@ -675,6 +676,16 @@ Renderer::Renderer(std::unique_ptr<GLContext> main_gl_context, float backbuffer_
g_Config.backend_info.bSupportsBackgroundCompiling =
!DriverDetails::HasBug(DriverDetails::BUG_SHARED_CONTEXT_SHADER_COMPILATION);

// Program binaries are supported on GL4.1+, ARB_get_program_binary, or ES3.
if (supports_glsl_cache)
{
// We need to check the number of formats supported. If zero, don't bother getting the binaries.
GLint num_formats = 0;
glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats);
supports_glsl_cache = num_formats > 0;
}
g_Config.backend_info.bSupportsPipelineCacheData = supports_glsl_cache;

if (g_ogl_config.bSupportsDebug)
{
if (GLExtensions::Supports("GL_KHR_debug"))
@@ -739,7 +750,7 @@ Renderer::Renderer(std::unique_ptr<GLContext> main_gl_context, float backbuffer_
g_ActiveConfig.backend_info.bSupportsPrimitiveRestart ? "" : "PrimitiveRestart ",
g_ActiveConfig.backend_info.bSupportsEarlyZ ? "" : "EarlyZ ",
g_ogl_config.bSupportsGLPinnedMemory ? "" : "PinnedMemory ",
g_ogl_config.bSupportsGLSLCache ? "" : "ShaderCache ",
supports_glsl_cache ? "" : "ShaderCache ",
g_ogl_config.bSupportsGLBaseVertex ? "" : "BaseVertex ",
g_ogl_config.bSupportsGLBufferStorage ? "" : "BufferStorage ",
g_ogl_config.bSupportsGLSync ? "" : "Sync ", g_ogl_config.bSupportsMSAA ? "" : "MSAA ",
@@ -828,9 +839,11 @@ std::unique_ptr<AbstractShader> Renderer::CreateShaderFromBinary(ShaderStage sta
return nullptr;
}

std::unique_ptr<AbstractPipeline> Renderer::CreatePipeline(const AbstractPipelineConfig& config)
std::unique_ptr<AbstractPipeline> Renderer::CreatePipeline(const AbstractPipelineConfig& config,
const void* cache_data,
size_t cache_data_length)
{
return OGLPipeline::Create(config);
return OGLPipeline::Create(config, cache_data, cache_data_length);
}

void Renderer::SetScissorRect(const MathUtil::Rectangle<int>& rc)
@@ -48,7 +48,6 @@ enum class EsFbFetchType
struct VideoConfig
{
bool bIsES;
bool bSupportsGLSLCache;
bool bSupportsGLPinnedMemory;
bool bSupportsGLSync;
bool bSupportsGLBaseVertex;
@@ -102,7 +101,9 @@ class Renderer : public ::Renderer
size_t length) override;
std::unique_ptr<NativeVertexFormat>
CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) override;
std::unique_ptr<AbstractPipeline> CreatePipeline(const AbstractPipelineConfig& config) override;
std::unique_ptr<AbstractPipeline> CreatePipeline(const AbstractPipelineConfig& config,
const void* cache_data = nullptr,
size_t cache_data_length = 0) override;
std::unique_ptr<AbstractFramebuffer>
CreateFramebuffer(AbstractTexture* color_attachment, AbstractTexture* depth_attachment) override;

@@ -90,6 +90,8 @@ void VideoBackend::InitBackendInfo()
g_Config.backend_info.bSupportsCopyToVram = true;
g_Config.backend_info.bSupportsLargePoints = true;
g_Config.backend_info.bSupportsPartialDepthCopies = true;
g_Config.backend_info.bSupportsShaderBinaries = false;
g_Config.backend_info.bSupportsPipelineCacheData = false;

// TODO: There is a bug here, if texel buffers or SSBOs/atomics are not supported the graphics
// options will show the option when it is not supported. The only way around this would be
@@ -64,7 +64,6 @@ class SWShader final : public AbstractShader
explicit SWShader(ShaderStage stage) : AbstractShader(stage) {}
~SWShader() = default;

bool HasBinary() const override { return false; }
BinaryData GetBinary() const override { return {}; }
};

@@ -87,7 +86,9 @@ class SWPipeline final : public AbstractPipeline
~SWPipeline() override = default;
};

std::unique_ptr<AbstractPipeline> SWRenderer::CreatePipeline(const AbstractPipelineConfig& config)
std::unique_ptr<AbstractPipeline> SWRenderer::CreatePipeline(const AbstractPipelineConfig& config,
const void* cache_data,
size_t cache_data_length)
{
return std::make_unique<SWPipeline>();
}
@@ -33,7 +33,9 @@ class SWRenderer final : public Renderer
size_t length) override;
std::unique_ptr<NativeVertexFormat>
CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) override;
std::unique_ptr<AbstractPipeline> CreatePipeline(const AbstractPipelineConfig& config) override;
std::unique_ptr<AbstractPipeline> CreatePipeline(const AbstractPipelineConfig& config,
const void* cache_data = nullptr,
size_t cache_data_length = 0) override;

u32 AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data) override;
void PokeEFB(EFBAccessType type, const EfbPokeData* points, size_t num_points) override {}
@@ -74,6 +74,8 @@ void VideoSoftware::InitBackendInfo()
g_Config.backend_info.bSupportsFramebufferFetch = false;
g_Config.backend_info.bSupportsBackgroundCompiling = false;
g_Config.backend_info.bSupportsLogicOp = true;
g_Config.backend_info.bSupportsShaderBinaries = false;
g_Config.backend_info.bSupportsPipelineCacheData = false;

// aamodes
g_Config.backend_info.AAModes = {1};
@@ -112,7 +112,9 @@ Renderer::CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl)
return std::make_unique<VertexFormat>(vtx_decl);
}

std::unique_ptr<AbstractPipeline> Renderer::CreatePipeline(const AbstractPipelineConfig& config)
std::unique_ptr<AbstractPipeline> Renderer::CreatePipeline(const AbstractPipelineConfig& config,
const void* cache_data,
size_t cache_data_length)
{
return VKPipeline::Create(config);
}
@@ -48,7 +48,9 @@ class Renderer : public ::Renderer
size_t length) override;
std::unique_ptr<NativeVertexFormat>
CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) override;
std::unique_ptr<AbstractPipeline> CreatePipeline(const AbstractPipelineConfig& config) override;
std::unique_ptr<AbstractPipeline> CreatePipeline(const AbstractPipelineConfig& config,
const void* cache_data = nullptr,
size_t cache_data_length = 0) override;

SwapChain* GetSwapChain() const { return m_swap_chain.get(); }
BoundingBox* GetBoundingBox() const { return m_bounding_box.get(); }
@@ -32,12 +32,6 @@ VKShader::~VKShader()
vkDestroyPipeline(g_vulkan_context->GetDevice(), m_compute_pipeline, nullptr);
}

bool VKShader::HasBinary() const
{
ASSERT(!m_spv.empty());
return true;
}

AbstractShader::BinaryData VKShader::GetBinary() const
{
BinaryData ret(sizeof(u32) * m_spv.size());
@@ -23,7 +23,6 @@ class VKShader final : public AbstractShader

VkShaderModule GetShaderModule() const { return m_module; }
VkPipeline GetComputePipeline() const { return m_compute_pipeline; }
bool HasBinary() const override;
BinaryData GetBinary() const override;

static std::unique_ptr<VKShader> CreateFromSource(ShaderStage stage, const char* source,
@@ -266,6 +266,8 @@ void VulkanContext::PopulateBackendInfo(VideoConfig* config)
config->backend_info.bSupportsGPUTextureDecoding = true; // Assumed support.
config->backend_info.bSupportsBitfield = true; // Assumed support.
config->backend_info.bSupportsPartialDepthCopies = true; // Assumed support.
config->backend_info.bSupportsShaderBinaries = true; // Assumed support.
config->backend_info.bSupportsPipelineCacheData = false; // Handled via pipeline caches.
config->backend_info.bSupportsDynamicSamplerIndexing = true; // Assumed support.
config->backend_info.bSupportsPostProcessing = true; // Assumed support.
config->backend_info.bSupportsBackgroundCompiling = true; // Assumed support.
@@ -75,4 +75,10 @@ class AbstractPipeline
public:
AbstractPipeline() = default;
virtual ~AbstractPipeline() = default;

// "Cache data" can be used to assist a driver with creating pipelines by using previously
// compiled shader ISA. The abstract shaders and creation struct are still required to create
// pipeline objects, the cache is optionally used by the driver to speed up compilation.
using CacheData = std::vector<u8>;
virtual CacheData GetCacheData() const { return {}; }
};
@@ -25,9 +25,11 @@ class AbstractShader
virtual ~AbstractShader() = default;

ShaderStage GetStage() const { return m_stage; }

// Shader binaries represent the input source code in a lower-level form. e.g. SPIR-V or DXBC.
// The shader source code is not required to create a shader object from the binary.
using BinaryData = std::vector<u8>;
virtual bool HasBinary() const = 0;
virtual BinaryData GetBinary() const = 0;
virtual BinaryData GetBinary() const { return {}; }

protected:
ShaderStage m_stage;
@@ -99,6 +99,16 @@ struct SerializedGXPipelineUid
u32 depth_state_bits;
u32 blending_state_bits;
};
// Disk-format key identifying one ubershader pipeline in the on-disk pipeline cache.
// It uses the same member names as SerializedGXPipelineUid so that the SerializePipelineUid /
// UnserializePipelineUid templates can fill either type; only the vertex/pixel shader UID types
// differ (the UberShader variants are used here).
// The member order and types define the bytes written to disk, so do not reorder them.
// NOTE(review): this appears to sit inside a #pragma pack region (only the pop is visible
// from here) - confirm before depending on the exact layout.
struct SerializedGXUberPipelineUid
{
// Vertex layout, stored by value; the in-memory UID refers to a vertex format object instead.
PortableVertexDeclaration vertex_decl;
UberShader::VertexShaderUid vs_uid;
GeometryShaderUid gs_uid;
UberShader::PixelShaderUid ps_uid;
// Raw .hex values of the rasterization, depth and blending state objects.
u32 rasterization_state_bits;
u32 depth_state_bits;
u32 blending_state_bits;
};
#pragma pack(pop)

} // namespace VideoCommon
@@ -130,8 +130,9 @@ class Renderer
CreateShaderFromBinary(ShaderStage stage, const void* data, size_t length) = 0;
virtual std::unique_ptr<NativeVertexFormat>
CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) = 0;
virtual std::unique_ptr<AbstractPipeline>
CreatePipeline(const AbstractPipelineConfig& config) = 0;
virtual std::unique_ptr<AbstractPipeline> CreatePipeline(const AbstractPipelineConfig& config,
const void* cache_data = nullptr,
size_t cache_data_length = 0) = 0;
std::unique_ptr<AbstractShader> CreateShaderFromSource(ShaderStage stage,
const std::string& source);

@@ -25,8 +25,7 @@ namespace VideoCommon
ShaderCache::ShaderCache() = default;
ShaderCache::~ShaderCache()
{
ClearShaderCaches();
ClearPipelineCaches();
ClearCaches();
}

bool ShaderCache::Initialize()
@@ -48,7 +47,7 @@ void ShaderCache::InitializeShaderCache()
// Load shader and UID caches.
if (g_ActiveConfig.bShaderCache && m_api_type != APIType::Nothing)
{
LoadShaderCaches();
LoadCaches();
LoadPipelineUIDCache();
}

@@ -69,11 +68,10 @@ void ShaderCache::Reload()
{
WaitForAsyncCompiler();
ClosePipelineUIDCache();
InvalidateCachedPipelines();
ClearShaderCaches();
ClearCaches();

if (g_ActiveConfig.bShaderCache)
LoadShaderCaches();
LoadCaches();

// Switch to the precompiling shader configuration while we rebuild.
m_async_shader_compiler->ResizeWorkerThreads(g_ActiveConfig.GetShaderPrecompilerThreads());
@@ -140,7 +138,7 @@ const AbstractPipeline* ShaderCache::GetUberPipelineForUid(const GXUberPipelineU
return it->second.first.get();

std::unique_ptr<AbstractPipeline> pipeline;
std::optional<AbstractPipelineConfig> pipeline_config = GetGXUberPipelineConfig(uid);
std::optional<AbstractPipelineConfig> pipeline_config = GetGXPipelineConfig(uid);
if (pipeline_config)
pipeline = g_renderer->CreatePipeline(*pipeline_config);
return InsertGXUberPipeline(uid, std::move(pipeline));
@@ -176,8 +174,34 @@ void ShaderCache::WaitForAsyncCompiler()
}
}

// Converts an in-memory pipeline UID into its disk representation.
//   uid            - source UID; its vertex_format pointer must be valid, as it is dereferenced.
//   serialized_uid - destination; completely overwritten.
template <typename SerializedUidType, typename UidType>
static void SerializePipelineUid(const UidType& uid, SerializedUidType& serialized_uid)
{
  // Start from an all-zero struct, so that any padding between members ends up as zero bytes
  // in the disk format rather than whatever happened to be in memory.
  std::memset(&serialized_uid, 0, sizeof(SerializedUidType));

  // Fixed-function state is written as the raw value of each state object.
  serialized_uid.blending_state_bits = uid.blending_state.hex;
  serialized_uid.depth_state_bits = uid.depth_state.hex;
  serialized_uid.rasterization_state_bits = uid.rasterization_state.hex;

  // Shader stage UIDs are copied through unchanged.
  serialized_uid.ps_uid = uid.ps_uid;
  serialized_uid.gs_uid = uid.gs_uid;
  serialized_uid.vs_uid = uid.vs_uid;

  // The vertex format is referenced by pointer in memory, so store the declaration by value.
  serialized_uid.vertex_decl = uid.vertex_format->GetVertexDeclaration();
}

// Rebuilds an in-memory pipeline UID from its disk representation.
//   uid      - the serialized (disk format) UID to read from.
//   real_uid - destination; every member that the disk format carries is assigned.
template <typename UidType, typename SerializedUidType>
static void UnserializePipelineUid(const SerializedUidType& uid, UidType& real_uid)
{
  // The disk format stores the vertex declaration by value; turn it back into a format object
  // via the vertex loader manager.
  real_uid.vertex_format = VertexLoaderManager::GetOrCreateMatchingFormat(uid.vertex_decl);

  // Fixed-function state is restored from the stored raw values.
  real_uid.blending_state.hex = uid.blending_state_bits;
  real_uid.depth_state.hex = uid.depth_state_bits;
  real_uid.rasterization_state.hex = uid.rasterization_state_bits;

  // Shader stage UIDs are copied through unchanged.
  real_uid.ps_uid = uid.ps_uid;
  real_uid.gs_uid = uid.gs_uid;
  real_uid.vs_uid = uid.vs_uid;
}

template <ShaderStage stage, typename K, typename T>
static void LoadShaderCache(T& cache, APIType api_type, const char* type, bool include_gameid)
void ShaderCache::LoadShaderCache(T& cache, APIType api_type, const char* type, bool include_gameid)
{
class CacheReader : public LinearDiskCacheReader<K, u8>
{
@@ -219,38 +243,127 @@ static void LoadShaderCache(T& cache, APIType api_type, const char* type, bool i
}

template <typename T>
static void ClearShaderCache(T& cache)
void ShaderCache::ClearShaderCache(T& cache)
{
cache.disk_cache.Sync();
cache.disk_cache.Close();
cache.shader_map.clear();
}

void ShaderCache::LoadShaderCaches()
template <typename KeyType, typename DiskKeyType, typename T>
void ShaderCache::LoadPipelineCache(T& cache, LinearDiskCache<DiskKeyType, u8>& disk_cache,
APIType api_type, const char* type, bool include_gameid)
{
// Ubershader caches, if present.
LoadShaderCache<ShaderStage::Vertex, UberShader::VertexShaderUid>(m_uber_vs_cache, m_api_type,
"uber-vs", false);
LoadShaderCache<ShaderStage::Pixel, UberShader::PixelShaderUid>(m_uber_ps_cache, m_api_type,
"uber-ps", false);
class CacheReader : public LinearDiskCacheReader<DiskKeyType, u8>
{
public:
CacheReader(ShaderCache* this_ptr_, T& cache_) : this_ptr(this_ptr_), cache(cache_) {}
bool AnyFailed() const { return failed; }
void Read(const DiskKeyType& key, const u8* value, u32 value_size)
{
KeyType real_uid;
UnserializePipelineUid(key, real_uid);

// Skip those which are already compiled.
if (failed || cache.find(real_uid) != cache.end())
return;

auto config = this_ptr->GetGXPipelineConfig(real_uid);
if (!config)
return;

// We also share geometry shaders, as there aren't many variants.
if (m_host_config.backend_geometry_shaders)
LoadShaderCache<ShaderStage::Geometry, GeometryShaderUid>(m_gs_cache, m_api_type, "gs", false);
auto pipeline = g_renderer->CreatePipeline(*config, value, value_size);
if (!pipeline)
{
// If any of the pipelines fail to create, consider the cache stale.
failed = true;
return;
}

auto& entry = cache[real_uid];
entry.first = std::move(pipeline);
entry.second = false;
}

private:
ShaderCache* this_ptr;
T& cache;
bool failed = false;
};

std::string filename = GetDiskShaderCacheFileName(api_type, type, include_gameid, true);
CacheReader reader(this, cache);
u32 count = disk_cache.OpenAndRead(filename, reader);
INFO_LOG(VIDEO, "Loaded %u cached pipelines from %s", count, filename.c_str());

// If any of the pipelines in the cache failed to create, it's likely because of a change of
// driver version, or system configuration. In this case, when the UID cache picks up the pipeline
// later on, we'll write a duplicate entry to the pipeline cache. There's also no point in keeping
// the old cache data around, so discard and recreate the disk cache.
if (reader.AnyFailed())
{
WARN_LOG(VIDEO, "Failed to load one or more pipelines from cache '%s'. Discarding.",
filename.c_str());
disk_cache.Close();
File::Delete(filename);
disk_cache.OpenAndRead(filename, reader);
}
}

// Flushes and closes |disk_cache|, then drops every pipeline object held in |cache|.
// The map entries themselves are kept; only the pipeline is destroyed and the flag cleared.
template <typename T, typename Y>
void ShaderCache::ClearPipelineCache(T& cache, Y& disk_cache)
{
  disk_cache.Sync();
  disk_cache.Close();

  // Each mapped value is a (pipeline, pending flag) pair: release the pipeline and mark the
  // entry as no longer pending.
  for (auto& entry : cache)
  {
    auto& [pipeline, pending] = entry.second;
    pipeline.reset();
    pending = false;
  }
}

// Specialized shaders, gameid-specific.
LoadShaderCache<ShaderStage::Vertex, VertexShaderUid>(m_vs_cache, m_api_type, "specialized-vs",
void ShaderCache::LoadCaches()
{
// Ubershader caches, if present.
if (g_ActiveConfig.backend_info.bSupportsShaderBinaries)
{
LoadShaderCache<ShaderStage::Vertex, UberShader::VertexShaderUid>(m_uber_vs_cache, m_api_type,
"uber-vs", false);
LoadShaderCache<ShaderStage::Pixel, UberShader::PixelShaderUid>(m_uber_ps_cache, m_api_type,
"uber-ps", false);

// We also share geometry shaders, as there aren't many variants.
if (m_host_config.backend_geometry_shaders)
LoadShaderCache<ShaderStage::Geometry, GeometryShaderUid>(m_gs_cache, m_api_type, "gs",
false);

// Specialized shaders, gameid-specific.
LoadShaderCache<ShaderStage::Vertex, VertexShaderUid>(m_vs_cache, m_api_type, "specialized-vs",
true);
LoadShaderCache<ShaderStage::Pixel, PixelShaderUid>(m_ps_cache, m_api_type, "specialized-ps",
true);
LoadShaderCache<ShaderStage::Pixel, PixelShaderUid>(m_ps_cache, m_api_type, "specialized-ps",
true);
}

if (g_ActiveConfig.backend_info.bSupportsPipelineCacheData)
{
LoadPipelineCache<GXPipelineUid, SerializedGXPipelineUid>(
m_gx_pipeline_cache, m_gx_pipeline_disk_cache, m_api_type, "specialized-pipeline", true);
LoadPipelineCache<GXUberPipelineUid, SerializedGXUberPipelineUid>(
m_gx_uber_pipeline_cache, m_gx_uber_pipeline_disk_cache, m_api_type, "uber-pipeline",
false);
}
}

void ShaderCache::ClearShaderCaches()
void ShaderCache::ClearCaches()
{
ClearPipelineCache(m_gx_pipeline_cache, m_gx_pipeline_disk_cache);
ClearShaderCache(m_vs_cache);
ClearShaderCache(m_gs_cache);
ClearShaderCache(m_ps_cache);

ClearPipelineCache(m_gx_uber_pipeline_cache, m_gx_uber_pipeline_disk_cache);
ClearShaderCache(m_uber_vs_cache);
ClearShaderCache(m_uber_ps_cache);

@@ -265,37 +378,16 @@ void ShaderCache::CompileMissingPipelines()
// Queue all uids with a null pipeline for compilation.
for (auto& it : m_gx_pipeline_cache)
{
if (!it.second.second)
if (!it.second.first)
QueuePipelineCompile(it.first, COMPILE_PRIORITY_SHADERCACHE_PIPELINE);
}
for (auto& it : m_gx_uber_pipeline_cache)
{
if (!it.second.second)
if (!it.second.first)
QueueUberPipelineCompile(it.first, COMPILE_PRIORITY_UBERSHADER_PIPELINE);
}
}

void ShaderCache::InvalidateCachedPipelines()
{
// Set the pending flag to false, and destroy the pipeline.
for (auto& it : m_gx_pipeline_cache)
{
it.second.first.reset();
it.second.second = false;
}
for (auto& it : m_gx_uber_pipeline_cache)
{
it.second.first.reset();
it.second.second = false;
}
}

void ShaderCache::ClearPipelineCaches()
{
m_gx_pipeline_cache.clear();
m_gx_uber_pipeline_cache.clear();
}

std::unique_ptr<AbstractShader> ShaderCache::CompileVertexShader(const VertexShaderUid& uid) const
{
ShaderCode source_code = GenerateVertexShaderCode(m_api_type, m_host_config, uid.GetUidData());
@@ -334,7 +426,7 @@ const AbstractShader* ShaderCache::InsertVertexShader(const VertexShaderUid& uid

if (shader && !entry.shader)
{
if (g_ActiveConfig.bShaderCache && shader->HasBinary())
if (g_ActiveConfig.bShaderCache && g_ActiveConfig.backend_info.bSupportsShaderBinaries)
{
auto binary = shader->GetBinary();
if (!binary.empty())
@@ -356,7 +448,7 @@ const AbstractShader* ShaderCache::InsertVertexUberShader(const UberShader::Vert

if (shader && !entry.shader)
{
if (g_ActiveConfig.bShaderCache && shader->HasBinary())
if (g_ActiveConfig.bShaderCache && g_ActiveConfig.backend_info.bSupportsShaderBinaries)
{
auto binary = shader->GetBinary();
if (!binary.empty())
@@ -378,7 +470,7 @@ const AbstractShader* ShaderCache::InsertPixelShader(const PixelShaderUid& uid,

if (shader && !entry.shader)
{
if (g_ActiveConfig.bShaderCache && shader->HasBinary())
if (g_ActiveConfig.bShaderCache && g_ActiveConfig.backend_info.bSupportsShaderBinaries)
{
auto binary = shader->GetBinary();
if (!binary.empty())
@@ -400,7 +492,7 @@ const AbstractShader* ShaderCache::InsertPixelUberShader(const UberShader::Pixel

if (shader && !entry.shader)
{
if (g_ActiveConfig.bShaderCache && shader->HasBinary())
if (g_ActiveConfig.bShaderCache && g_ActiveConfig.backend_info.bSupportsShaderBinaries)
{
auto binary = shader->GetBinary();
if (!binary.empty())
@@ -425,7 +517,7 @@ const AbstractShader* ShaderCache::CreateGeometryShader(const GeometryShaderUid&

if (shader && !entry.shader)
{
if (g_ActiveConfig.bShaderCache && shader->HasBinary())
if (g_ActiveConfig.bShaderCache && g_ActiveConfig.backend_info.bSupportsShaderBinaries)
{
auto binary = shader->GetBinary();
if (!binary.empty())
@@ -505,7 +597,7 @@ std::optional<AbstractPipelineConfig> ShaderCache::GetGXPipelineConfig(const GXP
}

std::optional<AbstractPipelineConfig>
ShaderCache::GetGXUberPipelineConfig(const GXUberPipelineUid& config)
ShaderCache::GetGXPipelineConfig(const GXUberPipelineUid& config)
{
const AbstractShader* vs;
auto vs_iter = m_uber_vs_cache.shader_map.find(config.vs_uid);
@@ -551,6 +643,18 @@ const AbstractPipeline* ShaderCache::InsertGXPipeline(const GXPipelineUid& confi
if (!entry.first && pipeline)
entry.first = std::move(pipeline);

// Store the pipeline's cache data on disk, keyed by the serialized UID.
// entry.first is only assigned above when a non-null pipeline was passed in, so it is still null
// if creation failed and the entry held nothing before; check it before dereferencing.
if (g_ActiveConfig.bShaderCache && entry.first)
{
  const auto cache_data = entry.first->GetCacheData();
  // Empty cache data means there is nothing to store for this pipeline.
  if (!cache_data.empty())
  {
    SerializedGXPipelineUid disk_uid;
    SerializePipelineUid(config, disk_uid);
    m_gx_pipeline_disk_cache.Append(disk_uid, cache_data.data(),
                                    static_cast<u32>(cache_data.size()));
  }
}

return entry.first.get();
}

@@ -563,6 +667,18 @@ ShaderCache::InsertGXUberPipeline(const GXUberPipelineUid& config,
if (!entry.first && pipeline)
entry.first = std::move(pipeline);

// Store the uber pipeline's cache data on disk, keyed by the serialized UID.
// entry.first is only assigned above when a non-null pipeline was passed in, so it is still null
// if creation failed and the entry held nothing before; check it before dereferencing.
if (g_ActiveConfig.bShaderCache && entry.first)
{
  const auto cache_data = entry.first->GetCacheData();
  // Empty cache data means there is nothing to store for this pipeline.
  if (!cache_data.empty())
  {
    SerializedGXUberPipelineUid disk_uid;
    SerializePipelineUid(config, disk_uid);
    m_gx_uber_pipeline_disk_cache.Append(disk_uid, cache_data.data(),
                                         static_cast<u32>(cache_data.size()));
  }
}

return entry.first.get();
}

@@ -648,14 +764,8 @@ void ShaderCache::ClosePipelineUIDCache()

void ShaderCache::AddSerializedGXPipelineUID(const SerializedGXPipelineUid& uid)
{
GXPipelineUid real_uid = {};
real_uid.vertex_format = VertexLoaderManager::GetOrCreateMatchingFormat(uid.vertex_decl);
real_uid.vs_uid = uid.vs_uid;
real_uid.gs_uid = uid.gs_uid;
real_uid.ps_uid = uid.ps_uid;
real_uid.rasterization_state.hex = uid.rasterization_state_bits;
real_uid.depth_state.hex = uid.depth_state_bits;
real_uid.blending_state.hex = uid.blending_state_bits;
GXPipelineUid real_uid;
UnserializePipelineUid(uid, real_uid);

auto iter = m_gx_pipeline_cache.find(real_uid);
if (iter != m_gx_pipeline_cache.end())
@@ -671,16 +781,8 @@ void ShaderCache::AppendGXPipelineUID(const GXPipelineUid& config)
if (!m_gx_pipeline_uid_cache_file.IsOpen())
return;

// Convert to disk format. Ensure all padding bytes are zero.
SerializedGXPipelineUid disk_uid;
std::memset(&disk_uid, 0, sizeof(disk_uid));
disk_uid.vertex_decl = config.vertex_format->GetVertexDeclaration();
disk_uid.vs_uid = config.vs_uid;
disk_uid.gs_uid = config.gs_uid;
disk_uid.ps_uid = config.ps_uid;
disk_uid.rasterization_state_bits = config.rasterization_state.hex;
disk_uid.depth_state_bits = config.depth_state.hex;
disk_uid.blending_state_bits = config.blending_state.hex;
SerializePipelineUid(config, disk_uid);
if (!m_gx_pipeline_uid_cache_file.WriteBytes(&disk_uid, sizeof(disk_uid)))
{
WARN_LOG(VIDEO, "Writing pipeline UID to cache failed, closing file.");
@@ -885,7 +987,7 @@ void ShaderCache::QueueUberPipelineCompile(const GXUberPipelineUid& uid, u32 pri
// Check if all the stages required for this UberPipeline have been compiled.
// If not, this work item becomes a no-op, and re-queues the UberPipeline for the next frame.
if (SetStagesReady())
config = shader_cache->GetGXUberPipelineConfig(uid);
config = shader_cache->GetGXPipelineConfig(uid);
}

bool SetStagesReady()
@@ -111,13 +111,11 @@ class ShaderCache final
static constexpr size_t NUM_PALETTE_CONVERSION_SHADERS = 3;

void WaitForAsyncCompiler();
void LoadShaderCaches();
void ClearShaderCaches();
void LoadCaches();
void ClearCaches();
void LoadPipelineUIDCache();
void ClosePipelineUIDCache();
void CompileMissingPipelines();
void InvalidateCachedPipelines();
void ClearPipelineCaches();
void QueueUberShaderPipelines();
bool CompileSharedPipelines();

@@ -149,7 +147,7 @@ class ShaderCache final
const RasterizationState& rasterization_state, const DepthState& depth_state,
const BlendingState& blending_state);
std::optional<AbstractPipelineConfig> GetGXPipelineConfig(const GXPipelineUid& uid);
std::optional<AbstractPipelineConfig> GetGXUberPipelineConfig(const GXUberPipelineUid& uid);
std::optional<AbstractPipelineConfig> GetGXPipelineConfig(const GXUberPipelineUid& uid);
const AbstractPipeline* InsertGXPipeline(const GXPipelineUid& config,
std::unique_ptr<AbstractPipeline> pipeline);
const AbstractPipeline* InsertGXUberPipeline(const GXUberPipelineUid& config,
@@ -165,6 +163,17 @@ class ShaderCache final
void QueuePipelineCompile(const GXPipelineUid& uid, u32 priority);
void QueueUberPipelineCompile(const GXUberPipelineUid& uid, u32 priority);

// Populating various caches.
template <ShaderStage stage, typename K, typename T>
void LoadShaderCache(T& cache, APIType api_type, const char* type, bool include_gameid);
template <typename T>
void ClearShaderCache(T& cache);
template <typename KeyType, typename DiskKeyType, typename T>
void LoadPipelineCache(T& cache, LinearDiskCache<DiskKeyType, u8>& disk_cache, APIType api_type,
const char* type, bool include_gameid);
template <typename T, typename Y>
void ClearPipelineCache(T& cache, Y& disk_cache);

// Priorities for compiling. The lower the value, the sooner the pipeline is compiled.
// The shader cache is compiled last, as it is the least likely to be required. On demand
// shaders are always compiled before pending ubershaders, as we want to use the ubershader
@@ -213,6 +222,8 @@ class ShaderCache final
std::map<GXUberPipelineUid, std::pair<std::unique_ptr<AbstractPipeline>, bool>>
m_gx_uber_pipeline_cache;
File::IOFile m_gx_pipeline_uid_cache_file;
LinearDiskCache<SerializedGXPipelineUid, u8> m_gx_pipeline_disk_cache;
LinearDiskCache<SerializedGXUberPipelineUid, u8> m_gx_uber_pipeline_disk_cache;

// EFB copy to VRAM/RAM pipelines
std::map<TextureConversionShaderGen::TCShaderUid, std::unique_ptr<AbstractPipeline>>
@@ -219,6 +219,8 @@ struct VideoConfig final
bool bSupportsBackgroundCompiling;
bool bSupportsLargePoints;
bool bSupportsPartialDepthCopies;
bool bSupportsShaderBinaries;
bool bSupportsPipelineCacheData;
} backend_info;

// Utility