From 035a8f5b33aada9bd4310ddaa66691853ae408b7 Mon Sep 17 00:00:00 2001 From: TellowKrinkle Date: Tue, 26 Jul 2022 03:57:30 -0500 Subject: [PATCH] VideoCommon: Cull vertices on the CPU --- Source/Core/Core/Config/GraphicsSettings.cpp | 1 + Source/Core/Core/Config/GraphicsSettings.h | 1 + .../Core/VideoCommon/VertexLoaderManager.cpp | 29 +++++++++++++++---- Source/Core/VideoCommon/VertexManagerBase.cpp | 20 +++++++++++++ Source/Core/VideoCommon/VertexManagerBase.h | 9 ++++++ Source/Core/VideoCommon/VideoConfig.cpp | 1 + Source/Core/VideoCommon/VideoConfig.h | 1 + 7 files changed, 56 insertions(+), 6 deletions(-) diff --git a/Source/Core/Core/Config/GraphicsSettings.cpp b/Source/Core/Core/Config/GraphicsSettings.cpp index 3d99d06be448..c61993e0dbf1 100644 --- a/Source/Core/Core/Config/GraphicsSettings.cpp +++ b/Source/Core/Core/Config/GraphicsSettings.cpp @@ -90,6 +90,7 @@ const Info GFX_SAVE_TEXTURE_CACHE_TO_STATE{ {System::GFX, "Settings", "SaveTextureCacheToState"}, true}; const Info GFX_PREFER_VS_FOR_LINE_POINT_EXPANSION{ {System::GFX, "Settings", "PreferVSForLinePointExpansion"}, false}; +const Info GFX_CPU_CULL{{System::GFX, "Settings", "CPUCull"}, true}; const Info GFX_MTL_MANUALLY_UPLOAD_BUFFERS{ {System::GFX, "Settings", "ManuallyUploadBuffers"}, TriState::Auto}; diff --git a/Source/Core/Core/Config/GraphicsSettings.h b/Source/Core/Core/Config/GraphicsSettings.h index cd75b068a433..b3a000d32120 100644 --- a/Source/Core/Core/Config/GraphicsSettings.h +++ b/Source/Core/Core/Config/GraphicsSettings.h @@ -78,6 +78,7 @@ extern const Info GFX_SHADER_COMPILER_THREADS; extern const Info GFX_SHADER_PRECOMPILER_THREADS; extern const Info GFX_SAVE_TEXTURE_CACHE_TO_STATE; extern const Info GFX_PREFER_VS_FOR_LINE_POINT_EXPANSION; +extern const Info GFX_CPU_CULL; extern const Info GFX_MTL_MANUALLY_UPLOAD_BUFFERS; extern const Info GFX_MTL_USE_PRESENT_DRAWABLE; diff --git a/Source/Core/VideoCommon/VertexLoaderManager.cpp b/Source/Core/VideoCommon/VertexLoaderManager.cpp 
index c37b4a4fa219..88aaf0209fe0 100644 --- a/Source/Core/VideoCommon/VertexLoaderManager.cpp +++ b/Source/Core/VideoCommon/VertexLoaderManager.cpp @@ -29,6 +29,7 @@ #include "VideoCommon/VertexLoaderBase.h" #include "VideoCommon/VertexManagerBase.h" #include "VideoCommon/VertexShaderManager.h" +#include "VideoCommon/VideoConfig.h" #include "VideoCommon/XFMemory.h" namespace VertexLoaderManager @@ -360,17 +361,33 @@ int RunVertices(int vtx_attr_group, OpcodeDecoder::Primitive primitive, int coun VertexShaderManager::SetVertexFormat(loader->m_native_components, loader->m_native_vertex_format->GetVertexDeclaration()); + // CPUCull's performance increase comes from encoding fewer GPU commands, not sending less data + // Therefore it's only useful to check if culling could remove a flush + const bool can_cpu_cull = g_ActiveConfig.bCPUCull && + primitive < OpcodeDecoder::Primitive::GX_DRAW_LINES && + !g_vertex_manager->HasSendableVertices(); + // if cull mode is CULL_ALL, tell VertexManager to skip triangles and quads. - // They still need to go through vertex loading, because we need to calculate a zfreeze refrence - // slope. - bool cullall = (bpmem.genMode.cullmode == CullMode::All && - primitive < OpcodeDecoder::Primitive::GX_DRAW_LINES); + // They still need to go through vertex loading, because we need to calculate a zfreeze + // reference slope. 
+ const bool cullall = (bpmem.genMode.cullmode == CullMode::All && + primitive < OpcodeDecoder::Primitive::GX_DRAW_LINES); - DataReader dst = g_vertex_manager->PrepareForAdditionalData( - primitive, count, loader->m_native_vtx_decl.stride, cullall); + const int stride = loader->m_native_vtx_decl.stride; + DataReader dst = g_vertex_manager->PrepareForAdditionalData(primitive, count, stride, + cullall || can_cpu_cull); count = loader->RunVertices(src, dst.GetPointer(), count); + if (can_cpu_cull && !cullall) + { + if (!g_vertex_manager->CPUCullVertices(loader, primitive, dst.GetPointer(), count)) + { + DataReader new_dst = g_vertex_manager->DisableCullAll(stride); + memmove(new_dst.GetPointer(), dst.GetPointer(), count * stride); + } + } + g_vertex_manager->AddIndices(primitive, count); g_vertex_manager->FlushData(count, loader->m_native_vtx_decl.stride); diff --git a/Source/Core/VideoCommon/VertexManagerBase.cpp b/Source/Core/VideoCommon/VertexManagerBase.cpp index 273f67c746da..03392f199276 100644 --- a/Source/Core/VideoCommon/VertexManagerBase.cpp +++ b/Source/Core/VideoCommon/VertexManagerBase.cpp @@ -103,6 +103,7 @@ VertexManagerBase::~VertexManagerBase() = default; bool VertexManagerBase::Initialize() { m_index_generator.Init(); + m_cpu_cull.Init(); return true; } @@ -116,6 +117,13 @@ void VertexManagerBase::AddIndices(OpcodeDecoder::Primitive primitive, u32 num_v m_index_generator.AddIndices(primitive, num_vertices); } +bool VertexManagerBase::CPUCullVertices(VertexLoaderBase* loader, + OpcodeDecoder::Primitive primitive, const u8* src, + u32 count) +{ + return m_cpu_cull.CullVertices(loader, primitive, src, count); +} + DataReader VertexManagerBase::PrepareForAdditionalData(OpcodeDecoder::Primitive primitive, u32 count, u32 stride, bool cullall) { @@ -184,6 +192,16 @@ DataReader VertexManagerBase::PrepareForAdditionalData(OpcodeDecoder::Primitive return DataReader(m_cur_buffer_pointer, m_end_buffer_pointer); } +DataReader VertexManagerBase::DisableCullAll(u32 
stride) +{ + if (m_cull_all) + { + m_cull_all = false; + ResetBuffer(stride); + } + return DataReader(m_cur_buffer_pointer, m_end_buffer_pointer); +} + void VertexManagerBase::FlushData(u32 count, u32 stride) { m_cur_buffer_pointer += count * stride; @@ -536,6 +554,8 @@ void VertexManagerBase::Flush() // Now the vertices can be flushed to the GPU. Everything following the CommitBuffer() call // must be careful to not upload any utility vertices, as the binding will be lost otherwise. const u32 num_indices = m_index_generator.GetIndexLen(); + if (num_indices == 0) + return; u32 base_vertex, base_index; CommitBuffer(m_index_generator.GetNumVerts(), VertexLoaderManager::GetCurrentVertexFormat()->GetVertexStride(), num_indices, diff --git a/Source/Core/VideoCommon/VertexManagerBase.h b/Source/Core/VideoCommon/VertexManagerBase.h index ba3777a7fed6..fd5e94577a43 100644 --- a/Source/Core/VideoCommon/VertexManagerBase.h +++ b/Source/Core/VideoCommon/VertexManagerBase.h @@ -9,6 +9,7 @@ #include "Common/BitSet.h" #include "Common/CommonTypes.h" #include "Common/MathUtil.h" +#include "VideoCommon/CPUCull.h" #include "VideoCommon/IndexGenerator.h" #include "VideoCommon/RenderState.h" #include "VideoCommon/ShaderCache.h" @@ -100,11 +101,18 @@ class VertexManagerBase PrimitiveType GetCurrentPrimitiveType() const { return m_current_primitive_type; } void AddIndices(OpcodeDecoder::Primitive primitive, u32 num_vertices); + bool CPUCullVertices(VertexLoaderBase* loader, OpcodeDecoder::Primitive primitive, const u8* src, + u32 count); virtual DataReader PrepareForAdditionalData(OpcodeDecoder::Primitive primitive, u32 count, u32 stride, bool cullall); + /// Switch cullall off after a call to PrepareForAdditionalData with cullall true + /// Expects that you will add a nonzero number of primitives before the next flush + /// Returns a DataReader over the current buffer range (the buffer is reset first if cullall was on) + DataReader DisableCullAll(u32 stride); void FlushData(u32 count, u32 stride); void 
Flush(); + bool HasSendableVertices() const { return !m_is_flushed && !m_cull_all; } void DoState(PointerWrap& p); @@ -201,6 +209,7 @@ class VertexManagerBase bool m_cull_all = false; IndexGenerator m_index_generator; + CPUCull m_cpu_cull; private: // Minimum number of draws per command buffer when attempting to preempt a readback operation. diff --git a/Source/Core/VideoCommon/VideoConfig.cpp b/Source/Core/VideoCommon/VideoConfig.cpp index 4e66c9d8224b..8229ba0a3b9b 100644 --- a/Source/Core/VideoCommon/VideoConfig.cpp +++ b/Source/Core/VideoCommon/VideoConfig.cpp @@ -110,6 +110,7 @@ void VideoConfig::Refresh() iShaderCompilationMode = Config::Get(Config::GFX_SHADER_COMPILATION_MODE); iShaderCompilerThreads = Config::Get(Config::GFX_SHADER_COMPILER_THREADS); iShaderPrecompilerThreads = Config::Get(Config::GFX_SHADER_PRECOMPILER_THREADS); + bCPUCull = Config::Get(Config::GFX_CPU_CULL); bForceFiltering = Config::Get(Config::GFX_ENHANCE_FORCE_FILTERING); iMaxAnisotropy = Config::Get(Config::GFX_ENHANCE_MAX_ANISOTROPY); diff --git a/Source/Core/VideoCommon/VideoConfig.h b/Source/Core/VideoCommon/VideoConfig.h index 2bdf4b28353c..adcd268c3953 100644 --- a/Source/Core/VideoCommon/VideoConfig.h +++ b/Source/Core/VideoCommon/VideoConfig.h @@ -128,6 +128,7 @@ struct VideoConfig final bool bPerfQueriesEnable = false; bool bBBoxEnable = false; bool bForceProgressive = false; + bool bCPUCull = true; bool bEFBEmulateFormatChanges = false; bool bSkipEFBCopyToRam = false;