Skip to content

Commit

Permalink
VideoCommon: Cull vertices on the CPU
Browse files Browse the repository at this point in the history
  • Loading branch information
TellowKrinkle committed Nov 29, 2022
1 parent f87547b commit 035a8f5
Show file tree
Hide file tree
Showing 7 changed files with 56 additions and 6 deletions.
1 change: 1 addition & 0 deletions Source/Core/Core/Config/GraphicsSettings.cpp
Expand Up @@ -90,6 +90,7 @@ const Info<bool> GFX_SAVE_TEXTURE_CACHE_TO_STATE{
{System::GFX, "Settings", "SaveTextureCacheToState"}, true};
const Info<bool> GFX_PREFER_VS_FOR_LINE_POINT_EXPANSION{
{System::GFX, "Settings", "PreferVSForLinePointExpansion"}, false};
const Info<bool> GFX_CPU_CULL{{System::GFX, "Settings", "CPUCull"}, true};

const Info<TriState> GFX_MTL_MANUALLY_UPLOAD_BUFFERS{
{System::GFX, "Settings", "ManuallyUploadBuffers"}, TriState::Auto};
Expand Down
1 change: 1 addition & 0 deletions Source/Core/Core/Config/GraphicsSettings.h
Expand Up @@ -78,6 +78,7 @@ extern const Info<int> GFX_SHADER_COMPILER_THREADS;
extern const Info<int> GFX_SHADER_PRECOMPILER_THREADS;
extern const Info<bool> GFX_SAVE_TEXTURE_CACHE_TO_STATE;
extern const Info<bool> GFX_PREFER_VS_FOR_LINE_POINT_EXPANSION;
extern const Info<bool> GFX_CPU_CULL;

extern const Info<TriState> GFX_MTL_MANUALLY_UPLOAD_BUFFERS;
extern const Info<bool> GFX_MTL_USE_PRESENT_DRAWABLE;
Expand Down
29 changes: 23 additions & 6 deletions Source/Core/VideoCommon/VertexLoaderManager.cpp
Expand Up @@ -29,6 +29,7 @@
#include "VideoCommon/VertexLoaderBase.h"
#include "VideoCommon/VertexManagerBase.h"
#include "VideoCommon/VertexShaderManager.h"
#include "VideoCommon/VideoConfig.h"
#include "VideoCommon/XFMemory.h"

namespace VertexLoaderManager
Expand Down Expand Up @@ -360,17 +361,33 @@ int RunVertices(int vtx_attr_group, OpcodeDecoder::Primitive primitive, int coun
VertexShaderManager::SetVertexFormat(loader->m_native_components,
loader->m_native_vertex_format->GetVertexDeclaration());

// CPUCull's performance increase comes from encoding fewer GPU commands, not sending less data
// Therefore it's only useful to check if culling could remove a flush
const bool can_cpu_cull = g_ActiveConfig.bCPUCull &&
primitive < OpcodeDecoder::Primitive::GX_DRAW_LINES &&
!g_vertex_manager->HasSendableVertices();

// if cull mode is CULL_ALL, tell VertexManager to skip triangles and quads.
// They still need to go through vertex loading, because we need to calculate a zfreeze refrence
// slope.
bool cullall = (bpmem.genMode.cullmode == CullMode::All &&
primitive < OpcodeDecoder::Primitive::GX_DRAW_LINES);
// They still need to go through vertex loading, because we need to calculate a zfreeze
// reference slope.
const bool cullall = (bpmem.genMode.cullmode == CullMode::All &&
primitive < OpcodeDecoder::Primitive::GX_DRAW_LINES);

DataReader dst = g_vertex_manager->PrepareForAdditionalData(
primitive, count, loader->m_native_vtx_decl.stride, cullall);
const int stride = loader->m_native_vtx_decl.stride;
DataReader dst = g_vertex_manager->PrepareForAdditionalData(primitive, count, stride,
cullall || can_cpu_cull);

count = loader->RunVertices(src, dst.GetPointer(), count);

if (can_cpu_cull && !cullall)
{
if (!g_vertex_manager->CPUCullVertices(loader, primitive, dst.GetPointer(), count))
{
DataReader new_dst = g_vertex_manager->DisableCullAll(stride);
memmove(new_dst.GetPointer(), dst.GetPointer(), count * stride);
}
}

g_vertex_manager->AddIndices(primitive, count);
g_vertex_manager->FlushData(count, loader->m_native_vtx_decl.stride);

Expand Down
20 changes: 20 additions & 0 deletions Source/Core/VideoCommon/VertexManagerBase.cpp
Expand Up @@ -103,6 +103,7 @@ VertexManagerBase::~VertexManagerBase() = default;
// Performs one-time setup of the helpers owned by the vertex manager: the index generator
// and the CPU culler are each initialized via their Init() method.
// Returns true unconditionally; no failure path exists in this implementation.
bool VertexManagerBase::Initialize()
{
m_index_generator.Init();
m_cpu_cull.Init();
return true;
}

Expand All @@ -116,6 +117,13 @@ void VertexManagerBase::AddIndices(OpcodeDecoder::Primitive primitive, u32 num_v
m_index_generator.AddIndices(primitive, num_vertices);
}

// Thin forwarding wrapper: hands loader, primitive, src and count unchanged to
// m_cpu_cull.CullVertices() and returns whatever that call returns.
// NOTE(review): the exact meaning of the returned bool is defined by CPUCull::CullVertices,
// which is not visible here. The caller in VertexLoaderManager::RunVertices reacts to a false
// result by calling DisableCullAll(), so false presumably means "not everything was culled" --
// confirm against CPUCull.
bool VertexManagerBase::CPUCullVertices(VertexLoaderBase* loader,
OpcodeDecoder::Primitive primitive, const u8* src,
u32 count)
{
return m_cpu_cull.CullVertices(loader, primitive, src, count);
}

DataReader VertexManagerBase::PrepareForAdditionalData(OpcodeDecoder::Primitive primitive,
u32 count, u32 stride, bool cullall)
{
Expand Down Expand Up @@ -184,6 +192,16 @@ DataReader VertexManagerBase::PrepareForAdditionalData(OpcodeDecoder::Primitive
return DataReader(m_cur_buffer_pointer, m_end_buffer_pointer);
}

// Turns cull-all mode off if it is currently on.
// When the mode actually changes, the buffer is reset for the given stride before returning.
// In every case the result is a DataReader running from the current buffer pointer to the
// end of the buffer.
DataReader VertexManagerBase::DisableCullAll(u32 stride)
{
  // Cull-all already off: leave the buffer untouched and hand back the current range.
  if (!m_cull_all)
    return DataReader(m_cur_buffer_pointer, m_end_buffer_pointer);

  // Clear the flag first, then reset, matching the order callers have always observed.
  m_cull_all = false;
  ResetBuffer(stride);
  return DataReader(m_cur_buffer_pointer, m_end_buffer_pointer);
}

void VertexManagerBase::FlushData(u32 count, u32 stride)
{
m_cur_buffer_pointer += count * stride;
Expand Down Expand Up @@ -536,6 +554,8 @@ void VertexManagerBase::Flush()
// Now the vertices can be flushed to the GPU. Everything following the CommitBuffer() call
// must be careful to not upload any utility vertices, as the binding will be lost otherwise.
const u32 num_indices = m_index_generator.GetIndexLen();
if (num_indices == 0)
return;
u32 base_vertex, base_index;
CommitBuffer(m_index_generator.GetNumVerts(),
VertexLoaderManager::GetCurrentVertexFormat()->GetVertexStride(), num_indices,
Expand Down
9 changes: 9 additions & 0 deletions Source/Core/VideoCommon/VertexManagerBase.h
Expand Up @@ -9,6 +9,7 @@
#include "Common/BitSet.h"
#include "Common/CommonTypes.h"
#include "Common/MathUtil.h"
#include "VideoCommon/CPUCull.h"
#include "VideoCommon/IndexGenerator.h"
#include "VideoCommon/RenderState.h"
#include "VideoCommon/ShaderCache.h"
Expand Down Expand Up @@ -100,11 +101,18 @@ class VertexManagerBase

PrimitiveType GetCurrentPrimitiveType() const { return m_current_primitive_type; }
void AddIndices(OpcodeDecoder::Primitive primitive, u32 num_vertices);
bool CPUCullVertices(VertexLoaderBase* loader, OpcodeDecoder::Primitive primitive, const u8* src,
u32 count);
virtual DataReader PrepareForAdditionalData(OpcodeDecoder::Primitive primitive, u32 count,
u32 stride, bool cullall);
/// Switch cullall off after a call to PrepareForAdditionalData with cullall true
/// Expects that you will add a nonzero number of primitives before the next flush
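/// Returns a DataReader spanning the current buffer pointer to the buffer end (the buffer is
/// reset first if cullall was on; it is left untouched if cullall was already off)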
DataReader DisableCullAll(u32 stride);
void FlushData(u32 count, u32 stride);

void Flush();
/// True only when the manager is not in the flushed state and cull-all mode is off.
bool HasSendableVertices() const { return !(m_is_flushed || m_cull_all); }

void DoState(PointerWrap& p);

Expand Down Expand Up @@ -201,6 +209,7 @@ class VertexManagerBase
bool m_cull_all = false;

IndexGenerator m_index_generator;
CPUCull m_cpu_cull;

private:
// Minimum number of draws per command buffer when attempting to preempt a readback operation.
Expand Down
1 change: 1 addition & 0 deletions Source/Core/VideoCommon/VideoConfig.cpp
Expand Up @@ -110,6 +110,7 @@ void VideoConfig::Refresh()
iShaderCompilationMode = Config::Get(Config::GFX_SHADER_COMPILATION_MODE);
iShaderCompilerThreads = Config::Get(Config::GFX_SHADER_COMPILER_THREADS);
iShaderPrecompilerThreads = Config::Get(Config::GFX_SHADER_PRECOMPILER_THREADS);
bCPUCull = Config::Get(Config::GFX_CPU_CULL);

bForceFiltering = Config::Get(Config::GFX_ENHANCE_FORCE_FILTERING);
iMaxAnisotropy = Config::Get(Config::GFX_ENHANCE_MAX_ANISOTROPY);
Expand Down
1 change: 1 addition & 0 deletions Source/Core/VideoCommon/VideoConfig.h
Expand Up @@ -128,6 +128,7 @@ struct VideoConfig final
bool bPerfQueriesEnable = false;
bool bBBoxEnable = false;
bool bForceProgressive = false;
bool bCPUCull = true;

bool bEFBEmulateFormatChanges = false;
bool bSkipEFBCopyToRam = false;
Expand Down

0 comments on commit 035a8f5

Please sign in to comment.