Skip to content

Commit

Permalink
Merge pull request #11673 from K0bin/vertex-loader-micro-opt
Browse files Browse the repository at this point in the history
Vertex Loader Microoptimization
  • Loading branch information
degasus committed Mar 30, 2023
2 parents 806ea59 + 93fce0e commit 50a45bd
Show file tree
Hide file tree
Showing 4 changed files with 64 additions and 59 deletions.
24 changes: 12 additions & 12 deletions Source/Core/VideoCommon/VertexLoaderManager.cpp
Expand Up @@ -258,11 +258,6 @@ VertexLoaderBase* GetOrCreateLoader(int vtx_attr_group)

static void CheckCPConfiguration(int vtx_attr_group)
{
if (!g_needs_cp_xf_consistency_check) [[likely]]
return;

g_needs_cp_xf_consistency_check = false;

// Validate that the XF input configuration matches the CP configuration
u32 num_cp_colors = std::count_if(
g_main_cp_state.vtx_desc.low.Color.begin(), g_main_cp_state.vtx_desc.low.Color.end(),
Expand Down Expand Up @@ -359,20 +354,25 @@ int RunVertices(int vtx_attr_group, OpcodeDecoder::Primitive primitive, int coun
// Doing early return for the opposite case would be cleaner
// but triggers a false unreachable code warning in MSVC debug builds.

CheckCPConfiguration(vtx_attr_group);
if (g_needs_cp_xf_consistency_check) [[unlikely]]
{
CheckCPConfiguration(vtx_attr_group);
g_needs_cp_xf_consistency_check = false;
}

// If the native vertex format changed, force a flush.
if (loader->m_native_vertex_format != s_current_vtx_fmt ||
loader->m_native_components != g_current_components) [[unlikely]]
{
g_vertex_manager->Flush();

s_current_vtx_fmt = loader->m_native_vertex_format;
g_current_components = loader->m_native_components;
auto& system = Core::System::GetInstance();
auto& vertex_shader_manager = system.GetVertexShaderManager();
vertex_shader_manager.SetVertexFormat(loader->m_native_components,
loader->m_native_vertex_format->GetVertexDeclaration());
}
s_current_vtx_fmt = loader->m_native_vertex_format;
g_current_components = loader->m_native_components;
auto& system = Core::System::GetInstance();
auto& vertex_shader_manager = system.GetVertexShaderManager();
vertex_shader_manager.SetVertexFormat(loader->m_native_components,
loader->m_native_vertex_format->GetVertexDeclaration());

// CPUCull's performance increase comes from encoding fewer GPU commands, not sending less data.
// Therefore it's only useful to check if culling could remove a flush.
Expand Down
20 changes: 12 additions & 8 deletions Source/Core/VideoCommon/VertexManagerBase.cpp
Expand Up @@ -140,7 +140,7 @@ DataReader VertexManagerBase::PrepareForAdditionalData(OpcodeDecoder::Primitive
PrimitiveType new_primitive_type = g_ActiveConfig.backend_info.bSupportsPrimitiveRestart ?
primitive_from_gx_pr[primitive] :
primitive_from_gx[primitive];
if (m_current_primitive_type != new_primitive_type)
if (m_current_primitive_type != new_primitive_type) [[unlikely]]
{
Flush();

Expand All @@ -149,9 +149,11 @@ DataReader VertexManagerBase::PrepareForAdditionalData(OpcodeDecoder::Primitive
SetRasterizationStateChanged();
}

u32 remaining_indices = GetRemainingIndices(primitive);
u32 remaining_index_generator_indices = m_index_generator.GetRemainingIndices(primitive);

// Check for size in buffer, if the buffer gets full, call Flush()
if (!m_is_flushed && (count > m_index_generator.GetRemainingIndices(primitive) ||
count > GetRemainingIndices(primitive) ||
if (!m_is_flushed && (count > remaining_index_generator_indices || count > remaining_indices ||
needed_vertex_bytes > GetRemainingSize())) [[unlikely]]
{
Flush();
Expand All @@ -160,7 +162,7 @@ DataReader VertexManagerBase::PrepareForAdditionalData(OpcodeDecoder::Primitive
m_cull_all = cullall;

// need to alloc new buffer
if (m_is_flushed)
if (m_is_flushed) [[unlikely]]
{
if (cullall)
{
Expand All @@ -174,21 +176,23 @@ DataReader VertexManagerBase::PrepareForAdditionalData(OpcodeDecoder::Primitive
ResetBuffer(stride);
}

remaining_index_generator_indices = m_index_generator.GetRemainingIndices(primitive);
remaining_indices = GetRemainingIndices(primitive);
m_is_flushed = false;
}

// Now that we've reset the buffer, there should be enough space. It's possible that we still
// won't have enough space in a few rare cases, such as vertex shader line/point expansion with a
// ton of lines in one draw command, in which case we will either need to add support for
// splitting a single draw command into multiple draws or using bigger indices.
ASSERT_MSG(VIDEO, count <= m_index_generator.GetRemainingIndices(primitive),
ASSERT_MSG(VIDEO, count <= remaining_index_generator_indices,
"VertexManager: Too few remaining index values ({} > {}). "
"32-bit indices or primitive breaking needed.",
count, m_index_generator.GetRemainingIndices(primitive));
ASSERT_MSG(VIDEO, count <= GetRemainingIndices(primitive),
count, remaining_index_generator_indices);
ASSERT_MSG(VIDEO, count <= remaining_indices,
"VertexManager: Buffer not large enough for all indices! ({} > {}) "
"Increase MAXIBUFFERSIZE or we need primitive breaking after all.",
count, GetRemainingIndices(primitive));
count, remaining_indices);
ASSERT_MSG(VIDEO, needed_vertex_bytes <= GetRemainingSize(),
"VertexManager: Buffer not large enough for all vertices! ({} > {}) "
"Increase MAXVBUFFERSIZE or we need primitive breaking after all.",
Expand Down
38 changes: 0 additions & 38 deletions Source/Core/VideoCommon/VertexShaderManager.cpp
Expand Up @@ -621,44 +621,6 @@ void VertexShaderManager::SetMaterialColorChanged(int index)
m_materials_changed[index] = true;
}

// Writes new_value into *old_value and raises *dirty, but only when the stored value differs.
// When the values already match, neither output is touched.
static void UpdateValue(bool* dirty, u32* old_value, u32 new_value)
{
  const bool changed = (*old_value != new_value);
  if (changed)
  {
    *old_value = new_value;
    *dirty = true;
  }
}

static void UpdateOffset(bool* dirty, bool include_components, u32* old_value,
const AttributeFormat& attribute)
{
if (!attribute.enable)
return;
u32 new_value = attribute.offset / 4; // GPU uses uint offsets
if (include_components)
new_value |= attribute.components << 16;
UpdateValue(dirty, old_value, new_value);
}

// Applies UpdateOffset element-wise: entry i of *old_value is refreshed from attribute[i].
template <size_t N>
static void UpdateOffsets(bool* dirty, bool include_components, std::array<u32, N>* old_value,
                          const std::array<AttributeFormat, N>& attribute)
{
  std::array<u32, N>& cached = *old_value;
  for (size_t idx = 0; idx != N; ++idx)
  {
    UpdateOffset(dirty, include_components, &cached[idx], attribute[idx]);
  }
}

// Copies the vertex layout (component mask, stride and per-attribute offsets) into the shader
// constants. The dirty flag is raised if any stored value actually changes.
void VertexShaderManager::SetVertexFormat(u32 components, const PortableVertexDeclaration& format)
{
  bool* const changed = &dirty;
  auto& vs = constants;

  // Component mask and stride; the stride is stored divided by 4.
  UpdateValue(changed, &vs.components, components);
  UpdateValue(changed, &vs.vertex_stride, format.stride / 4);

  // Position and texture coordinates also carry their component count.
  UpdateOffset(changed, true, &vs.vertex_offset_position, format.position);
  UpdateOffset(changed, false, &vs.vertex_offset_posmtx, format.posmtx);
  UpdateOffsets(changed, true, &vs.vertex_offset_texcoords, format.texcoords);
  UpdateOffsets(changed, false, &vs.vertex_offset_colors, format.colors);
  UpdateOffsets(changed, false, &vs.vertex_offset_normals, format.normals);
}

void VertexShaderManager::SetTexMatrixInfoChanged(int index)
{
// TODO: Should we track this with more precision, like which indices changed?
Expand Down
41 changes: 40 additions & 1 deletion Source/Core/VideoCommon/VertexShaderManager.h
Expand Up @@ -11,6 +11,7 @@
#include "Common/CommonTypes.h"
#include "Common/Matrix.h"
#include "VideoCommon/ConstantManager.h"
#include "VideoCommon/NativeVertexFormat.h"

class PointerWrap;
struct PortableVertexDeclaration;
Expand All @@ -34,7 +35,6 @@ class alignas(16) VertexShaderManager
void SetProjectionChanged();
void SetMaterialColorChanged(int index);

void SetVertexFormat(u32 components, const PortableVertexDeclaration& format);
void SetTexMatrixInfoChanged(int index);
void SetLightingConfigChanged();

Expand All @@ -49,6 +49,45 @@ class alignas(16) VertexShaderManager
VertexShaderConstants constants{};
bool dirty = false;

// Stores new_value in *old_value and sets *dirty when the two differ; otherwise does nothing.
static DOLPHIN_FORCE_INLINE void UpdateValue(bool* dirty, u32* old_value, u32 new_value)
{
  if (*old_value != new_value)
  {
    *old_value = new_value;
    *dirty = true;
  }
}

// Updates a single cached attribute offset. Attributes that are not enabled are skipped.
// With include_components set, the component count is OR-ed in, shifted left by 16 bits.
static DOLPHIN_FORCE_INLINE void UpdateOffset(bool* dirty, bool include_components,
                                              u32* old_value, const AttributeFormat& attribute)
{
  if (attribute.enable)
  {
    // GPU uses uint offsets, hence the division by 4.
    u32 encoded = attribute.offset / 4;
    if (include_components)
      encoded |= attribute.components << 16;
    UpdateValue(dirty, old_value, encoded);
  }
}

// Runs UpdateOffset over each pair of (cached offset, attribute) at matching indices.
template <size_t N>
static DOLPHIN_FORCE_INLINE void UpdateOffsets(bool* dirty, bool include_components,
                                               std::array<u32, N>* old_value,
                                               const std::array<AttributeFormat, N>& attribute)
{
  std::array<u32, N>& stored = *old_value;
  for (size_t slot = 0; slot != N; ++slot)
  {
    UpdateOffset(dirty, include_components, &stored[slot], attribute[slot]);
  }
}

// Mirrors the vertex layout (component mask, stride and attribute offsets) into `constants`,
// setting `dirty` whenever one of the stored values changes.
DOLPHIN_FORCE_INLINE void SetVertexFormat(u32 components, const PortableVertexDeclaration& format)
{
  bool* const changed = &dirty;
  auto& vs = constants;

  // Component mask and stride; the stride is stored divided by 4.
  UpdateValue(changed, &vs.components, components);
  UpdateValue(changed, &vs.vertex_stride, format.stride / 4);

  // Position and texture coordinates additionally encode their component count.
  UpdateOffset(changed, true, &vs.vertex_offset_position, format.position);
  UpdateOffset(changed, false, &vs.vertex_offset_posmtx, format.posmtx);
  UpdateOffsets(changed, true, &vs.vertex_offset_texcoords, format.texcoords);
  UpdateOffsets(changed, false, &vs.vertex_offset_colors, format.colors);
  UpdateOffsets(changed, false, &vs.vertex_offset_normals, format.normals);
}

private:
alignas(16) std::array<float, 16> m_projection_matrix;

Expand Down

0 comments on commit 50a45bd

Please sign in to comment.