Skip to content
Permalink
Browse files
Merge pull request #11066 from K0bin/vertex-size-opt
Optimize GetVertexSize
  • Loading branch information
JMC47 committed Sep 15, 2022
2 parents 1efb5b8 + fdcd2b7 commit 32fba6d
Show file tree
Hide file tree
Showing 10 changed files with 313 additions and 254 deletions.
@@ -12,6 +12,7 @@
#include <fmt/format.h>

#include "Common/Assert.h"
#include "Common/BitSet.h"
#include "Common/CommonTypes.h"
#include "Common/Logging/Log.h"
#include "Common/MsgHandler.h"
@@ -96,45 +97,33 @@ class VertexLoaderTester : public VertexLoaderBase
std::vector<u8> buffer_b;
};

template <class Function>
static void GetComponentSizes(const TVtxDesc& vtx_desc, const VAT& vtx_attr, Function f)
u32 VertexLoaderBase::GetVertexSize(const TVtxDesc& vtx_desc, const VAT& vtx_attr)
{
if (vtx_desc.low.PosMatIdx)
f(1);
for (auto texmtxidx : vtx_desc.low.TexMatIdx)
{
if (texmtxidx)
f(1);
}
u32 size = 0;

// Each enabled TexMatIdx adds one byte, as does PosMatIdx
size += Common::CountSetBits(vtx_desc.low.Hex & 0x1FF);

const u32 pos_size = VertexLoader_Position::GetSize(vtx_desc.low.Position, vtx_attr.g0.PosFormat,
vtx_attr.g0.PosElements);
if (pos_size != 0)
f(pos_size);
size += pos_size;
const u32 norm_size =
VertexLoader_Normal::GetSize(vtx_desc.low.Normal, vtx_attr.g0.NormalFormat,
vtx_attr.g0.NormalElements, vtx_attr.g0.NormalIndex3);
if (norm_size != 0)
f(norm_size);
size += norm_size;
for (u32 i = 0; i < vtx_desc.low.Color.Size(); i++)
{
const u32 color_size =
VertexLoader_Color::GetSize(vtx_desc.low.Color[i], vtx_attr.GetColorFormat(i));
if (color_size != 0)
f(color_size);
size += color_size;
}
for (u32 i = 0; i < vtx_desc.high.TexCoord.Size(); i++)
{
const u32 tc_size = VertexLoader_TextCoord::GetSize(
vtx_desc.high.TexCoord[i], vtx_attr.GetTexFormat(i), vtx_attr.GetTexElements(i));
if (tc_size != 0)
f(tc_size);
size += tc_size;
}
}

u32 VertexLoaderBase::GetVertexSize(const TVtxDesc& vtx_desc, const VAT& vtx_attr)
{
u32 size = 0;
GetComponentSizes(vtx_desc, vtx_attr, [&size](u32 s) { size += s; });
return size;
}

@@ -168,14 +157,6 @@ u32 VertexLoaderBase::GetVertexComponents(const TVtxDesc& vtx_desc, const VAT& v
return components;
}

std::vector<u32> VertexLoaderBase::GetVertexComponentSizes(const TVtxDesc& vtx_desc,
const VAT& vtx_attr)
{
std::vector<u32> sizes;
GetComponentSizes(vtx_desc, vtx_attr, [&sizes](u32 s) { sizes.push_back(s); });
return sizes;
}

std::unique_ptr<VertexLoaderBase> VertexLoaderBase::CreateVertexLoader(const TVtxDesc& vtx_desc,
const VAT& vtx_attr)
{
@@ -62,7 +62,6 @@ class VertexLoaderBase
public:
static u32 GetVertexSize(const TVtxDesc& vtx_desc, const VAT& vtx_attr);
static u32 GetVertexComponents(const TVtxDesc& vtx_desc, const VAT& vtx_attr);
static std::vector<u32> GetVertexComponentSizes(const TVtxDesc& vtx_desc, const VAT& vtx_attr);
static std::unique_ptr<VertexLoaderBase> CreateVertexLoader(const TVtxDesc& vtx_desc,
const VAT& vtx_attr);
virtual ~VertexLoaderBase() {}
@@ -205,24 +205,8 @@ constexpr Table<TPipelineFunction> s_table_read_color = {
Color_ReadIndex_32b_8888<u16>}),
};

constexpr Table<u32> s_table_read_color_vertex_size = {
g({0u, 0u, 0u, 0u, 0u, 0u}),
g({2u, 3u, 4u, 2u, 3u, 4u}),
g({1u, 1u, 1u, 1u, 1u, 1u}),
g({2u, 2u, 2u, 2u, 2u, 2u}),
};
} // Anonymous namespace

u32 VertexLoader_Color::GetSize(VertexComponentFormat type, ColorFormat format)
{
if (format > ColorFormat::RGBA8888)
{
PanicAlertFmt("Invalid color format {}", format);
return 0;
}
return s_table_read_color_vertex_size[type][format];
}

TPipelineFunction VertexLoader_Color::GetFunction(VertexComponentFormat type, ColorFormat format)
{
if (format > ColorFormat::RGBA8888)
@@ -4,14 +4,44 @@
#pragma once

#include "Common/CommonTypes.h"
#include "VideoCommon/VertexLoader.h"
#include "Common/EnumMap.h"
#include "Common/Inline.h"

enum class VertexComponentFormat;
enum class ColorFormat;
#include "VideoCommon/CPMemory.h"
#include "VideoCommon/VertexLoader.h"

class VertexLoader_Color
{
public:
static u32 GetSize(VertexComponentFormat type, ColorFormat format);
static DOLPHIN_FORCE_INLINE u32 GetSize(VertexComponentFormat type, ColorFormat format)
{
if (format > ColorFormat::RGBA8888)
{
PanicAlertFmt("Invalid color format {}", format);
return 0;
}
return s_table_size[type][format];
}

static TPipelineFunction GetFunction(VertexComponentFormat type, ColorFormat format);

private:
template <typename T, auto last_member>
using EnumMap = typename Common::EnumMap<T, last_member>;

using SizeTable = EnumMap<EnumMap<u32, ColorFormat::RGBA8888>, VertexComponentFormat::Index16>;

static constexpr SizeTable s_table_size = []() consteval
{
SizeTable table{};

using VCF = VertexComponentFormat;

table[VCF::Direct] = {2u, 3u, 4u, 2u, 3u, 4u};
table[VCF::Index8] = {1u, 1u, 1u, 1u, 1u, 1u};
table[VCF::Index16] = {2u, 2u, 2u, 2u, 2u, 2u};

return table;
}
();
};
@@ -63,20 +63,15 @@ void ReadIndirect(VertexLoader* loader, const T* data)
}

template <typename T, u32 N>
struct Normal_Direct
void Normal_ReadDirect(VertexLoader* loader)
{
static void function(VertexLoader* loader)
{
const auto source = reinterpret_cast<const T*>(DataGetPosition());
ReadIndirect<T, N * 3>(loader, source);
DataSkip<N * 3 * sizeof(T)>();
}

static constexpr u32 size = sizeof(T) * N * 3;
};
const auto source = reinterpret_cast<const T*>(DataGetPosition());
ReadIndirect<T, N * 3>(loader, source);
DataSkip<N * 3 * sizeof(T)>();
}

template <typename I, typename T, u32 N, u32 Offset>
void Normal_Index_Offset(VertexLoader* loader)
void Normal_ReadIndex_Offset(VertexLoader* loader)
{
static_assert(std::is_unsigned_v<I>, "Only unsigned I is sane!");

@@ -88,137 +83,111 @@ void Normal_Index_Offset(VertexLoader* loader)
}

template <typename I, typename T, u32 N>
struct Normal_Index
void Normal_ReadIndex(VertexLoader* loader)
{
static void function(VertexLoader* loader) { Normal_Index_Offset<I, T, N, 0>(loader); }
static constexpr u32 size = sizeof(I);
};
Normal_ReadIndex_Offset<I, T, N, 0>(loader);
}

template <typename I, typename T>
struct Normal_Index_Indices3
void Normal_ReadIndex_Indices3(VertexLoader* loader)
{
static void function(VertexLoader* loader)
{
Normal_Index_Offset<I, T, 1, 0>(loader);
Normal_Index_Offset<I, T, 1, 1>(loader);
Normal_Index_Offset<I, T, 1, 2>(loader);
}

static constexpr u32 size = sizeof(I) * 3;
};

struct Set
{
template <typename T>
constexpr Set& operator=(const T&)
{
gc_size = T::size;
function = T::function;
return *this;
}

u32 gc_size;
TPipelineFunction function;
};
Normal_ReadIndex_Offset<I, T, 1, 0>(loader);
Normal_ReadIndex_Offset<I, T, 1, 1>(loader);
Normal_ReadIndex_Offset<I, T, 1, 2>(loader);
}

using Common::EnumMap;
using Formats = EnumMap<Set, ComponentFormat::Float>;
using Formats = EnumMap<TPipelineFunction, ComponentFormat::Float>;
using Elements = EnumMap<Formats, NormalComponentCount::NTB>;
using Indices = std::array<Elements, 2>;
using Types = EnumMap<Indices, VertexComponentFormat::Index16>;

constexpr Types InitializeTable()
consteval Types InitializeTable()
{
Types table{};

using VCF = VertexComponentFormat;
using NCC = NormalComponentCount;
using FMT = ComponentFormat;

table[VCF::Direct][false][NCC::N][FMT::UByte] = Normal_Direct<u8, 1>();
table[VCF::Direct][false][NCC::N][FMT::Byte] = Normal_Direct<s8, 1>();
table[VCF::Direct][false][NCC::N][FMT::UShort] = Normal_Direct<u16, 1>();
table[VCF::Direct][false][NCC::N][FMT::Short] = Normal_Direct<s16, 1>();
table[VCF::Direct][false][NCC::N][FMT::Float] = Normal_Direct<float, 1>();
table[VCF::Direct][false][NCC::NTB][FMT::UByte] = Normal_Direct<u8, 3>();
table[VCF::Direct][false][NCC::NTB][FMT::Byte] = Normal_Direct<s8, 3>();
table[VCF::Direct][false][NCC::NTB][FMT::UShort] = Normal_Direct<u16, 3>();
table[VCF::Direct][false][NCC::NTB][FMT::Short] = Normal_Direct<s16, 3>();
table[VCF::Direct][false][NCC::NTB][FMT::Float] = Normal_Direct<float, 3>();
table[VCF::Direct][false][NCC::N][FMT::UByte] = Normal_ReadDirect<u8, 1>;
table[VCF::Direct][false][NCC::N][FMT::Byte] = Normal_ReadDirect<s8, 1>;
table[VCF::Direct][false][NCC::N][FMT::UShort] = Normal_ReadDirect<u16, 1>;
table[VCF::Direct][false][NCC::N][FMT::Short] = Normal_ReadDirect<s16, 1>;
table[VCF::Direct][false][NCC::N][FMT::Float] = Normal_ReadDirect<float, 1>;
table[VCF::Direct][false][NCC::NTB][FMT::UByte] = Normal_ReadDirect<u8, 3>;
table[VCF::Direct][false][NCC::NTB][FMT::Byte] = Normal_ReadDirect<s8, 3>;
table[VCF::Direct][false][NCC::NTB][FMT::UShort] = Normal_ReadDirect<u16, 3>;
table[VCF::Direct][false][NCC::NTB][FMT::Short] = Normal_ReadDirect<s16, 3>;
table[VCF::Direct][false][NCC::NTB][FMT::Float] = Normal_ReadDirect<float, 3>;

// Same as above, since there are no indices
table[VCF::Direct][true][NCC::N][FMT::UByte] = Normal_Direct<u8, 1>();
table[VCF::Direct][true][NCC::N][FMT::Byte] = Normal_Direct<s8, 1>();
table[VCF::Direct][true][NCC::N][FMT::UShort] = Normal_Direct<u16, 1>();
table[VCF::Direct][true][NCC::N][FMT::Short] = Normal_Direct<s16, 1>();
table[VCF::Direct][true][NCC::N][FMT::Float] = Normal_Direct<float, 1>();
table[VCF::Direct][true][NCC::NTB][FMT::UByte] = Normal_Direct<u8, 3>();
table[VCF::Direct][true][NCC::NTB][FMT::Byte] = Normal_Direct<s8, 3>();
table[VCF::Direct][true][NCC::NTB][FMT::UShort] = Normal_Direct<u16, 3>();
table[VCF::Direct][true][NCC::NTB][FMT::Short] = Normal_Direct<s16, 3>();
table[VCF::Direct][true][NCC::NTB][FMT::Float] = Normal_Direct<float, 3>();

table[VCF::Index8][false][NCC::N][FMT::UByte] = Normal_Index<u8, u8, 1>();
table[VCF::Index8][false][NCC::N][FMT::Byte] = Normal_Index<u8, s8, 1>();
table[VCF::Index8][false][NCC::N][FMT::UShort] = Normal_Index<u8, u16, 1>();
table[VCF::Index8][false][NCC::N][FMT::Short] = Normal_Index<u8, s16, 1>();
table[VCF::Index8][false][NCC::N][FMT::Float] = Normal_Index<u8, float, 1>();
table[VCF::Index8][false][NCC::NTB][FMT::UByte] = Normal_Index<u8, u8, 3>();
table[VCF::Index8][false][NCC::NTB][FMT::Byte] = Normal_Index<u8, s8, 3>();
table[VCF::Index8][false][NCC::NTB][FMT::UShort] = Normal_Index<u8, u16, 3>();
table[VCF::Index8][false][NCC::NTB][FMT::Short] = Normal_Index<u8, s16, 3>();
table[VCF::Index8][false][NCC::NTB][FMT::Float] = Normal_Index<u8, float, 3>();
table[VCF::Direct][true][NCC::N][FMT::UByte] = Normal_ReadDirect<u8, 1>;
table[VCF::Direct][true][NCC::N][FMT::Byte] = Normal_ReadDirect<s8, 1>;
table[VCF::Direct][true][NCC::N][FMT::UShort] = Normal_ReadDirect<u16, 1>;
table[VCF::Direct][true][NCC::N][FMT::Short] = Normal_ReadDirect<s16, 1>;
table[VCF::Direct][true][NCC::N][FMT::Float] = Normal_ReadDirect<float, 1>;
table[VCF::Direct][true][NCC::NTB][FMT::UByte] = Normal_ReadDirect<u8, 3>;
table[VCF::Direct][true][NCC::NTB][FMT::Byte] = Normal_ReadDirect<s8, 3>;
table[VCF::Direct][true][NCC::NTB][FMT::UShort] = Normal_ReadDirect<u16, 3>;
table[VCF::Direct][true][NCC::NTB][FMT::Short] = Normal_ReadDirect<s16, 3>;
table[VCF::Direct][true][NCC::NTB][FMT::Float] = Normal_ReadDirect<float, 3>;

table[VCF::Index8][false][NCC::N][FMT::UByte] = Normal_ReadIndex<u8, u8, 1>;
table[VCF::Index8][false][NCC::N][FMT::Byte] = Normal_ReadIndex<u8, s8, 1>;
table[VCF::Index8][false][NCC::N][FMT::UShort] = Normal_ReadIndex<u8, u16, 1>;
table[VCF::Index8][false][NCC::N][FMT::Short] = Normal_ReadIndex<u8, s16, 1>;
table[VCF::Index8][false][NCC::N][FMT::Float] = Normal_ReadIndex<u8, float, 1>;
table[VCF::Index8][false][NCC::NTB][FMT::UByte] = Normal_ReadIndex<u8, u8, 3>;
table[VCF::Index8][false][NCC::NTB][FMT::Byte] = Normal_ReadIndex<u8, s8, 3>;
table[VCF::Index8][false][NCC::NTB][FMT::UShort] = Normal_ReadIndex<u8, u16, 3>;
table[VCF::Index8][false][NCC::NTB][FMT::Short] = Normal_ReadIndex<u8, s16, 3>;
table[VCF::Index8][false][NCC::NTB][FMT::Float] = Normal_ReadIndex<u8, float, 3>;

// Same for NormalComponentCount::N; differs for NTB
table[VCF::Index8][true][NCC::N][FMT::UByte] = Normal_Index<u8, u8, 1>();
table[VCF::Index8][true][NCC::N][FMT::Byte] = Normal_Index<u8, s8, 1>();
table[VCF::Index8][true][NCC::N][FMT::UShort] = Normal_Index<u8, u16, 1>();
table[VCF::Index8][true][NCC::N][FMT::Short] = Normal_Index<u8, s16, 1>();
table[VCF::Index8][true][NCC::N][FMT::Float] = Normal_Index<u8, float, 1>();
table[VCF::Index8][true][NCC::NTB][FMT::UByte] = Normal_Index_Indices3<u8, u8>();
table[VCF::Index8][true][NCC::NTB][FMT::Byte] = Normal_Index_Indices3<u8, s8>();
table[VCF::Index8][true][NCC::NTB][FMT::UShort] = Normal_Index_Indices3<u8, u16>();
table[VCF::Index8][true][NCC::NTB][FMT::Short] = Normal_Index_Indices3<u8, s16>();
table[VCF::Index8][true][NCC::NTB][FMT::Float] = Normal_Index_Indices3<u8, float>();

table[VCF::Index16][false][NCC::N][FMT::UByte] = Normal_Index<u16, u8, 1>();
table[VCF::Index16][false][NCC::N][FMT::Byte] = Normal_Index<u16, s8, 1>();
table[VCF::Index16][false][NCC::N][FMT::UShort] = Normal_Index<u16, u16, 1>();
table[VCF::Index16][false][NCC::N][FMT::Short] = Normal_Index<u16, s16, 1>();
table[VCF::Index16][false][NCC::N][FMT::Float] = Normal_Index<u16, float, 1>();
table[VCF::Index16][false][NCC::NTB][FMT::UByte] = Normal_Index<u16, u8, 3>();
table[VCF::Index16][false][NCC::NTB][FMT::Byte] = Normal_Index<u16, s8, 3>();
table[VCF::Index16][false][NCC::NTB][FMT::UShort] = Normal_Index<u16, u16, 3>();
table[VCF::Index16][false][NCC::NTB][FMT::Short] = Normal_Index<u16, s16, 3>();
table[VCF::Index16][false][NCC::NTB][FMT::Float] = Normal_Index<u16, float, 3>();
table[VCF::Index8][true][NCC::N][FMT::UByte] = Normal_ReadIndex<u8, u8, 1>;
table[VCF::Index8][true][NCC::N][FMT::Byte] = Normal_ReadIndex<u8, s8, 1>;
table[VCF::Index8][true][NCC::N][FMT::UShort] = Normal_ReadIndex<u8, u16, 1>;
table[VCF::Index8][true][NCC::N][FMT::Short] = Normal_ReadIndex<u8, s16, 1>;
table[VCF::Index8][true][NCC::N][FMT::Float] = Normal_ReadIndex<u8, float, 1>;
table[VCF::Index8][true][NCC::NTB][FMT::UByte] = Normal_ReadIndex_Indices3<u8, u8>;
table[VCF::Index8][true][NCC::NTB][FMT::Byte] = Normal_ReadIndex_Indices3<u8, s8>;
table[VCF::Index8][true][NCC::NTB][FMT::UShort] = Normal_ReadIndex_Indices3<u8, u16>;
table[VCF::Index8][true][NCC::NTB][FMT::Short] = Normal_ReadIndex_Indices3<u8, s16>;
table[VCF::Index8][true][NCC::NTB][FMT::Float] = Normal_ReadIndex_Indices3<u8, float>;

table[VCF::Index16][false][NCC::N][FMT::UByte] = Normal_ReadIndex<u16, u8, 1>;
table[VCF::Index16][false][NCC::N][FMT::Byte] = Normal_ReadIndex<u16, s8, 1>;
table[VCF::Index16][false][NCC::N][FMT::UShort] = Normal_ReadIndex<u16, u16, 1>;
table[VCF::Index16][false][NCC::N][FMT::Short] = Normal_ReadIndex<u16, s16, 1>;
table[VCF::Index16][false][NCC::N][FMT::Float] = Normal_ReadIndex<u16, float, 1>;
table[VCF::Index16][false][NCC::NTB][FMT::UByte] = Normal_ReadIndex<u16, u8, 3>;
table[VCF::Index16][false][NCC::NTB][FMT::Byte] = Normal_ReadIndex<u16, s8, 3>;
table[VCF::Index16][false][NCC::NTB][FMT::UShort] = Normal_ReadIndex<u16, u16, 3>;
table[VCF::Index16][false][NCC::NTB][FMT::Short] = Normal_ReadIndex<u16, s16, 3>;
table[VCF::Index16][false][NCC::NTB][FMT::Float] = Normal_ReadIndex<u16, float, 3>;

// Same for NormalComponentCount::N; differs for NTB
table[VCF::Index16][true][NCC::N][FMT::UByte] = Normal_Index<u16, u8, 1>();
table[VCF::Index16][true][NCC::N][FMT::Byte] = Normal_Index<u16, s8, 1>();
table[VCF::Index16][true][NCC::N][FMT::UShort] = Normal_Index<u16, u16, 1>();
table[VCF::Index16][true][NCC::N][FMT::Short] = Normal_Index<u16, s16, 1>();
table[VCF::Index16][true][NCC::N][FMT::Float] = Normal_Index<u16, float, 1>();
table[VCF::Index16][true][NCC::NTB][FMT::UByte] = Normal_Index_Indices3<u16, u8>();
table[VCF::Index16][true][NCC::NTB][FMT::Byte] = Normal_Index_Indices3<u16, s8>();
table[VCF::Index16][true][NCC::NTB][FMT::UShort] = Normal_Index_Indices3<u16, u16>();
table[VCF::Index16][true][NCC::NTB][FMT::Short] = Normal_Index_Indices3<u16, s16>();
table[VCF::Index16][true][NCC::NTB][FMT::Float] = Normal_Index_Indices3<u16, float>();
table[VCF::Index16][true][NCC::N][FMT::UByte] = Normal_ReadIndex<u16, u8, 1>;
table[VCF::Index16][true][NCC::N][FMT::Byte] = Normal_ReadIndex<u16, s8, 1>;
table[VCF::Index16][true][NCC::N][FMT::UShort] = Normal_ReadIndex<u16, u16, 1>;
table[VCF::Index16][true][NCC::N][FMT::Short] = Normal_ReadIndex<u16, s16, 1>;
table[VCF::Index16][true][NCC::N][FMT::Float] = Normal_ReadIndex<u16, float, 1>;
table[VCF::Index16][true][NCC::NTB][FMT::UByte] = Normal_ReadIndex_Indices3<u16, u8>;
table[VCF::Index16][true][NCC::NTB][FMT::Byte] = Normal_ReadIndex_Indices3<u16, s8>;
table[VCF::Index16][true][NCC::NTB][FMT::UShort] = Normal_ReadIndex_Indices3<u16, u16>;
table[VCF::Index16][true][NCC::NTB][FMT::Short] = Normal_ReadIndex_Indices3<u16, s16>;
table[VCF::Index16][true][NCC::NTB][FMT::Float] = Normal_ReadIndex_Indices3<u16, float>;

return table;
}

constexpr Types s_table = InitializeTable();
constexpr Types s_table_read_normal = InitializeTable();
} // Anonymous namespace

u32 VertexLoader_Normal::GetSize(VertexComponentFormat type, ComponentFormat format,
NormalComponentCount elements, bool index3)
{
return s_table[type][index3][elements][format].gc_size;
}

TPipelineFunction VertexLoader_Normal::GetFunction(VertexComponentFormat type,
ComponentFormat format,
NormalComponentCount elements, bool index3)
{
return s_table[type][index3][elements][format].function;
return s_table_read_normal[type][index3][elements][format];
}

0 comments on commit 32fba6d

Please sign in to comment.