Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

OpcodeDecoding: Cache vertex sizes #11067

Merged
merged 3 commits into from
Sep 19, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
5 changes: 5 additions & 0 deletions Source/Core/Core/FifoPlayer/FifoPlayer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,11 @@ class FifoPlaybackAnalyzer : public OpcodeDecoder::Callback

OPCODE_CALLBACK(CPState& GetCPState()) { return m_cpmem; }

OPCODE_CALLBACK(u32 GetVertexSize(u8 vat))
{
  // Compute the size on demand from the CP state exposed by GetCPState(): the vertex
  // descriptor plus the attribute table entry selected by `vat`.
  CPState& cp_state = GetCPState();
  return VertexLoaderBase::GetVertexSize(cp_state.vtx_desc, cp_state.vtx_attr[vat]);
}

bool m_start_of_primitives = false;
bool m_end_of_primitives = false;
bool m_efb_copy = false;
Expand Down
5 changes: 5 additions & 0 deletions Source/Core/Core/FifoPlayer/FifoRecorder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,11 @@ class FifoRecorder::FifoRecordAnalyzer : public OpcodeDecoder::Callback

OPCODE_CALLBACK(CPState& GetCPState()) { return m_cpmem; }

OPCODE_CALLBACK(u32 GetVertexSize(u8 vat))
{
  // The vertex size is derived from the recorder's CP state: its vertex descriptor and
  // the vertex attribute entry at index `vat`.
  CPState& recorded_state = GetCPState();
  return VertexLoaderBase::GetVertexSize(recorded_state.vtx_desc, recorded_state.vtx_attr[vat]);
}

private:
void ProcessVertexComponent(CPArray array_index, VertexComponentFormat array_type,
u32 component_offset, u32 vertex_size, u16 num_vertices,
Expand Down
10 changes: 10 additions & 0 deletions Source/Core/DolphinQt/FIFO/FIFOAnalyzer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,11 @@ class DetailCallback : public OpcodeDecoder::Callback

OPCODE_CALLBACK(CPState& GetCPState()) { return m_cpmem; }

OPCODE_CALLBACK(u32 GetVertexSize(u8 vat))
{
  // Size of one vertex for attribute table `vat`, calculated from the CP state returned
  // by GetCPState().
  CPState& state = GetCPState();
  return VertexLoaderBase::GetVertexSize(state.vtx_desc, state.vtx_attr[vat]);
}

QString text;
CPState m_cpmem;
};
Expand Down Expand Up @@ -731,6 +736,11 @@ class DescriptionCallback : public OpcodeDecoder::Callback

OPCODE_CALLBACK(CPState& GetCPState()) { return m_cpmem; }

OPCODE_CALLBACK(u32 GetVertexSize(u8 vat))
{
  // Look up the CP state once, then ask VertexLoaderBase for the vertex size implied by
  // its vertex descriptor and the `vat`-th vertex attribute entry.
  CPState& current = GetCPState();
  return VertexLoaderBase::GetVertexSize(current.vtx_desc, current.vtx_attr[vat]);
}

QString text;
CPState m_cpmem;
};
Expand Down
8 changes: 7 additions & 1 deletion Source/Core/VideoCommon/OpcodeDecoding.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ class RunCallback final : public Callback
// HACK
DataReader src{const_cast<u8*>(vertex_data), const_cast<u8*>(vertex_data) + size};
const u32 bytes =
VertexLoaderManager::RunVertices(vat, primitive, num_vertices, src, is_preprocess);
VertexLoaderManager::RunVertices<is_preprocess>(vat, primitive, num_vertices, src);

ASSERT(bytes == size);

Expand Down Expand Up @@ -228,6 +228,12 @@ class RunCallback final : public Callback
return g_main_cp_state;
}

OPCODE_CALLBACK(u32 GetVertexSize(u8 vat))
{
  // Use the size stored on the vertex loader for this VAT rather than recomputing it
  // from the CP state.
  return VertexLoaderManager::RefreshLoader<is_preprocess>(vat)->m_vertex_size;
}

u32 m_cycles = 0;
bool m_in_display_list = false;
};
Expand Down
5 changes: 3 additions & 2 deletions Source/Core/VideoCommon/OpcodeDecoding.h
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,8 @@ class Callback

// Get the current CP state. Needed for vertex decoding; will also be mutated for CP commands.
virtual CPState& GetCPState() = 0;

virtual u32 GetVertexSize(u8 vat) = 0;
#endif
};

Expand Down Expand Up @@ -229,8 +231,7 @@ static DOLPHIN_FORCE_INLINE u32 RunCommand(const u8* data, u32 available, T& cal
(cmdbyte & OpcodeDecoder::GX_PRIMITIVE_MASK) >> OpcodeDecoder::GX_PRIMITIVE_SHIFT);
const u8 vat = cmdbyte & OpcodeDecoder::GX_VAT_MASK;

const u32 vertex_size = VertexLoaderBase::GetVertexSize(callback.GetCPState().vtx_desc,
callback.GetCPState().vtx_attr[vat]);
const u32 vertex_size = callback.GetVertexSize(vat);
const u16 num_vertices = Common::swap16(&data[1]);

if (available < 3 + num_vertices * vertex_size)
Expand Down
140 changes: 69 additions & 71 deletions Source/Core/VideoCommon/VertexLoaderManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,6 @@ Common::EnumMap<u8*, CPArray::TexCoord7> cached_arraybases;
BitSet8 g_main_vat_dirty;
BitSet8 g_preprocess_vat_dirty;
bool g_bases_dirty; // Main only
u8 g_current_vat; // Main only
std::array<VertexLoaderBase*, CP_NUM_VAT_REG> g_main_vertex_loaders;
std::array<VertexLoaderBase*, CP_NUM_VAT_REG> g_preprocess_vertex_loaders;

Expand All @@ -78,7 +77,7 @@ void Clear()
void UpdateVertexArrayPointers()
{
// Anything to update?
if (!g_bases_dirty)
if (!g_bases_dirty) [[likely]]
return;

// Some games such as Burnout 2 can put invalid addresses into
Expand Down Expand Up @@ -198,59 +197,50 @@ NativeVertexFormat* GetUberVertexFormat(const PortableVertexDeclaration& decl)
return GetOrCreateMatchingFormat(new_decl);
}

static VertexLoaderBase* RefreshLoader(int vtx_attr_group, bool preprocess = false)
namespace detail
{
CPState* state = preprocess ? &g_preprocess_cp_state : &g_main_cp_state;
BitSet8& attr_dirty = preprocess ? g_preprocess_vat_dirty : g_main_vat_dirty;
auto& vertex_loaders = preprocess ? g_main_vertex_loaders : g_preprocess_vertex_loaders;
g_current_vat = vtx_attr_group;
template <bool IsPreprocess>
VertexLoaderBase* GetOrCreateLoader(int vtx_attr_group)
K0bin marked this conversation as resolved.
Show resolved Hide resolved
{
constexpr CPState* state = IsPreprocess ? &g_preprocess_cp_state : &g_main_cp_state;
constexpr BitSet8& attr_dirty = IsPreprocess ? g_preprocess_vat_dirty : g_main_vat_dirty;
constexpr auto& vertex_loaders =
IsPreprocess ? g_preprocess_vertex_loaders : g_main_vertex_loaders;

VertexLoaderBase* loader;
if (attr_dirty[vtx_attr_group])

// We are not allowed to create a native vertex format on preprocessing as this is on the wrong
// thread
bool check_for_native_format = !IsPreprocess;

VertexLoaderUID uid(state->vtx_desc, state->vtx_attr[vtx_attr_group]);
std::lock_guard<std::mutex> lk(s_vertex_loader_map_lock);
VertexLoaderMap::iterator iter = s_vertex_loader_map.find(uid);
K0bin marked this conversation as resolved.
Show resolved Hide resolved
if (iter != s_vertex_loader_map.end())
{
// We are not allowed to create a native vertex format on preprocessing as this is on the wrong
// thread
bool check_for_native_format = !preprocess;

VertexLoaderUID uid(state->vtx_desc, state->vtx_attr[vtx_attr_group]);
std::lock_guard<std::mutex> lk(s_vertex_loader_map_lock);
VertexLoaderMap::iterator iter = s_vertex_loader_map.find(uid);
if (iter != s_vertex_loader_map.end())
{
loader = iter->second.get();
check_for_native_format &= !loader->m_native_vertex_format;
}
else
{
s_vertex_loader_map[uid] =
VertexLoaderBase::CreateVertexLoader(state->vtx_desc, state->vtx_attr[vtx_attr_group]);
loader = s_vertex_loader_map[uid].get();
INCSTAT(g_stats.num_vertex_loaders);
}
if (check_for_native_format)
{
// search for a cached native vertex format
const PortableVertexDeclaration& format = loader->m_native_vtx_decl;
std::unique_ptr<NativeVertexFormat>& native = s_native_vertex_map[format];
if (!native)
native = g_renderer->CreateNativeVertexFormat(format);
loader->m_native_vertex_format = native.get();
}
vertex_loaders[vtx_attr_group] = loader;
attr_dirty[vtx_attr_group] = false;
loader = iter->second.get();
check_for_native_format &= !loader->m_native_vertex_format;
}
else
{
loader = vertex_loaders[vtx_attr_group];
auto [it, added] = s_vertex_loader_map.try_emplace(
uid,
VertexLoaderBase::CreateVertexLoader(state->vtx_desc, state->vtx_attr[vtx_attr_group]));
loader = it->second.get();
INCSTAT(g_stats.num_vertex_loaders);
}

// Lookup pointers for any vertex arrays.
if (!preprocess)
UpdateVertexArrayPointers();

if (check_for_native_format)
{
// search for a cached native vertex format
loader->m_native_vertex_format = GetOrCreateMatchingFormat(loader->m_native_vtx_decl);
}
vertex_loaders[vtx_attr_group] = loader;
attr_dirty[vtx_attr_group] = false;
return loader;
}

} // namespace detail

static void CheckCPConfiguration(int vtx_attr_group)
{
// Validate that the XF input configuration matches the CP configuration
Expand Down Expand Up @@ -335,53 +325,61 @@ static void CheckCPConfiguration(int vtx_attr_group)
}
}

int RunVertices(int vtx_attr_group, OpcodeDecoder::Primitive primitive, int count, DataReader src,
bool is_preprocess)
template <bool IsPreprocess>
int RunVertices(int vtx_attr_group, OpcodeDecoder::Primitive primitive, int count, DataReader src)
{
if (count == 0)
return 0;
ASSERT(count > 0);

VertexLoaderBase* loader = RefreshLoader(vtx_attr_group, is_preprocess);
VertexLoaderBase* loader = RefreshLoader<IsPreprocess>(vtx_attr_group);

int size = count * loader->m_vertex_size;
if ((int)src.size() < size)
return -1;

if (is_preprocess)
return size;
if constexpr (!IsPreprocess)
{
// Doing early return for the opposite case would be cleaner
// but triggers a false unreachable code warning in MSVC debug builds.

CheckCPConfiguration(vtx_attr_group);
CheckCPConfiguration(vtx_attr_group);

// If the native vertex format changed, force a flush.
if (loader->m_native_vertex_format != s_current_vtx_fmt ||
loader->m_native_components != g_current_components)
{
g_vertex_manager->Flush();
}
s_current_vtx_fmt = loader->m_native_vertex_format;
g_current_components = loader->m_native_components;
VertexShaderManager::SetVertexFormat(loader->m_native_components);
// If the native vertex format changed, force a flush.
if (loader->m_native_vertex_format != s_current_vtx_fmt ||
loader->m_native_components != g_current_components)
{
g_vertex_manager->Flush();
}
s_current_vtx_fmt = loader->m_native_vertex_format;
g_current_components = loader->m_native_components;
VertexShaderManager::SetVertexFormat(loader->m_native_components);

// if cull mode is CULL_ALL, tell VertexManager to skip triangles and quads.
// They still need to go through vertex loading, because we need to calculate a zfreeze reference
// slope.
bool cullall = (bpmem.genMode.cullmode == CullMode::All &&
primitive < OpcodeDecoder::Primitive::GX_DRAW_LINES);
// if cull mode is CULL_ALL, tell VertexManager to skip triangles and quads.
// They still need to go through vertex loading, because we need to calculate a zfreeze reference
// slope.
bool cullall = (bpmem.genMode.cullmode == CullMode::All &&
primitive < OpcodeDecoder::Primitive::GX_DRAW_LINES);

DataReader dst = g_vertex_manager->PrepareForAdditionalData(
primitive, count, loader->m_native_vtx_decl.stride, cullall);
DataReader dst = g_vertex_manager->PrepareForAdditionalData(
primitive, count, loader->m_native_vtx_decl.stride, cullall);

count = loader->RunVertices(src, dst, count);
count = loader->RunVertices(src, dst, count);

g_vertex_manager->AddIndices(primitive, count);
g_vertex_manager->FlushData(count, loader->m_native_vtx_decl.stride);
g_vertex_manager->AddIndices(primitive, count);
g_vertex_manager->FlushData(count, loader->m_native_vtx_decl.stride);

ADDSTAT(g_stats.this_frame.num_prims, count);
INCSTAT(g_stats.this_frame.num_primitive_joins);
ADDSTAT(g_stats.this_frame.num_prims, count);
INCSTAT(g_stats.this_frame.num_primitive_joins);
}
return size;
}

// Explicit instantiations for both values of IsPreprocess. The template is only declared
// in VertexLoaderManager.h while its body lives in this file, so the two specializations
// are instantiated here for callers in other translation units.
template int RunVertices<false>(int vtx_attr_group, OpcodeDecoder::Primitive primitive, int count,
DataReader src);
template int RunVertices<true>(int vtx_attr_group, OpcodeDecoder::Primitive primitive, int count,
DataReader src);

NativeVertexFormat* GetCurrentVertexFormat()
{
return s_current_vtx_fmt;
Expand Down
38 changes: 35 additions & 3 deletions Source/Core/VideoCommon/VertexLoaderManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,16 @@ NativeVertexFormat* GetOrCreateMatchingFormat(const PortableVertexDeclaration& d
NativeVertexFormat* GetUberVertexFormat(const PortableVertexDeclaration& decl);

// Returns -1 if buf_size is insufficient, else the amount of bytes consumed
int RunVertices(int vtx_attr_group, OpcodeDecoder::Primitive primitive, int count, DataReader src,
bool is_preprocess);
template <bool IsPreprocess = false>
int RunVertices(int vtx_attr_group, OpcodeDecoder::Primitive primitive, int count, DataReader src);

namespace detail
{
// This will look for an existing loader in the global hashmap or create a new one if there is none.
// It should not be used directly because RefreshLoader() has another cache for fast lookups.
template <bool IsPreprocess = false>
VertexLoaderBase* GetOrCreateLoader(int vtx_attr_group);
K0bin marked this conversation as resolved.
Show resolved Hide resolved
} // namespace detail

NativeVertexFormat* GetCurrentVertexFormat();

Expand All @@ -66,7 +74,31 @@ extern u32 g_current_components;
extern BitSet8 g_main_vat_dirty;
extern BitSet8 g_preprocess_vat_dirty;
extern bool g_bases_dirty; // Main only
extern u8 g_current_vat; // Main only
K0bin marked this conversation as resolved.
Show resolved Hide resolved
extern std::array<VertexLoaderBase*, CP_NUM_VAT_REG> g_main_vertex_loaders;
extern std::array<VertexLoaderBase*, CP_NUM_VAT_REG> g_preprocess_vertex_loaders;

// Returns the vertex loader for the vertex attribute table `vtx_attr_group`.
//
// While the group's dirty bit is clear, the loader cached in the per-VAT array is returned
// directly. Otherwise the lookup falls through to detail::GetOrCreateLoader(), which also
// refreshes that cache entry and clears the dirty bit.
//
// For the main (non-preprocess) instantiation, UpdateVertexArrayPointers() is called on
// every invocation before returning.
template <bool IsPreprocess = false>
VertexLoaderBase* RefreshLoader(int vtx_attr_group)
{
  constexpr const BitSet8& dirty_vats = IsPreprocess ? g_preprocess_vat_dirty : g_main_vat_dirty;
  constexpr const auto& cached_loaders =
      IsPreprocess ? g_preprocess_vertex_loaders : g_main_vertex_loaders;

  VertexLoaderBase* result = nullptr;
  if (dirty_vats[vtx_attr_group]) [[unlikely]]
  {
    // Slow path: consult (or populate) the global loader map.
    result = detail::GetOrCreateLoader<IsPreprocess>(vtx_attr_group);
  }
  else [[likely]]
  {
    // Fast path: nothing changed for this VAT since the last refresh.
    result = cached_loaders[vtx_attr_group];
  }

  // Lookup pointers for any vertex arrays.
  if constexpr (!IsPreprocess)
    UpdateVertexArrayPointers();

  return result;
}

} // namespace VertexLoaderManager