Large diffs are not rendered by default.

@@ -963,7 +963,7 @@ void Renderer::RecordVideoMemory()
const u32* xfregs_ptr = reinterpret_cast<const u32*>(&xfmem) + FifoDataFile::XF_MEM_SIZE;
u32 xfregs_size = sizeof(XFMemory) / 4 - FifoDataFile::XF_MEM_SIZE;

FillCPMemoryArray(cpmem);
g_main_cp_state.FillCPMemoryArray(cpmem);

FifoRecorder::GetInstance().SetVideoMemory(bpmem_ptr, cpmem, xfmem_ptr, xfregs_ptr, xfregs_size,
texMem);
@@ -986,9 +986,9 @@ bool Renderer::InitializeImGui()
ImGui::GetStyle().WindowRounding = 7.0f;

PortableVertexDeclaration vdecl = {};
vdecl.position = {VAR_FLOAT, 2, offsetof(ImDrawVert, pos), true, false};
vdecl.texcoords[0] = {VAR_FLOAT, 2, offsetof(ImDrawVert, uv), true, false};
vdecl.colors[0] = {VAR_UNSIGNED_BYTE, 4, offsetof(ImDrawVert, col), true, false};
vdecl.position = {ComponentFormat::Float, 2, offsetof(ImDrawVert, pos), true, false};
vdecl.texcoords[0] = {ComponentFormat::Float, 2, offsetof(ImDrawVert, uv), true, false};
vdecl.colors[0] = {ComponentFormat::UByte, 4, offsetof(ImDrawVert, col), true, false};
vdecl.stride = sizeof(ImDrawVert);
m_imgui_vertex_format = CreateNativeVertexFormat(vdecl);
if (!m_imgui_vertex_format)
@@ -1095,7 +1095,7 @@ void ShaderCache::QueueUberShaderPipelines()
// All attributes will be enabled in GetUberVertexFormat.
PortableVertexDeclaration dummy_vertex_decl = {};
dummy_vertex_decl.position.components = 4;
dummy_vertex_decl.position.type = VAR_FLOAT;
dummy_vertex_decl.position.type = ComponentFormat::Float;
dummy_vertex_decl.position.enable = true;
dummy_vertex_decl.stride = sizeof(float) * 4;
NativeVertexFormat* dummy_vertex_format =
@@ -13,10 +13,11 @@

#include "Common/BitField.h"
#include "Common/CommonTypes.h"
#include "Common/EnumMap.h"
#include "Common/StringUtil.h"
#include "Common/TypeUtils.h"

enum class APIType;
#include "VideoCommon/VideoCommon.h"

/**
* Common interface for classes that need to go through the shader generation path
@@ -210,6 +211,64 @@ std::string BitfieldExtract(std::string_view source)
static_cast<u32>(BitFieldT::NumBits()));
}

/**
 * Writes shader code to `out` that selects one entry of `values` based on the runtime value of
 * the shader expression `variable`.
 *
 * For APIType::D3D a switch statement is written with one case per enum member from 0 through
 * last_member; empty entries produce a case label with no body, and `break_` controls whether a
 * break is written after each case. For every other API a binary tree of if/else comparisons on
 * `variable` is written instead; in that form every entry is written (even an empty one),
 * followed by a comment naming the enum member, and `break_` is not used.
 *
 * `indent` is the number of spaces written before the outermost generated lines. Each entry of
 * `values` is written on a single output line, so multi-line snippets are indented poorly.
 *
 * Formatting the enum members assumes an EnumFormatter exists for the enum type.
 */
template <auto last_member, typename = decltype(last_member)>
void WriteSwitch(ShaderCode& out, APIType ApiType, std::string_view variable,
                 const Common::EnumMap<std::string_view, last_member>& values, int indent,
                 bool break_)
{
  // The second template argument is needed to avoid compile errors from ambiguity with multiple
  // enums with the same number of members in GCC prior to 8. See https://godbolt.org/z/xcKaW1seW
  // and https://godbolt.org/z/hz7Yqq1P5
  using enum_type = decltype(last_member);
  const u32 last_index = static_cast<u32>(last_member);

  // Throughout, {:{}} indents by formatting an empty string with a variable width.
  if (ApiType != APIType::D3D)
  {
    // Emit a tree of if statements recursively. A lambda cannot name itself inside its own
    // initializer, so it receives itself as its first argument in order to recurse.
    const auto write_subtree = [&](const auto& self, u32 depth, u32 first, u32 end) -> void {
      // Each call covers first <= x < end.
      if (end == first + 1)
      {
        // Only one candidate left (first <= x < first + 1 means x == first).
        const enum_type only_key = static_cast<enum_type>(first);
        out.Write("{:{}}{} // {}\n", "", depth, values[only_key], only_key);
        return;
      }

      const u32 pivot = first + ((end - first) / 2);
      out.Write("{:{}}if ({} < {}u) {{\n", "", depth, variable, pivot);
      self(self, depth + 2, first, pivot);
      out.Write("{:{}}}} else {{\n", "", depth);
      self(self, depth + 2, pivot, end);
      out.Write("{:{}}}}\n", "", depth);
    };
    write_subtree(write_subtree, static_cast<u32>(indent), 0u, last_index + 1);
    return;
  }

  out.Write("{:{}}switch ({}) {{\n", "", indent, variable);
  for (u32 index = 0; index <= last_index; ++index)
  {
    const enum_type key = static_cast<enum_type>(index);
    const std::string_view body = values[key];

    // Assumes existence of an EnumFormatter
    out.Write("{:{}}case {:s}:\n", "", indent, key);
    if (!body.empty())
      out.Write("{:{}} {}\n", "", indent, body);
    if (break_)
      out.Write("{:{}} break;\n", "", indent);
  }
  out.Write("{:{}}}}\n", "", indent);
}

// Constant variable names
#define I_COLORS "color"
#define I_KCOLORS "k"
@@ -1300,42 +1300,30 @@ TextureCacheBase::GetTexture(const int textureCacheSafetyColorSampleSize, Textur
// Search the texture cache for textures by address
//
// Find all texture cache entries for the current texture address, and decide whether to use one
// of them, or to create a new one.
//
// In most cases, the fastest way is to use only one texture cache entry for the same address.
// Usually, when a texture changes, the old version of the texture is unlikely to be used again.
// If there were new cache entries created for normal texture updates, there would be a slowdown
// due to a huge amount of unused cache entries. Also thanks to texture pooling, overwriting an
// existing cache entry is faster than creating a new one from scratch.
//
// Some games use the same address for different textures though. If the same cache entry was used
// in this case, it would be constantly overwritten, and effectively there wouldn't be any caching
// for those textures. Examples of this are Metroid Prime and Castlevania 3. Metroid Prime has
// multiple sets of fonts on top of each other stored in a single texture and uses the palette to
// make different characters visible or invisible. In Castlevania 3 some textures are used for 2
// different things or at least in 2 different ways (size 1024x1024 vs 1024x256).
//
// To determine whether to use multiple cache entries or a single entry, use the following
// heuristic: If the same texture address is used several times during the same frame, assume the
// address is used for different purposes and allow creating an additional cache entry. If there's
// at least one entry that hasn't been used for the same frame, then overwrite it, in order to
// keep the cache as small as possible. If the current texture is found in the cache, use that
// entry.
//
// For EFB copies, the entry created in CopyRenderTargetToTexture always has to be used, or else
// it was done in vain.
auto iter_range = textures_by_address.equal_range(texture_info.GetRawAddress());
TexAddrCache::iterator iter = iter_range.first;
TexAddrCache::iterator oldest_entry = iter;

Large diffs are not rendered by default.

@@ -91,7 +91,7 @@ void VertexLoader::CompileVertexTranslator()
m_native_vtx_decl.posmtx.components = 4;
m_native_vtx_decl.posmtx.enable = true;
m_native_vtx_decl.posmtx.offset = nat_offset;
m_native_vtx_decl.posmtx.type = VAR_UNSIGNED_BYTE;
m_native_vtx_decl.posmtx.type = ComponentFormat::UByte;
m_native_vtx_decl.posmtx.integer = true;
nat_offset += 4;
}
@@ -110,7 +110,7 @@ void VertexLoader::CompileVertexTranslator()
m_native_vtx_decl.position.components = pos_elements;
m_native_vtx_decl.position.enable = true;
m_native_vtx_decl.position.offset = nat_offset;
m_native_vtx_decl.position.type = VAR_FLOAT;
m_native_vtx_decl.position.type = ComponentFormat::Float;
m_native_vtx_decl.position.integer = false;
nat_offset += pos_elements * sizeof(float);

@@ -134,7 +134,7 @@ void VertexLoader::CompileVertexTranslator()
m_native_vtx_decl.normals[i].components = 3;
m_native_vtx_decl.normals[i].enable = true;
m_native_vtx_decl.normals[i].offset = nat_offset;
m_native_vtx_decl.normals[i].type = VAR_FLOAT;
m_native_vtx_decl.normals[i].type = ComponentFormat::Float;
m_native_vtx_decl.normals[i].integer = false;
nat_offset += 12;
}
@@ -143,7 +143,7 @@ void VertexLoader::CompileVertexTranslator()
for (size_t i = 0; i < m_VtxDesc.low.Color.Size(); i++)
{
m_native_vtx_decl.colors[i].components = 4;
m_native_vtx_decl.colors[i].type = VAR_UNSIGNED_BYTE;
m_native_vtx_decl.colors[i].type = ComponentFormat::UByte;
m_native_vtx_decl.colors[i].integer = false;

TPipelineFunction pFunc =
@@ -166,7 +166,7 @@ void VertexLoader::CompileVertexTranslator()
for (size_t i = 0; i < m_VtxDesc.high.TexCoord.Size(); i++)
{
m_native_vtx_decl.texcoords[i].offset = nat_offset;
m_native_vtx_decl.texcoords[i].type = VAR_FLOAT;
m_native_vtx_decl.texcoords[i].type = ComponentFormat::Float;
m_native_vtx_decl.texcoords[i].integer = false;

const auto tc = m_VtxDesc.high.TexCoord[i].Value();
@@ -6,6 +6,7 @@
#include <array>

#include "Common/CommonTypes.h"
#include "VideoCommon/CPMemory.h"
#include "VideoCommon/DataReader.h"
#include "VideoCommon/VertexLoaderManager.h"

@@ -59,7 +60,7 @@ VertexLoaderARM64::VertexLoaderARM64(const TVtxDesc& vtx_desc, const VAT& vtx_at
WriteProtect();
}

void VertexLoaderARM64::GetVertexAddr(int array, VertexComponentFormat attribute, ARM64Reg reg)
void VertexLoaderARM64::GetVertexAddr(CPArray array, VertexComponentFormat attribute, ARM64Reg reg)
{
if (IsIndexed(attribute))
{
@@ -95,25 +96,26 @@ void VertexLoaderARM64::GetVertexAddr(int array, VertexComponentFormat attribute
REV16(scratch1_reg, scratch1_reg);
}

if (array == ARRAY_POSITION)
if (array == CPArray::Position)
{
EOR(scratch2_reg, scratch1_reg,
attribute == VertexComponentFormat::Index8 ? LogicalImm(0xFF, 32) :
LogicalImm(0xFFFF, 32));
m_skip_vertex = CBZ(scratch2_reg);
}

LDR(IndexType::Unsigned, scratch2_reg, stride_reg, array * 4);
LDR(IndexType::Unsigned, scratch2_reg, stride_reg, static_cast<u8>(array) * 4);
MUL(scratch1_reg, scratch1_reg, scratch2_reg);

LDR(IndexType::Unsigned, EncodeRegTo64(scratch2_reg), arraybase_reg, array * 8);
LDR(IndexType::Unsigned, EncodeRegTo64(scratch2_reg), arraybase_reg,
static_cast<u8>(array) * 8);
ADD(EncodeRegTo64(reg), EncodeRegTo64(scratch1_reg), EncodeRegTo64(scratch2_reg));
}
else
ADD(reg, src_reg, m_src_ofs);
}

s32 VertexLoaderARM64::GetAddressImm(int array, VertexComponentFormat attribute,
s32 VertexLoaderARM64::GetAddressImm(CPArray array, VertexComponentFormat attribute,
Arm64Gen::ARM64Reg reg, u32 align)
{
if (IsIndexed(attribute) || (m_src_ofs > 255 && (m_src_ofs & (align - 1))))
@@ -219,7 +221,7 @@ int VertexLoaderARM64::ReadVertex(VertexComponentFormat attribute, ComponentForm
native_format->components = count_out;
native_format->enable = true;
native_format->offset = m_dst_ofs;
native_format->type = VAR_FLOAT;
native_format->type = ComponentFormat::Float;
native_format->integer = false;
m_dst_ofs += sizeof(float) * count_out;

@@ -403,8 +405,8 @@ void VertexLoaderARM64::GenerateVertexLoader()
MOV(skipped_reg, ARM64Reg::WZR);
MOV(saved_count, count_reg);

MOVP2R(stride_reg, g_main_cp_state.array_strides);
MOVP2R(arraybase_reg, VertexLoaderManager::cached_arraybases);
MOVP2R(stride_reg, g_main_cp_state.array_strides.data());
MOVP2R(arraybase_reg, VertexLoaderManager::cached_arraybases.data());

if (need_scale)
MOVP2R(scale_reg, scale_factors);
@@ -427,7 +429,7 @@ void VertexLoaderARM64::GenerateVertexLoader()
m_native_vtx_decl.posmtx.components = 4;
m_native_vtx_decl.posmtx.enable = true;
m_native_vtx_decl.posmtx.offset = m_dst_ofs;
m_native_vtx_decl.posmtx.type = VAR_UNSIGNED_BYTE;
m_native_vtx_decl.posmtx.type = ComponentFormat::UByte;
m_native_vtx_decl.posmtx.integer = true;
m_src_ofs += sizeof(u8);
m_dst_ofs += sizeof(u32);
@@ -448,8 +450,8 @@ void VertexLoaderARM64::GenerateVertexLoader()
int load_size = GetLoadSize(load_bytes);
load_size <<= 3;

s32 offset = GetAddressImm(ARRAY_POSITION, m_VtxDesc.low.Position, EncodeRegTo64(scratch1_reg),
load_size);
s32 offset = GetAddressImm(CPArray::Position, m_VtxDesc.low.Position,
EncodeRegTo64(scratch1_reg), load_size);
ReadVertex(m_VtxDesc.low.Position, m_VtxAttr.g0.PosFormat, pos_elements, pos_elements,
m_VtxAttr.g0.ByteDequant, m_VtxAttr.g0.PosFrac, &m_native_vtx_decl.position, offset);
}
@@ -470,7 +472,7 @@ void VertexLoaderARM64::GenerateVertexLoader()
int load_bytes = elem_size * 3;
int load_size = GetLoadSize(load_bytes);

offset = GetAddressImm(ARRAY_NORMAL, m_VtxDesc.low.Normal, EncodeRegTo64(scratch1_reg),
offset = GetAddressImm(CPArray::Normal, m_VtxDesc.low.Normal, EncodeRegTo64(scratch1_reg),
load_size << 3);

if (offset == -1)
@@ -488,10 +490,10 @@ void VertexLoaderARM64::GenerateVertexLoader()
}
}

for (size_t i = 0; i < m_VtxDesc.low.Color.Size(); i++)
for (u8 i = 0; i < m_VtxDesc.low.Color.Size(); i++)
{
m_native_vtx_decl.colors[i].components = 4;
m_native_vtx_decl.colors[i].type = VAR_UNSIGNED_BYTE;
m_native_vtx_decl.colors[i].type = ComponentFormat::UByte;
m_native_vtx_decl.colors[i].integer = false;

if (m_VtxDesc.low.Color[i] != VertexComponentFormat::NotPresent)
@@ -501,22 +503,22 @@ void VertexLoaderARM64::GenerateVertexLoader()
m_VtxAttr.GetColorFormat(i) == ColorFormat::RGBA4444)
align = 2;

s32 offset = GetAddressImm(ARRAY_COLOR0 + int(i), m_VtxDesc.low.Color[i],
s32 offset = GetAddressImm(CPArray::Color0 + i, m_VtxDesc.low.Color[i],
EncodeRegTo64(scratch1_reg), align);
ReadColor(m_VtxDesc.low.Color[i], m_VtxAttr.GetColorFormat(i), offset);
m_native_vtx_decl.colors[i].components = 4;
m_native_vtx_decl.colors[i].enable = true;
m_native_vtx_decl.colors[i].offset = m_dst_ofs;
m_native_vtx_decl.colors[i].type = VAR_UNSIGNED_BYTE;
m_native_vtx_decl.colors[i].type = ComponentFormat::UByte;
m_native_vtx_decl.colors[i].integer = false;
m_dst_ofs += 4;
}
}

for (size_t i = 0; i < m_VtxDesc.high.TexCoord.Size(); i++)
for (u8 i = 0; i < m_VtxDesc.high.TexCoord.Size(); i++)
{
m_native_vtx_decl.texcoords[i].offset = m_dst_ofs;
m_native_vtx_decl.texcoords[i].type = VAR_FLOAT;
m_native_vtx_decl.texcoords[i].type = ComponentFormat::Float;
m_native_vtx_decl.texcoords[i].integer = false;

int elements = m_VtxAttr.GetTexElements(i) == TexComponentCount::S ? 1 : 2;
@@ -527,7 +529,7 @@ void VertexLoaderARM64::GenerateVertexLoader()
int load_size = GetLoadSize(load_bytes);
load_size <<= 3;

s32 offset = GetAddressImm(ARRAY_TEXCOORD0 + int(i), m_VtxDesc.high.TexCoord[i],
s32 offset = GetAddressImm(CPArray::TexCoord0 + i, m_VtxDesc.high.TexCoord[i],
EncodeRegTo64(scratch1_reg), load_size);
u8 scaling_exponent = m_VtxAttr.GetTexFrac(i);
ReadVertex(m_VtxDesc.high.TexCoord[i], m_VtxAttr.GetTexFormat(i), elements,
@@ -538,7 +540,7 @@ void VertexLoaderARM64::GenerateVertexLoader()
{
m_native_vtx_decl.texcoords[i].components = 3;
m_native_vtx_decl.texcoords[i].enable = true;
m_native_vtx_decl.texcoords[i].type = VAR_FLOAT;
m_native_vtx_decl.texcoords[i].type = ComponentFormat::Float;
m_native_vtx_decl.texcoords[i].integer = false;

LDRB(IndexType::Unsigned, scratch2_reg, src_reg, texmatidx_ofs[i]);
@@ -11,6 +11,7 @@ class DataReader;
enum class VertexComponentFormat;
enum class ComponentFormat;
enum class ColorFormat;
enum class CPArray : u8;

class VertexLoaderARM64 : public VertexLoaderBase, public Arm64Gen::ARM64CodeBlock
{
@@ -25,8 +26,9 @@ class VertexLoaderARM64 : public VertexLoaderBase, public Arm64Gen::ARM64CodeBlo
u32 m_dst_ofs = 0;
Arm64Gen::FixupBranch m_skip_vertex;
Arm64Gen::ARM64FloatEmitter m_float_emit;
void GetVertexAddr(int array, VertexComponentFormat attribute, Arm64Gen::ARM64Reg reg);
s32 GetAddressImm(int array, VertexComponentFormat attribute, Arm64Gen::ARM64Reg reg, u32 align);
void GetVertexAddr(CPArray array, VertexComponentFormat attribute, Arm64Gen::ARM64Reg reg);
s32 GetAddressImm(CPArray array, VertexComponentFormat attribute, Arm64Gen::ARM64Reg reg,
u32 align);
int ReadVertex(VertexComponentFormat attribute, ComponentFormat format, int count_in,
int count_out, bool dequantize, u8 scaling_exponent,
AttributeFormat* native_format, s32 offset = -1);
@@ -12,16 +12,14 @@
#include <utility>
#include <vector>

#include "Common/Assert.h"
#include "Common/CommonTypes.h"
#include "Common/EnumMap.h"
#include "Common/Logging/Log.h"

#include "Core/DolphinAnalytics.h"
#include "Core/HW/Memmap.h"

#include "VideoCommon/BPMemory.h"
#include "VideoCommon/CPMemory.h"
#include "VideoCommon/CommandProcessor.h"
#include "VideoCommon/DataReader.h"
#include "VideoCommon/IndexGenerator.h"
#include "VideoCommon/NativeVertexFormat.h"
@@ -48,14 +46,21 @@ static std::mutex s_vertex_loader_map_lock;
static VertexLoaderMap s_vertex_loader_map;
// TODO - change into array of pointers. Keep a map of all seen so far.

u8* cached_arraybases[NUM_VERTEX_COMPONENT_ARRAYS];
Common::EnumMap<u8*, CPArray::TexCoord7> cached_arraybases;

BitSet8 g_main_vat_dirty;
BitSet8 g_preprocess_vat_dirty;
bool g_bases_dirty; // Main only
u8 g_current_vat; // Main only
std::array<VertexLoaderBase*, CP_NUM_VAT_REG> g_main_vertex_loaders;
std::array<VertexLoaderBase*, CP_NUM_VAT_REG> g_preprocess_vertex_loaders;

// Puts the vertex loader manager into its initial state: every VAT is marked dirty, the cached
// loader pointers for both the main and preprocess paths are cleared, and the vertex-loader
// statistic is reset to zero.
void Init()
{
  MarkAllDirty();

  // Clear each loader table exactly once. The previous back-to-back range-for headers nested the
  // loops, so the tables below were re-cleared once per element of an unrelated outer array and
  // the outer loop variable was shadowed and never used.
  g_main_vertex_loaders.fill(nullptr);
  g_preprocess_vertex_loaders.fill(nullptr);

  SETSTAT(g_stats.num_vertex_loaders, 0);
}
@@ -70,7 +75,7 @@ void Clear()
void UpdateVertexArrayPointers()
{
// Anything to update?
if (!g_main_cp_state.bases_dirty)
if (!g_bases_dirty)
return;

// Some games such as Burnout 2 can put invalid addresses into
@@ -80,27 +85,28 @@ void UpdateVertexArrayPointers()
// 12 through 15 are used for loading data into xfmem.
// We also only update the array base if the vertex description states we are going to use it.
if (IsIndexed(g_main_cp_state.vtx_desc.low.Position))
cached_arraybases[ARRAY_POSITION] =
Memory::GetPointer(g_main_cp_state.array_bases[ARRAY_POSITION]);
cached_arraybases[CPArray::Position] =
Memory::GetPointer(g_main_cp_state.array_bases[CPArray::Position]);

if (IsIndexed(g_main_cp_state.vtx_desc.low.Normal))
cached_arraybases[ARRAY_NORMAL] = Memory::GetPointer(g_main_cp_state.array_bases[ARRAY_NORMAL]);
cached_arraybases[CPArray::Normal] =
Memory::GetPointer(g_main_cp_state.array_bases[CPArray::Normal]);

for (size_t i = 0; i < g_main_cp_state.vtx_desc.low.Color.Size(); i++)
for (u8 i = 0; i < g_main_cp_state.vtx_desc.low.Color.Size(); i++)
{
if (IsIndexed(g_main_cp_state.vtx_desc.low.Color[i]))
cached_arraybases[ARRAY_COLOR0 + i] =
Memory::GetPointer(g_main_cp_state.array_bases[ARRAY_COLOR0 + i]);
cached_arraybases[CPArray::Color0 + i] =
Memory::GetPointer(g_main_cp_state.array_bases[CPArray::Color0 + i]);
}

for (size_t i = 0; i < g_main_cp_state.vtx_desc.high.TexCoord.Size(); i++)
for (u8 i = 0; i < g_main_cp_state.vtx_desc.high.TexCoord.Size(); i++)
{
if (IsIndexed(g_main_cp_state.vtx_desc.high.TexCoord[i]))
cached_arraybases[ARRAY_TEXCOORD0 + i] =
Memory::GetPointer(g_main_cp_state.array_bases[ARRAY_TEXCOORD0 + i]);
cached_arraybases[CPArray::TexCoord0 + i] =
Memory::GetPointer(g_main_cp_state.array_bases[CPArray::TexCoord0 + i]);
}

g_main_cp_state.bases_dirty = false;
g_bases_dirty = false;
}

namespace
@@ -115,8 +121,8 @@ struct entry

// Flags the first 8 bits of every dirty-tracking bitset (main and preprocess) as set.
void MarkAllDirty()
{
  const auto all_attr_bits = BitSet32::AllTrue(8);
  const auto all_vat_bits = BitSet8::AllTrue(8);

  g_main_cp_state.attr_dirty = all_attr_bits;
  g_preprocess_cp_state.attr_dirty = all_attr_bits;
  g_main_vat_dirty = all_vat_bits;
  g_preprocess_vat_dirty = all_vat_bits;
}

NativeVertexFormat* GetOrCreateMatchingFormat(const PortableVertexDeclaration& decl)
@@ -140,7 +146,8 @@ NativeVertexFormat* GetUberVertexFormat(const PortableVertexDeclaration& decl)
std::memset(&new_decl, 0, sizeof(new_decl));
new_decl.stride = decl.stride;

auto MakeDummyAttribute = [](AttributeFormat& attr, VarType type, int components, bool integer) {
auto MakeDummyAttribute = [](AttributeFormat& attr, ComponentFormat type, int components,
bool integer) {
attr.type = type;
attr.components = components;
attr.offset = 0;
@@ -158,43 +165,45 @@ NativeVertexFormat* GetUberVertexFormat(const PortableVertexDeclaration& decl)
if (decl.position.enable)
CopyAttribute(new_decl.position, decl.position);
else
MakeDummyAttribute(new_decl.position, VAR_FLOAT, 1, false);
MakeDummyAttribute(new_decl.position, ComponentFormat::Float, 1, false);
for (size_t i = 0; i < std::size(new_decl.normals); i++)
{
if (decl.normals[i].enable)
CopyAttribute(new_decl.normals[i], decl.normals[i]);
else
MakeDummyAttribute(new_decl.normals[i], VAR_FLOAT, 1, false);
MakeDummyAttribute(new_decl.normals[i], ComponentFormat::Float, 1, false);
}
for (size_t i = 0; i < std::size(new_decl.colors); i++)
{
if (decl.colors[i].enable)
CopyAttribute(new_decl.colors[i], decl.colors[i]);
else
MakeDummyAttribute(new_decl.colors[i], VAR_UNSIGNED_BYTE, 4, false);
MakeDummyAttribute(new_decl.colors[i], ComponentFormat::UByte, 4, false);
}
for (size_t i = 0; i < std::size(new_decl.texcoords); i++)
{
if (decl.texcoords[i].enable)
CopyAttribute(new_decl.texcoords[i], decl.texcoords[i]);
else
MakeDummyAttribute(new_decl.texcoords[i], VAR_FLOAT, 1, false);
MakeDummyAttribute(new_decl.texcoords[i], ComponentFormat::Float, 1, false);
}
if (decl.posmtx.enable)
CopyAttribute(new_decl.posmtx, decl.posmtx);
else
MakeDummyAttribute(new_decl.posmtx, VAR_UNSIGNED_BYTE, 1, true);
MakeDummyAttribute(new_decl.posmtx, ComponentFormat::UByte, 1, true);

return GetOrCreateMatchingFormat(new_decl);
}

static VertexLoaderBase* RefreshLoader(int vtx_attr_group, bool preprocess = false)
{
CPState* state = preprocess ? &g_preprocess_cp_state : &g_main_cp_state;
state->last_id = vtx_attr_group;
// Select the dirty bits and the loader table for the requested path. Both ternaries must pick
// the preprocess globals when `preprocess` is true; the loader-table selection used to be
// inverted, so each path read and wrote the other path's cached loaders.
BitSet8& attr_dirty = preprocess ? g_preprocess_vat_dirty : g_main_vat_dirty;
auto& vertex_loaders = preprocess ? g_preprocess_vertex_loaders : g_main_vertex_loaders;
// NOTE(review): g_current_vat is declared "Main only" but is also written here on preprocess
// calls - confirm whether this assignment should be skipped when `preprocess` is true.
g_current_vat = vtx_attr_group;

VertexLoaderBase* loader;
if (state->attr_dirty[vtx_attr_group])
if (attr_dirty[vtx_attr_group])
{
// We are not allowed to create a native vertex format on preprocessing as this is on the wrong
// thread
@@ -224,12 +233,12 @@ static VertexLoaderBase* RefreshLoader(int vtx_attr_group, bool preprocess = fal
native = g_renderer->CreateNativeVertexFormat(format);
loader->m_native_vertex_format = native.get();
}
state->vertex_loaders[vtx_attr_group] = loader;
state->attr_dirty[vtx_attr_group] = false;
vertex_loaders[vtx_attr_group] = loader;
attr_dirty[vtx_attr_group] = false;
}
else
{
loader = state->vertex_loaders[vtx_attr_group];
loader = vertex_loaders[vtx_attr_group];
}

// Lookup pointers for any vertex arrays.
@@ -239,7 +248,8 @@ static VertexLoaderBase* RefreshLoader(int vtx_attr_group, bool preprocess = fal
return loader;
}

int RunVertices(int vtx_attr_group, int primitive, int count, DataReader src, bool is_preprocess)
int RunVertices(int vtx_attr_group, OpcodeDecoder::Primitive primitive, int count, DataReader src,
bool is_preprocess)
{
if (!count)
return 0;
@@ -266,7 +276,8 @@ int RunVertices(int vtx_attr_group, int primitive, int count, DataReader src, bo
// If cull mode is CULL_ALL, tell VertexManager to skip triangles and quads.
// They still need to go through vertex loading, because we need to calculate a zfreeze reference
// slope.
bool cullall = (bpmem.genMode.cullmode == CullMode::All && primitive < 5);
bool cullall = (bpmem.genMode.cullmode == CullMode::All &&
primitive < OpcodeDecoder::Primitive::GX_DRAW_LINES);

DataReader dst = g_vertex_manager->PrepareForAdditionalData(
primitive, count, loader->m_native_vtx_decl.stride, cullall);
@@ -287,147 +298,3 @@ NativeVertexFormat* GetCurrentVertexFormat()
}

} // namespace VertexLoaderManager

// Applies a write of `value` to the CP register addressed by `sub_cmd`.
//
// The register group is chosen by `sub_cmd & CP_COMMAND_MASK`. `is_preprocess` selects which
// CPState is updated (g_preprocess_cp_state or g_main_cp_state); the matrix-index notifications
// to VertexShaderManager are only sent when it is false.
//
// Unexpected register numbers are reported through DolphinAnalytics and the video log. Except
// for the unknown/perf group and the default case, the write is still performed after the report.
void LoadCPReg(u32 sub_cmd, u32 value, bool is_preprocess)
{
  CPState& cp = is_preprocess ? g_preprocess_cp_state : g_main_cp_state;
  const bool touches_global_state = !is_preprocess;

  switch (sub_cmd & CP_COMMAND_MASK)
  {
  case UNKNOWN_00:
  case UNKNOWN_10:
  case UNKNOWN_20:
    // Everything in this group is reported except the exact pair (UNKNOWN_20, value 0).
    if (sub_cmd != UNKNOWN_20 || value != 0)
    {
      // All titles using libogc or the official SDK issue 0x20 with value=0 on startup
      DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_CP_PERF_COMMAND);
      DEBUG_LOG_FMT(VIDEO, "Unknown CP command possibly relating to perf queries used: {:02x}",
                    sub_cmd);
    }
    break;

  case MATINDEX_A:
    if (sub_cmd != MATINDEX_A)
    {
      DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_MAYBE_INVALID_CP_COMMAND);
      WARN_LOG_FMT(VIDEO,
                   "CP MATINDEX_A: an exact value of {:02x} was expected "
                   "but instead a value of {:02x} was seen",
                   MATINDEX_A, sub_cmd);
    }

    if (touches_global_state)
      VertexShaderManager::SetTexMatrixChangedA(value);
    break;

  case MATINDEX_B:
    if (sub_cmd != MATINDEX_B)
    {
      DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_MAYBE_INVALID_CP_COMMAND);
      WARN_LOG_FMT(VIDEO,
                   "CP MATINDEX_B: an exact value of {:02x} was expected "
                   "but instead a value of {:02x} was seen",
                   MATINDEX_B, sub_cmd);
    }

    if (touches_global_state)
      VertexShaderManager::SetTexMatrixChangedB(value);
    break;

  case VCD_LO:
    if (sub_cmd != VCD_LO)  // Stricter than YAGCD
    {
      DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_MAYBE_INVALID_CP_COMMAND);
      WARN_LOG_FMT(VIDEO,
                   "CP VCD_LO: an exact value of {:02x} was expected "
                   "but instead a value of {:02x} was seen",
                   VCD_LO, sub_cmd);
    }

    // A new vertex descriptor marks every VAT and the array bases dirty.
    cp.vtx_desc.low.Hex = value;
    cp.attr_dirty = BitSet32::AllTrue(CP_NUM_VAT_REG);
    cp.bases_dirty = true;
    break;

  case VCD_HI:
    if (sub_cmd != VCD_HI)  // Stricter than YAGCD
    {
      DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_MAYBE_INVALID_CP_COMMAND);
      WARN_LOG_FMT(VIDEO,
                   "CP VCD_HI: an exact value of {:02x} was expected "
                   "but instead a value of {:02x} was seen",
                   VCD_HI, sub_cmd);
    }

    cp.vtx_desc.high.Hex = value;
    cp.attr_dirty = BitSet32::AllTrue(CP_NUM_VAT_REG);
    cp.bases_dirty = true;
    break;

  case CP_VAT_REG_A:
  {
    if ((sub_cmd - CP_VAT_REG_A) >= CP_NUM_VAT_REG)
    {
      DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_MAYBE_INVALID_CP_COMMAND);
      WARN_LOG_FMT(VIDEO, "CP_VAT_REG_A: Invalid VAT {}", sub_cmd - CP_VAT_REG_A);
    }
    // The write goes ahead even after the warning above, using the masked index.
    const auto vat_index = sub_cmd & CP_VAT_MASK;
    cp.vtx_attr[vat_index].g0.Hex = value;
    cp.attr_dirty[vat_index] = true;
    break;
  }

  case CP_VAT_REG_B:
  {
    if ((sub_cmd - CP_VAT_REG_B) >= CP_NUM_VAT_REG)
    {
      DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_MAYBE_INVALID_CP_COMMAND);
      WARN_LOG_FMT(VIDEO, "CP_VAT_REG_B: Invalid VAT {}", sub_cmd - CP_VAT_REG_B);
    }
    const auto vat_index = sub_cmd & CP_VAT_MASK;
    cp.vtx_attr[vat_index].g1.Hex = value;
    cp.attr_dirty[vat_index] = true;
    break;
  }

  case CP_VAT_REG_C:
  {
    if ((sub_cmd - CP_VAT_REG_C) >= CP_NUM_VAT_REG)
    {
      DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_MAYBE_INVALID_CP_COMMAND);
      WARN_LOG_FMT(VIDEO, "CP_VAT_REG_C: Invalid VAT {}", sub_cmd - CP_VAT_REG_C);
    }
    const auto vat_index = sub_cmd & CP_VAT_MASK;
    cp.vtx_attr[vat_index].g2.Hex = value;
    cp.attr_dirty[vat_index] = true;
    break;
  }

  // Pointers to vertex arrays in GC RAM
  case ARRAY_BASE:
  {
    const auto array_index = sub_cmd & CP_ARRAY_MASK;
    cp.array_bases[array_index] = value & CommandProcessor::GetPhysicalAddressMask();
    cp.bases_dirty = true;
    break;
  }

  case ARRAY_STRIDE:
  {
    // Only the low 8 bits of the value are kept as the stride.
    const auto array_index = sub_cmd & CP_ARRAY_MASK;
    cp.array_strides[array_index] = value & 0xFF;
    break;
  }

  default:
    DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_UNKNOWN_CP_COMMAND);
    WARN_LOG_FMT(VIDEO, "Unknown CP register {:02x} set to {:08x}", sub_cmd, value);
    break;
  }
}

// Copies the register values held in g_main_cp_state into `memory`, where each value is stored
// at the index of its CP register number (MATINDEX_A, VCD_LO, CP_VAT_REG_A + n, ARRAY_BASE + n,
// ...). `memory` must be large enough to be indexed by every register number written here.
void FillCPMemoryArray(u32* memory)
{
  const CPState& cp = g_main_cp_state;

  memory[MATINDEX_A] = cp.matrix_index_a.Hex;
  memory[MATINDEX_B] = cp.matrix_index_b.Hex;
  memory[VCD_LO] = cp.vtx_desc.low.Hex;
  memory[VCD_HI] = cp.vtx_desc.high.Hex;

  // Each vertex attribute table is split across three register groups (A, B and C).
  for (int vat = 0; vat < CP_NUM_VAT_REG; ++vat)
  {
    const auto& attr = cp.vtx_attr[vat];
    memory[CP_VAT_REG_A + vat] = attr.g0.Hex;
    memory[CP_VAT_REG_B + vat] = attr.g1.Hex;
    memory[CP_VAT_REG_C + vat] = attr.g2.Hex;
  }

  for (int array = 0; array < CP_NUM_ARRAYS; ++array)
  {
    memory[ARRAY_BASE + array] = cp.array_bases[array];
    memory[ARRAY_STRIDE + array] = cp.array_strides[array];
  }
}
@@ -3,17 +3,24 @@

#pragma once

#include <array>
#include <memory>
#include <string>
#include <unordered_map>

#include "Common/CommonTypes.h"
#include "Common/EnumMap.h"
#include "VideoCommon/CPMemory.h"

class DataReader;
class NativeVertexFormat;
struct PortableVertexDeclaration;

namespace OpcodeDecoder
{
// Opaque declaration only; the enumerators are defined elsewhere. It is sufficient for
// declaring functions in this header that take a Primitive by value.
enum class Primitive : u8;
}  // namespace OpcodeDecoder

namespace VertexLoaderManager
{
using NativeVertexFormatMap =
@@ -35,12 +42,13 @@ NativeVertexFormat* GetOrCreateMatchingFormat(const PortableVertexDeclaration& d
NativeVertexFormat* GetUberVertexFormat(const PortableVertexDeclaration& decl);

// Returns -1 if buf_size is insufficient, else the amount of bytes consumed
int RunVertices(int vtx_attr_group, int primitive, int count, DataReader src, bool is_preprocess);
int RunVertices(int vtx_attr_group, OpcodeDecoder::Primitive primitive, int count, DataReader src,
bool is_preprocess);

NativeVertexFormat* GetCurrentVertexFormat();

// Resolved pointers to array bases. Used by vertex loaders.
extern u8* cached_arraybases[NUM_VERTEX_COMPONENT_ARRAYS];
extern Common::EnumMap<u8*, CPArray::TexCoord7> cached_arraybases;
void UpdateVertexArrayPointers();

// Position cache for zfreeze (3 vertices, 4 floats each to allow SIMD overwrite).
@@ -50,4 +58,11 @@ extern u32 position_matrix_index[4];

// VB_HAS_X. Bitmask telling what vertex components are present.
extern u32 g_current_components;

extern BitSet8 g_main_vat_dirty;
extern BitSet8 g_preprocess_vat_dirty;
extern bool g_bases_dirty; // Main only
extern u8 g_current_vat; // Main only
extern std::array<VertexLoaderBase*, CP_NUM_VAT_REG> g_main_vertex_loaders;
extern std::array<VertexLoaderBase*, CP_NUM_VAT_REG> g_preprocess_vertex_loaders;
} // namespace VertexLoaderManager
@@ -15,6 +15,7 @@
#include "Common/JitRegister.h"
#include "Common/x64ABI.h"
#include "Common/x64Emitter.h"
#include "VideoCommon/CPMemory.h"
#include "VideoCommon/DataReader.h"
#include "VideoCommon/VertexLoaderManager.h"

@@ -54,15 +55,15 @@ VertexLoaderX64::VertexLoaderX64(const TVtxDesc& vtx_desc, const VAT& vtx_att)
JitRegister::Register(region, GetCodePtr(), name.c_str());
}

OpArg VertexLoaderX64::GetVertexAddr(int array, VertexComponentFormat attribute)
OpArg VertexLoaderX64::GetVertexAddr(CPArray array, VertexComponentFormat attribute)
{
OpArg data = MDisp(src_reg, m_src_ofs);
if (IsIndexed(attribute))
{
int bits = attribute == VertexComponentFormat::Index8 ? 8 : 16;
LoadAndSwap(bits, scratch1, data);
m_src_ofs += bits / 8;
if (array == ARRAY_POSITION)
if (array == CPArray::Position)
{
CMP(bits, R(scratch1), Imm8(-1));
m_skip_vertex = J_CC(CC_E, true);
@@ -121,7 +122,7 @@ int VertexLoaderX64::ReadVertex(OpArg data, VertexComponentFormat attribute, Com
native_format->components = count_out;
native_format->enable = true;
native_format->offset = m_dst_ofs;
native_format->type = VAR_FLOAT;
native_format->type = ComponentFormat::Float;
native_format->integer = false;

m_dst_ofs += sizeof(float) * count_out;
@@ -420,7 +421,7 @@ void VertexLoaderX64::GenerateVertexLoader()
m_native_vtx_decl.posmtx.components = 4;
m_native_vtx_decl.posmtx.enable = true;
m_native_vtx_decl.posmtx.offset = m_dst_ofs;
m_native_vtx_decl.posmtx.type = VAR_UNSIGNED_BYTE;
m_native_vtx_decl.posmtx.type = ComponentFormat::UByte;
m_native_vtx_decl.posmtx.integer = true;
m_src_ofs += sizeof(u8);
m_dst_ofs += sizeof(u32);
@@ -433,7 +434,7 @@ void VertexLoaderX64::GenerateVertexLoader()
texmatidx_ofs[i] = m_src_ofs++;
}

OpArg data = GetVertexAddr(ARRAY_POSITION, m_VtxDesc.low.Position);
OpArg data = GetVertexAddr(CPArray::Position, m_VtxDesc.low.Position);
int pos_elements = m_VtxAttr.g0.PosElements == CoordComponentCount::XY ? 2 : 3;
ReadVertex(data, m_VtxDesc.low.Position, m_VtxAttr.g0.PosFormat, pos_elements, pos_elements,
m_VtxAttr.g0.ByteDequant, m_VtxAttr.g0.PosFrac, &m_native_vtx_decl.position);
@@ -448,7 +449,7 @@ void VertexLoaderX64::GenerateVertexLoader()
{
if (!i || m_VtxAttr.g0.NormalIndex3)
{
data = GetVertexAddr(ARRAY_NORMAL, m_VtxDesc.low.Normal);
data = GetVertexAddr(CPArray::Normal, m_VtxDesc.low.Normal);
int elem_size = GetElementSize(m_VtxAttr.g0.NormalFormat);
data.AddMemOffset(i * elem_size * 3);
}
@@ -457,27 +458,27 @@ void VertexLoaderX64::GenerateVertexLoader()
}
}

for (size_t i = 0; i < m_VtxDesc.low.Color.Size(); i++)
for (u8 i = 0; i < m_VtxDesc.low.Color.Size(); i++)
{
if (m_VtxDesc.low.Color[i] != VertexComponentFormat::NotPresent)
{
data = GetVertexAddr(ARRAY_COLOR0 + int(i), m_VtxDesc.low.Color[i]);
data = GetVertexAddr(CPArray::Color0 + i, m_VtxDesc.low.Color[i]);
ReadColor(data, m_VtxDesc.low.Color[i], m_VtxAttr.GetColorFormat(i));
m_native_vtx_decl.colors[i].components = 4;
m_native_vtx_decl.colors[i].enable = true;
m_native_vtx_decl.colors[i].offset = m_dst_ofs;
m_native_vtx_decl.colors[i].type = VAR_UNSIGNED_BYTE;
m_native_vtx_decl.colors[i].type = ComponentFormat::UByte;
m_native_vtx_decl.colors[i].integer = false;
m_dst_ofs += 4;
}
}

for (size_t i = 0; i < m_VtxDesc.high.TexCoord.Size(); i++)
for (u8 i = 0; i < m_VtxDesc.high.TexCoord.Size(); i++)
{
int elements = m_VtxAttr.GetTexElements(i) == TexComponentCount::ST ? 2 : 1;
if (m_VtxDesc.high.TexCoord[i] != VertexComponentFormat::NotPresent)
{
data = GetVertexAddr(ARRAY_TEXCOORD0 + int(i), m_VtxDesc.high.TexCoord[i]);
data = GetVertexAddr(CPArray::TexCoord0 + i, m_VtxDesc.high.TexCoord[i]);
u8 scaling_exponent = m_VtxAttr.GetTexFrac(i);
ReadVertex(data, m_VtxDesc.high.TexCoord[i], m_VtxAttr.GetTexFormat(i), elements,
m_VtxDesc.low.TexMatIdx[i] ? 2 : elements, m_VtxAttr.g0.ByteDequant,
@@ -487,7 +488,7 @@ void VertexLoaderX64::GenerateVertexLoader()
{
m_native_vtx_decl.texcoords[i].components = 3;
m_native_vtx_decl.texcoords[i].enable = true;
m_native_vtx_decl.texcoords[i].type = VAR_FLOAT;
m_native_vtx_decl.texcoords[i].type = ComponentFormat::Float;
m_native_vtx_decl.texcoords[i].integer = false;
MOVZX(64, 8, scratch1, MDisp(src_reg, texmatidx_ofs[i]));
if (m_VtxDesc.high.TexCoord[i] != VertexComponentFormat::NotPresent)
@@ -10,6 +10,7 @@
enum class VertexComponentFormat;
enum class ComponentFormat;
enum class ColorFormat;
enum class CPArray : u8;

class VertexLoaderX64 : public VertexLoaderBase, public Gen::X64CodeBlock
{
@@ -23,7 +24,7 @@ class VertexLoaderX64 : public VertexLoaderBase, public Gen::X64CodeBlock
u32 m_src_ofs = 0;
u32 m_dst_ofs = 0;
Gen::FixupBranch m_skip_vertex;
Gen::OpArg GetVertexAddr(int array, VertexComponentFormat attribute);
Gen::OpArg GetVertexAddr(CPArray array, VertexComponentFormat attribute);
int ReadVertex(Gen::OpArg data, VertexComponentFormat attribute, ComponentFormat format,
int count_in, int count_out, bool dequantize, u8 scaling_exponent,
AttributeFormat* native_format);
@@ -6,6 +6,7 @@
#include <cstring>

#include "Common/CommonTypes.h"
#include "Common/EnumMap.h"
#include "Common/MsgHandler.h"
#include "Common/Swap.h"

@@ -78,8 +79,8 @@ void Color_ReadIndex_16b_565(VertexLoader* loader)
{
const auto index = DataRead<I>();
const u8* const address =
VertexLoaderManager::cached_arraybases[ARRAY_COLOR0 + loader->m_colIndex] +
(index * g_main_cp_state.array_strides[ARRAY_COLOR0 + loader->m_colIndex]);
VertexLoaderManager::cached_arraybases[CPArray::Color0 + loader->m_colIndex] +
(index * g_main_cp_state.array_strides[CPArray::Color0 + loader->m_colIndex]);

u16 value;
std::memcpy(&value, address, sizeof(u16));
@@ -91,27 +92,27 @@ template <typename I>
void Color_ReadIndex_24b_888(VertexLoader* loader)
{
const auto index = DataRead<I>();
const u8* address = VertexLoaderManager::cached_arraybases[ARRAY_COLOR0 + loader->m_colIndex] +
(index * g_main_cp_state.array_strides[ARRAY_COLOR0 + loader->m_colIndex]);
const u8* address = VertexLoaderManager::cached_arraybases[CPArray::Color0 + loader->m_colIndex] +
(index * g_main_cp_state.array_strides[CPArray::Color0 + loader->m_colIndex]);
SetCol(loader, Read24(address));
}

template <typename I>
void Color_ReadIndex_32b_888x(VertexLoader* loader)
{
const auto index = DataRead<I>();
const u8* address = VertexLoaderManager::cached_arraybases[ARRAY_COLOR0 + loader->m_colIndex] +
(index * g_main_cp_state.array_strides[ARRAY_COLOR0 + loader->m_colIndex]);
const u8* address = VertexLoaderManager::cached_arraybases[CPArray::Color0 + loader->m_colIndex] +
(index * g_main_cp_state.array_strides[CPArray::Color0 + loader->m_colIndex]);
SetCol(loader, Read24(address));
}

template <typename I>
void Color_ReadIndex_16b_4444(VertexLoader* loader)
{
auto const index = DataRead<I>();
const auto index = DataRead<I>();
const u8* const address =
VertexLoaderManager::cached_arraybases[ARRAY_COLOR0 + loader->m_colIndex] +
(index * g_main_cp_state.array_strides[ARRAY_COLOR0 + loader->m_colIndex]);
VertexLoaderManager::cached_arraybases[CPArray::Color0 + loader->m_colIndex] +
(index * g_main_cp_state.array_strides[CPArray::Color0 + loader->m_colIndex]);

u16 value;
std::memcpy(&value, address, sizeof(u16));
@@ -123,18 +124,18 @@ template <typename I>
void Color_ReadIndex_24b_6666(VertexLoader* loader)
{
const auto index = DataRead<I>();
const u8* data = VertexLoaderManager::cached_arraybases[ARRAY_COLOR0 + loader->m_colIndex] +
(index * g_main_cp_state.array_strides[ARRAY_COLOR0 + loader->m_colIndex]) - 1;
const u32 val = Common::swap32(data);
const u8* data = VertexLoaderManager::cached_arraybases[CPArray::Color0 + loader->m_colIndex] +
(index * g_main_cp_state.array_strides[CPArray::Color0 + loader->m_colIndex]);
const u32 val = Common::swap24(data);
SetCol6666(loader, val);
}

template <typename I>
void Color_ReadIndex_32b_8888(VertexLoader* loader)
{
const auto index = DataRead<I>();
const u8* address = VertexLoaderManager::cached_arraybases[ARRAY_COLOR0 + loader->m_colIndex] +
(index * g_main_cp_state.array_strides[ARRAY_COLOR0 + loader->m_colIndex]);
const u8* address = VertexLoaderManager::cached_arraybases[CPArray::Color0 + loader->m_colIndex] +
(index * g_main_cp_state.array_strides[CPArray::Color0 + loader->m_colIndex]);
SetCol(loader, Read32(address));
}

@@ -166,7 +167,7 @@ void Color_ReadDirect_16b_4444(VertexLoader* loader)

void Color_ReadDirect_24b_6666(VertexLoader* loader)
{
SetCol6666(loader, Common::swap32(DataGetPosition() - 1));
SetCol6666(loader, Common::swap24(DataGetPosition()));
DataSkip(3);
}

@@ -175,21 +176,40 @@ void Color_ReadDirect_32b_8888(VertexLoader* loader)
SetCol(loader, DataReadU32Unswapped());
}

constexpr TPipelineFunction s_table_read_color[4][6] = {
{nullptr, nullptr, nullptr, nullptr, nullptr, nullptr},
{Color_ReadDirect_16b_565, Color_ReadDirect_24b_888, Color_ReadDirect_32b_888x,
Color_ReadDirect_16b_4444, Color_ReadDirect_24b_6666, Color_ReadDirect_32b_8888},
{Color_ReadIndex_16b_565<u8>, Color_ReadIndex_24b_888<u8>, Color_ReadIndex_32b_888x<u8>,
Color_ReadIndex_16b_4444<u8>, Color_ReadIndex_24b_6666<u8>, Color_ReadIndex_32b_8888<u8>},
{Color_ReadIndex_16b_565<u16>, Color_ReadIndex_24b_888<u16>, Color_ReadIndex_32b_888x<u16>,
Color_ReadIndex_16b_4444<u16>, Color_ReadIndex_24b_6666<u16>, Color_ReadIndex_32b_8888<u16>},
using Common::EnumMap;

// These functions are to work around a "too many initializer values" error with nested brackets
// C++ does not let you write std::array<std::array<u32, 2>, 2> a = {{1, 2}, {3, 4}}
// (although it does allow std::array<std::array<u32, 2>, 2> b = {1, 2, 3, 4})
constexpr EnumMap<TPipelineFunction, ColorFormat::RGBA8888>
f(EnumMap<TPipelineFunction, ColorFormat::RGBA8888> in)
{
return in;
}
constexpr EnumMap<u32, ColorFormat::RGBA8888> g(EnumMap<u32, ColorFormat::RGBA8888> in)
{
return in;
}

// Two-level lookup table holding a T for every combination of vertex component format and
// color format. The outer index is a VertexComponentFormat, the inner index a ColorFormat,
// i.e. table[type][format].
template <typename T>
using Table = EnumMap<EnumMap<T, ColorFormat::RGBA8888>, VertexComponentFormat::Index16>;

// Color loader function for each (VertexComponentFormat, ColorFormat) pair, looked up as
// s_table_read_color[type][format] by VertexLoader_Color::GetFunction.
//
// Entries are positional. Rows follow the VertexComponentFormat order (presumably NotPresent,
// Direct, Index8, Index16 - TODO confirm against the enum definition); within a row the
// entries follow the ColorFormat order, which the reader names spell out as
// 565, 888, 888x, 4444, 6666, 8888.
constexpr Table<TPipelineFunction> s_table_read_color = {
// No reader for any color format.
f({nullptr, nullptr, nullptr, nullptr, nullptr, nullptr}),
// Direct readers.
f({Color_ReadDirect_16b_565, Color_ReadDirect_24b_888, Color_ReadDirect_32b_888x,
Color_ReadDirect_16b_4444, Color_ReadDirect_24b_6666, Color_ReadDirect_32b_8888}),
// Indexed readers instantiated with a u8 index type.
f({Color_ReadIndex_16b_565<u8>, Color_ReadIndex_24b_888<u8>, Color_ReadIndex_32b_888x<u8>,
Color_ReadIndex_16b_4444<u8>, Color_ReadIndex_24b_6666<u8>, Color_ReadIndex_32b_8888<u8>}),
// Indexed readers instantiated with a u16 index type.
f({Color_ReadIndex_16b_565<u16>, Color_ReadIndex_24b_888<u16>, Color_ReadIndex_32b_888x<u16>,
Color_ReadIndex_16b_4444<u16>, Color_ReadIndex_24b_6666<u16>,
Color_ReadIndex_32b_8888<u16>}),
};

constexpr u32 s_table_read_color_vertex_size[4][6] = {
{0, 0, 0, 0, 0, 0},
{2, 3, 4, 2, 3, 4},
{1, 1, 1, 1, 1, 1},
{2, 2, 2, 2, 2, 2},
// Size value for each (VertexComponentFormat, ColorFormat) pair, looked up as
// s_table_read_color_vertex_size[type][format] by VertexLoader_Color::GetSize.
// Rows and columns are positional and use the same order as s_table_read_color.
constexpr Table<u32> s_table_read_color_vertex_size = {
// Row paired with the nullptr readers: always zero.
g({0u, 0u, 0u, 0u, 0u, 0u}),
// Row paired with the Direct readers: matches the bit widths in their names
// (16b -> 2, 24b -> 3, 32b -> 4).
g({2u, 3u, 4u, 2u, 3u, 4u}),
// Row paired with the u8-index readers: 1 for every format (presumably the size of the
// index itself - TODO confirm).
g({1u, 1u, 1u, 1u, 1u, 1u}),
// Row paired with the u16-index readers: 2 for every format (presumably the size of the
// index itself - TODO confirm).
g({2u, 2u, 2u, 2u, 2u, 2u}),
};
} // Anonymous namespace

@@ -200,7 +220,7 @@ u32 VertexLoader_Color::GetSize(VertexComponentFormat type, ColorFormat format)
PanicAlertFmt("Invalid color format {}", format);
return 0;
}
return s_table_read_color_vertex_size[u32(type)][u32(format)];
return s_table_read_color_vertex_size[type][format];
}

TPipelineFunction VertexLoader_Color::GetFunction(VertexComponentFormat type, ColorFormat format)
@@ -210,5 +230,5 @@ TPipelineFunction VertexLoader_Color::GetFunction(VertexComponentFormat type, Co
PanicAlertFmt("Invalid color format {}", format);
return nullptr;
}
return s_table_read_color[u32(type)][u32(format)];
return s_table_read_color[type][format];
}
@@ -7,6 +7,7 @@
#include <type_traits>

#include "Common/CommonTypes.h"
#include "Common/EnumMap.h"

#include "VideoCommon/DataReader.h"
#include "VideoCommon/VertexLoader.h"
@@ -58,7 +59,7 @@ struct Normal_Direct
{
static void function([[maybe_unused]] VertexLoader* loader)
{
auto const source = reinterpret_cast<const T*>(DataGetPosition());
const auto source = reinterpret_cast<const T*>(DataGetPosition());
ReadIndirect<T, N * 3>(source);
DataSkip<N * 3 * sizeof(T)>();
}
@@ -71,10 +72,10 @@ void Normal_Index_Offset()
{
static_assert(std::is_unsigned_v<I>, "Only unsigned I is sane!");

auto const index = DataRead<I>();
auto const data = reinterpret_cast<const T*>(
VertexLoaderManager::cached_arraybases[ARRAY_NORMAL] +
(index * g_main_cp_state.array_strides[ARRAY_NORMAL]) + sizeof(T) * 3 * Offset);
const auto index = DataRead<I>();
const auto data = reinterpret_cast<const T*>(
VertexLoaderManager::cached_arraybases[CPArray::Normal] +
(index * g_main_cp_state.array_strides[CPArray::Normal]) + sizeof(T) * 3 * Offset);
ReadIndirect<T, N * 3>(data);
}

@@ -98,39 +99,6 @@ struct Normal_Index_Indices3
static constexpr u32 size = sizeof(I) * 3;
};

enum NormalType
{
NRM_NOT_PRESENT = 0,
NRM_DIRECT = 1,
NRM_INDEX8 = 2,
NRM_INDEX16 = 3,
NUM_NRM_TYPE
};

enum NormalFormat
{
FORMAT_UBYTE = 0,
FORMAT_BYTE = 1,
FORMAT_USHORT = 2,
FORMAT_SHORT = 3,
FORMAT_FLOAT = 4,
NUM_NRM_FORMAT
};

enum NormalElements
{
NRM_NBT = 0,
NRM_NBT3 = 1,
NUM_NRM_ELEMENTS
};

enum NormalIndices
{
NRM_INDICES1 = 0,
NRM_INDICES3 = 1,
NUM_NRM_INDICES
};

struct Set
{
template <typename T>
@@ -145,83 +113,88 @@ struct Set
TPipelineFunction function;
};

using Formats = std::array<Set, NUM_NRM_FORMAT>;
using Elements = std::array<Formats, NUM_NRM_ELEMENTS>;
using Indices = std::array<Elements, NUM_NRM_INDICES>;
using Types = std::array<Indices, NUM_NRM_TYPE>;
using Common::EnumMap;
using Formats = EnumMap<Set, ComponentFormat::Float>;
using Elements = EnumMap<Formats, NormalComponentCount::NBT>;
using Indices = std::array<Elements, 2>;
using Types = EnumMap<Indices, VertexComponentFormat::Index16>;

constexpr Types InitializeTable()
{
Types table{};

table[NRM_DIRECT][NRM_INDICES1][NRM_NBT][FORMAT_UBYTE] = Normal_Direct<u8, 1>();
table[NRM_DIRECT][NRM_INDICES1][NRM_NBT][FORMAT_BYTE] = Normal_Direct<s8, 1>();
table[NRM_DIRECT][NRM_INDICES1][NRM_NBT][FORMAT_USHORT] = Normal_Direct<u16, 1>();
table[NRM_DIRECT][NRM_INDICES1][NRM_NBT][FORMAT_SHORT] = Normal_Direct<s16, 1>();
table[NRM_DIRECT][NRM_INDICES1][NRM_NBT][FORMAT_FLOAT] = Normal_Direct<float, 1>();
table[NRM_DIRECT][NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Normal_Direct<u8, 3>();
table[NRM_DIRECT][NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Normal_Direct<s8, 3>();
table[NRM_DIRECT][NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Normal_Direct<u16, 3>();
table[NRM_DIRECT][NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Normal_Direct<s16, 3>();
table[NRM_DIRECT][NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Normal_Direct<float, 3>();

// Same as above
table[NRM_DIRECT][NRM_INDICES3][NRM_NBT][FORMAT_UBYTE] = Normal_Direct<u8, 1>();
table[NRM_DIRECT][NRM_INDICES3][NRM_NBT][FORMAT_BYTE] = Normal_Direct<s8, 1>();
table[NRM_DIRECT][NRM_INDICES3][NRM_NBT][FORMAT_USHORT] = Normal_Direct<u16, 1>();
table[NRM_DIRECT][NRM_INDICES3][NRM_NBT][FORMAT_SHORT] = Normal_Direct<s16, 1>();
table[NRM_DIRECT][NRM_INDICES3][NRM_NBT][FORMAT_FLOAT] = Normal_Direct<float, 1>();
table[NRM_DIRECT][NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Normal_Direct<u8, 3>();
table[NRM_DIRECT][NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Normal_Direct<s8, 3>();
table[NRM_DIRECT][NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Normal_Direct<u16, 3>();
table[NRM_DIRECT][NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Normal_Direct<s16, 3>();
table[NRM_DIRECT][NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Normal_Direct<float, 3>();

table[NRM_INDEX8][NRM_INDICES1][NRM_NBT][FORMAT_UBYTE] = Normal_Index<u8, u8, 1>();
table[NRM_INDEX8][NRM_INDICES1][NRM_NBT][FORMAT_BYTE] = Normal_Index<u8, s8, 1>();
table[NRM_INDEX8][NRM_INDICES1][NRM_NBT][FORMAT_USHORT] = Normal_Index<u8, u16, 1>();
table[NRM_INDEX8][NRM_INDICES1][NRM_NBT][FORMAT_SHORT] = Normal_Index<u8, s16, 1>();
table[NRM_INDEX8][NRM_INDICES1][NRM_NBT][FORMAT_FLOAT] = Normal_Index<u8, float, 1>();
table[NRM_INDEX8][NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Normal_Index<u8, u8, 3>();
table[NRM_INDEX8][NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Normal_Index<u8, s8, 3>();
table[NRM_INDEX8][NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Normal_Index<u8, u16, 3>();
table[NRM_INDEX8][NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Normal_Index<u8, s16, 3>();
table[NRM_INDEX8][NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Normal_Index<u8, float, 3>();

// Same as above for NRM_NBT
table[NRM_INDEX8][NRM_INDICES3][NRM_NBT][FORMAT_UBYTE] = Normal_Index<u8, u8, 1>();
table[NRM_INDEX8][NRM_INDICES3][NRM_NBT][FORMAT_BYTE] = Normal_Index<u8, s8, 1>();
table[NRM_INDEX8][NRM_INDICES3][NRM_NBT][FORMAT_USHORT] = Normal_Index<u8, u16, 1>();
table[NRM_INDEX8][NRM_INDICES3][NRM_NBT][FORMAT_SHORT] = Normal_Index<u8, s16, 1>();
table[NRM_INDEX8][NRM_INDICES3][NRM_NBT][FORMAT_FLOAT] = Normal_Index<u8, float, 1>();
table[NRM_INDEX8][NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Normal_Index_Indices3<u8, u8>();
table[NRM_INDEX8][NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Normal_Index_Indices3<u8, s8>();
table[NRM_INDEX8][NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Normal_Index_Indices3<u8, u16>();
table[NRM_INDEX8][NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Normal_Index_Indices3<u8, s16>();
table[NRM_INDEX8][NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Normal_Index_Indices3<u8, float>();

table[NRM_INDEX16][NRM_INDICES1][NRM_NBT][FORMAT_UBYTE] = Normal_Index<u16, u8, 1>();
table[NRM_INDEX16][NRM_INDICES1][NRM_NBT][FORMAT_BYTE] = Normal_Index<u16, s8, 1>();
table[NRM_INDEX16][NRM_INDICES1][NRM_NBT][FORMAT_USHORT] = Normal_Index<u16, u16, 1>();
table[NRM_INDEX16][NRM_INDICES1][NRM_NBT][FORMAT_SHORT] = Normal_Index<u16, s16, 1>();
table[NRM_INDEX16][NRM_INDICES1][NRM_NBT][FORMAT_FLOAT] = Normal_Index<u16, float, 1>();
table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Normal_Index<u16, u8, 3>();
table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Normal_Index<u16, s8, 3>();
table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Normal_Index<u16, u16, 3>();
table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Normal_Index<u16, s16, 3>();
table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Normal_Index<u16, float, 3>();

// Same as above for NRM_NBT
table[NRM_INDEX16][NRM_INDICES3][NRM_NBT][FORMAT_UBYTE] = Normal_Index<u16, u8, 1>();
table[NRM_INDEX16][NRM_INDICES3][NRM_NBT][FORMAT_BYTE] = Normal_Index<u16, s8, 1>();
table[NRM_INDEX16][NRM_INDICES3][NRM_NBT][FORMAT_USHORT] = Normal_Index<u16, u16, 1>();
table[NRM_INDEX16][NRM_INDICES3][NRM_NBT][FORMAT_SHORT] = Normal_Index<u16, s16, 1>();
table[NRM_INDEX16][NRM_INDICES3][NRM_NBT][FORMAT_FLOAT] = Normal_Index<u16, float, 1>();
table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Normal_Index_Indices3<u16, u8>();
table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Normal_Index_Indices3<u16, s8>();
table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Normal_Index_Indices3<u16, u16>();
table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Normal_Index_Indices3<u16, s16>();
table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Normal_Index_Indices3<u16, float>();
using VCF = VertexComponentFormat;
using NCC = NormalComponentCount;
using FMT = ComponentFormat;

table[VCF::Direct][false][NCC::N][FMT::UByte] = Normal_Direct<u8, 1>();
table[VCF::Direct][false][NCC::N][FMT::Byte] = Normal_Direct<s8, 1>();
table[VCF::Direct][false][NCC::N][FMT::UShort] = Normal_Direct<u16, 1>();
table[VCF::Direct][false][NCC::N][FMT::Short] = Normal_Direct<s16, 1>();
table[VCF::Direct][false][NCC::N][FMT::Float] = Normal_Direct<float, 1>();
table[VCF::Direct][false][NCC::NBT][FMT::UByte] = Normal_Direct<u8, 3>();
table[VCF::Direct][false][NCC::NBT][FMT::Byte] = Normal_Direct<s8, 3>();
table[VCF::Direct][false][NCC::NBT][FMT::UShort] = Normal_Direct<u16, 3>();
table[VCF::Direct][false][NCC::NBT][FMT::Short] = Normal_Direct<s16, 3>();
table[VCF::Direct][false][NCC::NBT][FMT::Float] = Normal_Direct<float, 3>();

// Same as above, since there are no indices
table[VCF::Direct][true][NCC::N][FMT::UByte] = Normal_Direct<u8, 1>();
table[VCF::Direct][true][NCC::N][FMT::Byte] = Normal_Direct<s8, 1>();
table[VCF::Direct][true][NCC::N][FMT::UShort] = Normal_Direct<u16, 1>();
table[VCF::Direct][true][NCC::N][FMT::Short] = Normal_Direct<s16, 1>();
table[VCF::Direct][true][NCC::N][FMT::Float] = Normal_Direct<float, 1>();
table[VCF::Direct][true][NCC::NBT][FMT::UByte] = Normal_Direct<u8, 3>();
table[VCF::Direct][true][NCC::NBT][FMT::Byte] = Normal_Direct<s8, 3>();
table[VCF::Direct][true][NCC::NBT][FMT::UShort] = Normal_Direct<u16, 3>();
table[VCF::Direct][true][NCC::NBT][FMT::Short] = Normal_Direct<s16, 3>();
table[VCF::Direct][true][NCC::NBT][FMT::Float] = Normal_Direct<float, 3>();

table[VCF::Index8][false][NCC::N][FMT::UByte] = Normal_Index<u8, u8, 1>();
table[VCF::Index8][false][NCC::N][FMT::Byte] = Normal_Index<u8, s8, 1>();
table[VCF::Index8][false][NCC::N][FMT::UShort] = Normal_Index<u8, u16, 1>();
table[VCF::Index8][false][NCC::N][FMT::Short] = Normal_Index<u8, s16, 1>();
table[VCF::Index8][false][NCC::N][FMT::Float] = Normal_Index<u8, float, 1>();
table[VCF::Index8][false][NCC::NBT][FMT::UByte] = Normal_Index<u8, u8, 3>();
table[VCF::Index8][false][NCC::NBT][FMT::Byte] = Normal_Index<u8, s8, 3>();
table[VCF::Index8][false][NCC::NBT][FMT::UShort] = Normal_Index<u8, u16, 3>();
table[VCF::Index8][false][NCC::NBT][FMT::Short] = Normal_Index<u8, s16, 3>();
table[VCF::Index8][false][NCC::NBT][FMT::Float] = Normal_Index<u8, float, 3>();

// Same for NormalComponentCount::N; differs for NBT
table[VCF::Index8][true][NCC::N][FMT::UByte] = Normal_Index<u8, u8, 1>();
table[VCF::Index8][true][NCC::N][FMT::Byte] = Normal_Index<u8, s8, 1>();
table[VCF::Index8][true][NCC::N][FMT::UShort] = Normal_Index<u8, u16, 1>();
table[VCF::Index8][true][NCC::N][FMT::Short] = Normal_Index<u8, s16, 1>();
table[VCF::Index8][true][NCC::N][FMT::Float] = Normal_Index<u8, float, 1>();
table[VCF::Index8][true][NCC::NBT][FMT::UByte] = Normal_Index_Indices3<u8, u8>();
table[VCF::Index8][true][NCC::NBT][FMT::Byte] = Normal_Index_Indices3<u8, s8>();
table[VCF::Index8][true][NCC::NBT][FMT::UShort] = Normal_Index_Indices3<u8, u16>();
table[VCF::Index8][true][NCC::NBT][FMT::Short] = Normal_Index_Indices3<u8, s16>();
table[VCF::Index8][true][NCC::NBT][FMT::Float] = Normal_Index_Indices3<u8, float>();

table[VCF::Index16][false][NCC::N][FMT::UByte] = Normal_Index<u16, u8, 1>();
table[VCF::Index16][false][NCC::N][FMT::Byte] = Normal_Index<u16, s8, 1>();
table[VCF::Index16][false][NCC::N][FMT::UShort] = Normal_Index<u16, u16, 1>();
table[VCF::Index16][false][NCC::N][FMT::Short] = Normal_Index<u16, s16, 1>();
table[VCF::Index16][false][NCC::N][FMT::Float] = Normal_Index<u16, float, 1>();
table[VCF::Index16][false][NCC::NBT][FMT::UByte] = Normal_Index<u16, u8, 3>();
table[VCF::Index16][false][NCC::NBT][FMT::Byte] = Normal_Index<u16, s8, 3>();
table[VCF::Index16][false][NCC::NBT][FMT::UShort] = Normal_Index<u16, u16, 3>();
table[VCF::Index16][false][NCC::NBT][FMT::Short] = Normal_Index<u16, s16, 3>();
table[VCF::Index16][false][NCC::NBT][FMT::Float] = Normal_Index<u16, float, 3>();

// Same for NormalComponentCount::N; differs for NBT
table[VCF::Index16][true][NCC::N][FMT::UByte] = Normal_Index<u16, u8, 1>();
table[VCF::Index16][true][NCC::N][FMT::Byte] = Normal_Index<u16, s8, 1>();
table[VCF::Index16][true][NCC::N][FMT::UShort] = Normal_Index<u16, u16, 1>();
table[VCF::Index16][true][NCC::N][FMT::Short] = Normal_Index<u16, s16, 1>();
table[VCF::Index16][true][NCC::N][FMT::Float] = Normal_Index<u16, float, 1>();
table[VCF::Index16][true][NCC::NBT][FMT::UByte] = Normal_Index_Indices3<u16, u8>();
table[VCF::Index16][true][NCC::NBT][FMT::Byte] = Normal_Index_Indices3<u16, s8>();
table[VCF::Index16][true][NCC::NBT][FMT::UShort] = Normal_Index_Indices3<u16, u16>();
table[VCF::Index16][true][NCC::NBT][FMT::Short] = Normal_Index_Indices3<u16, s16>();
table[VCF::Index16][true][NCC::NBT][FMT::Float] = Normal_Index_Indices3<u16, float>();

return table;
}
@@ -230,14 +203,14 @@ constexpr Types s_table = InitializeTable();
} // Anonymous namespace

u32 VertexLoader_Normal::GetSize(VertexComponentFormat type, ComponentFormat format,
NormalComponentCount elements, u32 index3)
NormalComponentCount elements, bool index3)
{
return s_table[u32(type)][index3][u32(elements)][u32(format)].gc_size;
return s_table[type][index3][elements][format].gc_size;
}

TPipelineFunction VertexLoader_Normal::GetFunction(VertexComponentFormat type,
ComponentFormat format,
NormalComponentCount elements, u32 index3)
NormalComponentCount elements, bool index3)
{
return s_table[u32(type)][index3][u32(elements)][u32(format)].function;
return s_table[type][index3][elements][format].function;
}
@@ -14,8 +14,8 @@ class VertexLoader_Normal
{
public:
static u32 GetSize(VertexComponentFormat type, ComponentFormat format,
NormalComponentCount elements, u32 index3);
NormalComponentCount elements, bool index3);

static TPipelineFunction GetFunction(VertexComponentFormat type, ComponentFormat format,
NormalComponentCount elements, u32 index3);
NormalComponentCount elements, bool index3);
};
@@ -7,6 +7,7 @@
#include <type_traits>

#include "Common/CommonTypes.h"
#include "Common/EnumMap.h"
#include "Common/Swap.h"

#include "VideoCommon/DataReader.h"
@@ -59,8 +60,8 @@ void Pos_ReadIndex(VertexLoader* loader)
const auto index = DataRead<I>();
loader->m_vertexSkip = index == std::numeric_limits<I>::max();
const auto data =
reinterpret_cast<const T*>(VertexLoaderManager::cached_arraybases[ARRAY_POSITION] +
(index * g_main_cp_state.array_strides[ARRAY_POSITION]));
reinterpret_cast<const T*>(VertexLoaderManager::cached_arraybases[CPArray::Position] +
(index * g_main_cp_state.array_strides[CPArray::Position]));
const auto scale = loader->m_posScale;
DataReader dst(g_vertex_manager_write_ptr, nullptr);

@@ -76,138 +77,109 @@ void Pos_ReadIndex(VertexLoader* loader)
LOG_VTX();
}

constexpr TPipelineFunction s_table_read_position[4][8][2] = {
{
{
nullptr,
nullptr,
},
{
nullptr,
nullptr,
},
{
nullptr,
nullptr,
},
{
nullptr,
nullptr,
},
{
nullptr,
nullptr,
},
},
{
{
Pos_ReadDirect<u8, 2>,
Pos_ReadDirect<u8, 3>,
},
{
Pos_ReadDirect<s8, 2>,
Pos_ReadDirect<s8, 3>,
},
{
Pos_ReadDirect<u16, 2>,
Pos_ReadDirect<u16, 3>,
},
{
Pos_ReadDirect<s16, 2>,
Pos_ReadDirect<s16, 3>,
},
{
Pos_ReadDirect<float, 2>,
Pos_ReadDirect<float, 3>,
},
},
{
{
Pos_ReadIndex<u8, u8, 2>,
Pos_ReadIndex<u8, u8, 3>,
},
{
Pos_ReadIndex<u8, s8, 2>,
Pos_ReadIndex<u8, s8, 3>,
},
{
Pos_ReadIndex<u8, u16, 2>,
Pos_ReadIndex<u8, u16, 3>,
},
{
Pos_ReadIndex<u8, s16, 2>,
Pos_ReadIndex<u8, s16, 3>,
},
{
Pos_ReadIndex<u8, float, 2>,
Pos_ReadIndex<u8, float, 3>,
},
},
{
{
Pos_ReadIndex<u16, u8, 2>,
Pos_ReadIndex<u16, u8, 3>,
},
{
Pos_ReadIndex<u16, s8, 2>,
Pos_ReadIndex<u16, s8, 3>,
},
{
Pos_ReadIndex<u16, u16, 2>,
Pos_ReadIndex<u16, u16, 3>,
},
{
Pos_ReadIndex<u16, s16, 2>,
Pos_ReadIndex<u16, s16, 3>,
},
{
Pos_ReadIndex<u16, float, 2>,
Pos_ReadIndex<u16, float, 3>,
},
},
using Common::EnumMap;

// These functions are to work around a "too many initializer values" error with nested brackets
// C++ does not let you write std::array<std::array<u32, 2>, 2> a = {{1, 2}, {3, 4}}
// (although it does allow std::array<std::array<u32, 2>, 2> b = {1, 2, 3, 4})
constexpr EnumMap<TPipelineFunction, CoordComponentCount::XYZ> e(TPipelineFunction xy,
TPipelineFunction xyz)
{
return {xy, xyz};
}
constexpr EnumMap<u32, CoordComponentCount::XYZ> e(u32 xy, u32 xyz)
{
return {xy, xyz};
}

constexpr EnumMap<EnumMap<TPipelineFunction, CoordComponentCount::XYZ>, ComponentFormat::Float>
f(EnumMap<EnumMap<TPipelineFunction, CoordComponentCount::XYZ>, ComponentFormat::Float> in)
{
return in;
}

constexpr EnumMap<EnumMap<u32, CoordComponentCount::XYZ>, ComponentFormat::Float>
g(EnumMap<EnumMap<u32, CoordComponentCount::XYZ>, ComponentFormat::Float> in)
{
return in;
}

// Three-level lookup table holding a T for every combination of vertex component format,
// component format and coordinate component count. Indexed outermost-first as
// table[VertexComponentFormat][ComponentFormat][CoordComponentCount].
template <typename T>
using Table = EnumMap<EnumMap<EnumMap<T, CoordComponentCount::XYZ>, ComponentFormat::Float>,
VertexComponentFormat::Index16>;

// Position loader function for each (type, format, elements) combination, looked up as
// s_table_read_position[type][format][elements] by VertexLoader_Position::GetFunction.
//
// Entries are positional. Each f({...}) block is one VertexComponentFormat (presumably
// NotPresent, Direct, Index8, Index16 in that order - TODO confirm against the enum
// definition). Within a block, each e(...) line is one ComponentFormat; the template arguments
// show the element types u8, s8, u16, s16, float in that order. The two arguments of e() are
// the XY (2 component) and XYZ (3 component) variants.
constexpr Table<TPipelineFunction> s_table_read_position = {
// No reader for any format.
f({
e(nullptr, nullptr),
e(nullptr, nullptr),
e(nullptr, nullptr),
e(nullptr, nullptr),
e(nullptr, nullptr),
}),
// Direct readers.
f({
e(Pos_ReadDirect<u8, 2>, Pos_ReadDirect<u8, 3>),
e(Pos_ReadDirect<s8, 2>, Pos_ReadDirect<s8, 3>),
e(Pos_ReadDirect<u16, 2>, Pos_ReadDirect<u16, 3>),
e(Pos_ReadDirect<s16, 2>, Pos_ReadDirect<s16, 3>),
e(Pos_ReadDirect<float, 2>, Pos_ReadDirect<float, 3>),
}),
// Indexed readers whose first template argument (the index type) is u8.
f({
e(Pos_ReadIndex<u8, u8, 2>, Pos_ReadIndex<u8, u8, 3>),
e(Pos_ReadIndex<u8, s8, 2>, Pos_ReadIndex<u8, s8, 3>),
e(Pos_ReadIndex<u8, u16, 2>, Pos_ReadIndex<u8, u16, 3>),
e(Pos_ReadIndex<u8, s16, 2>, Pos_ReadIndex<u8, s16, 3>),
e(Pos_ReadIndex<u8, float, 2>, Pos_ReadIndex<u8, float, 3>),
}),
// Indexed readers whose first template argument (the index type) is u16.
f({
e(Pos_ReadIndex<u16, u8, 2>, Pos_ReadIndex<u16, u8, 3>),
e(Pos_ReadIndex<u16, s8, 2>, Pos_ReadIndex<u16, s8, 3>),
e(Pos_ReadIndex<u16, u16, 2>, Pos_ReadIndex<u16, u16, 3>),
e(Pos_ReadIndex<u16, s16, 2>, Pos_ReadIndex<u16, s16, 3>),
e(Pos_ReadIndex<u16, float, 2>, Pos_ReadIndex<u16, float, 3>),
}),
};

constexpr u32 s_table_read_position_vertex_size[4][8][2] = {
{
{0, 0},
{0, 0},
{0, 0},
{0, 0},
{0, 0},
},
{
{2, 3},
{2, 3},
{4, 6},
{4, 6},
{8, 12},
},
{
{1, 1},
{1, 1},
{1, 1},
{1, 1},
{1, 1},
},
{
{2, 2},
{2, 2},
{2, 2},
{2, 2},
{2, 2},
},
// Size value for each (type, format, elements) combination, looked up as
// s_table_read_position_vertex_size[type][format][elements] by VertexLoader_Position::GetSize.
// Entries are positional and use the same layout as s_table_read_position: one g({...}) block
// per VertexComponentFormat, one e(xy, xyz) line per ComponentFormat.
// Every literal carries the `u` suffix so that each call selects the e(u32, u32) overload.
constexpr Table<u32> s_table_read_position_vertex_size = {
    // Block paired with the nullptr readers: always zero.
    g({
        e(0u, 0u),
        e(0u, 0u),
        e(0u, 0u),
        e(0u, 0u),
        e(0u, 0u),
    }),
    // Block paired with the Direct readers: element size times component count.
    g({
        e(2u, 3u),   // 1-byte elements, 2 or 3 components
        e(2u, 3u),   // 1-byte elements, 2 or 3 components
        e(4u, 6u),   // 2-byte elements, 2 or 3 components
        e(4u, 6u),   // 2-byte elements, 2 or 3 components
        e(8u, 12u),  // 4-byte elements, 2 or 3 components
    }),
    // Block paired with the u8-index readers: 1 regardless of format or component count.
    g({
        e(1u, 1u),
        e(1u, 1u),
        e(1u, 1u),
        e(1u, 1u),
        e(1u, 1u),
    }),
    // Block paired with the u16-index readers: 2 regardless of format or component count.
    g({
        e(2u, 2u),
        e(2u, 2u),
        e(2u, 2u),
        e(2u, 2u),
        e(2u, 2u),
    }),
};
} // Anonymous namespace

u32 VertexLoader_Position::GetSize(VertexComponentFormat type, ComponentFormat format,
CoordComponentCount elements)
{
return s_table_read_position_vertex_size[u32(type)][u32(format)][u32(elements)];
return s_table_read_position_vertex_size[type][format][elements];
}

TPipelineFunction VertexLoader_Position::GetFunction(VertexComponentFormat type,
ComponentFormat format,
CoordComponentCount elements)
{
return s_table_read_position[u32(type)][u32(format)][u32(elements)];
return s_table_read_position[type][format][elements];
}
@@ -55,8 +55,8 @@ void TexCoord_ReadIndex(VertexLoader* loader)

const auto index = DataRead<I>();
const auto data = reinterpret_cast<const T*>(
VertexLoaderManager::cached_arraybases[ARRAY_TEXCOORD0 + loader->m_tcIndex] +
(index * g_main_cp_state.array_strides[ARRAY_TEXCOORD0 + loader->m_tcIndex]));
VertexLoaderManager::cached_arraybases[CPArray::TexCoord0 + loader->m_tcIndex] +
(index * g_main_cp_state.array_strides[CPArray::TexCoord0 + loader->m_tcIndex]));
const auto scale = loader->m_tcScale[loader->m_tcIndex];
DataReader dst(g_vertex_manager_write_ptr, nullptr);

@@ -67,140 +67,110 @@ void TexCoord_ReadIndex(VertexLoader* loader)
++loader->m_tcIndex;
}

constexpr TPipelineFunction s_table_read_tex_coord[4][8][2] = {
{
{
nullptr,
nullptr,
},
{
nullptr,
nullptr,
},
{
nullptr,
nullptr,
},
{
nullptr,
nullptr,
},
{
nullptr,
nullptr,
},
},
{
{
TexCoord_ReadDirect<u8, 1>,
TexCoord_ReadDirect<u8, 2>,
},
{
TexCoord_ReadDirect<s8, 1>,
TexCoord_ReadDirect<s8, 2>,
},
{
TexCoord_ReadDirect<u16, 1>,
TexCoord_ReadDirect<u16, 2>,
},
{
TexCoord_ReadDirect<s16, 1>,
TexCoord_ReadDirect<s16, 2>,
},
{
TexCoord_ReadDirect<float, 1>,
TexCoord_ReadDirect<float, 2>,
},
},
{
{
TexCoord_ReadIndex<u8, u8, 1>,
TexCoord_ReadIndex<u8, u8, 2>,
},
{
TexCoord_ReadIndex<u8, s8, 1>,
TexCoord_ReadIndex<u8, s8, 2>,
},
{
TexCoord_ReadIndex<u8, u16, 1>,
TexCoord_ReadIndex<u8, u16, 2>,
},
{
TexCoord_ReadIndex<u8, s16, 1>,
TexCoord_ReadIndex<u8, s16, 2>,
},
{
TexCoord_ReadIndex<u8, float, 1>,
TexCoord_ReadIndex<u8, float, 2>,
},
},
{
{
TexCoord_ReadIndex<u16, u8, 1>,
TexCoord_ReadIndex<u16, u8, 2>,
},
{
TexCoord_ReadIndex<u16, s8, 1>,
TexCoord_ReadIndex<u16, s8, 2>,
},
{
TexCoord_ReadIndex<u16, u16, 1>,
TexCoord_ReadIndex<u16, u16, 2>,
},
{
TexCoord_ReadIndex<u16, s16, 1>,
TexCoord_ReadIndex<u16, s16, 2>,
},
{
TexCoord_ReadIndex<u16, float, 1>,
TexCoord_ReadIndex<u16, float, 2>,
},
},
using Common::EnumMap;
// These functions are to work around a "too many initializer values" error with nested brackets
// C++ does not let you write std::array<std::array<u32, 2>, 2> a = {{1, 2}, {3, 4}}
// (although it does allow std::array<std::array<u32, 2>, 2> b = {1, 2, 3, 4})
constexpr EnumMap<TPipelineFunction, TexComponentCount::ST> e(TPipelineFunction s,
TPipelineFunction st)
{
return {s, st};
}
constexpr EnumMap<u32, TexComponentCount::ST> e(u32 s, u32 st)
{
return {s, st};
}

constexpr EnumMap<EnumMap<TPipelineFunction, TexComponentCount::ST>, ComponentFormat::Float>
f(EnumMap<EnumMap<TPipelineFunction, TexComponentCount::ST>, ComponentFormat::Float> in)
{
return in;
}

constexpr EnumMap<EnumMap<u32, TexComponentCount::ST>, ComponentFormat::Float>
g(EnumMap<EnumMap<u32, TexComponentCount::ST>, ComponentFormat::Float> in)
{
return in;
}

// Three-level lookup table holding a T for every combination of vertex component format,
// component format and texture coordinate component count. Indexed outermost-first as
// table[VertexComponentFormat][ComponentFormat][TexComponentCount].
template <typename T>
using Table = EnumMap<EnumMap<EnumMap<T, TexComponentCount::ST>, ComponentFormat::Float>,
VertexComponentFormat::Index16>;

// Reader functions for texture coordinates, one per table slot.
// Each f({...}) group covers one vertex component format; inside a group the rows follow the
// component types u8, s8, u16, s16, float, and each e(a, b) row holds the variant instantiated
// with 1 and with 2 as the last template argument (the s and st entries).
constexpr Table<TPipelineFunction> s_table_read_tex_coord = {
// First group: no reader for any component type.
f({
e(nullptr, nullptr),
e(nullptr, nullptr),
e(nullptr, nullptr),
e(nullptr, nullptr),
e(nullptr, nullptr),
}),
// Second group: TexCoord_ReadDirect readers.
f({
e(TexCoord_ReadDirect<u8, 1>, TexCoord_ReadDirect<u8, 2>),
e(TexCoord_ReadDirect<s8, 1>, TexCoord_ReadDirect<s8, 2>),
e(TexCoord_ReadDirect<u16, 1>, TexCoord_ReadDirect<u16, 2>),
e(TexCoord_ReadDirect<s16, 1>, TexCoord_ReadDirect<s16, 2>),
e(TexCoord_ReadDirect<float, 1>, TexCoord_ReadDirect<float, 2>),
}),
// Third group: TexCoord_ReadIndex readers with u8 as the first template argument
// (presumably the index type -- confirm against TexCoord_ReadIndex).
f({
e(TexCoord_ReadIndex<u8, u8, 1>, TexCoord_ReadIndex<u8, u8, 2>),
e(TexCoord_ReadIndex<u8, s8, 1>, TexCoord_ReadIndex<u8, s8, 2>),
e(TexCoord_ReadIndex<u8, u16, 1>, TexCoord_ReadIndex<u8, u16, 2>),
e(TexCoord_ReadIndex<u8, s16, 1>, TexCoord_ReadIndex<u8, s16, 2>),
e(TexCoord_ReadIndex<u8, float, 1>, TexCoord_ReadIndex<u8, float, 2>),
}),
// Fourth group: TexCoord_ReadIndex readers with u16 as the first template argument.
f({
e(TexCoord_ReadIndex<u16, u8, 1>, TexCoord_ReadIndex<u16, u8, 2>),
e(TexCoord_ReadIndex<u16, s8, 1>, TexCoord_ReadIndex<u16, s8, 2>),
e(TexCoord_ReadIndex<u16, u16, 1>, TexCoord_ReadIndex<u16, u16, 2>),
e(TexCoord_ReadIndex<u16, s16, 1>, TexCoord_ReadIndex<u16, s16, 2>),
e(TexCoord_ReadIndex<u16, float, 1>, TexCoord_ReadIndex<u16, float, 2>),
}),
};

constexpr u32 s_table_read_tex_coord_vertex_size[4][8][2] = {
{
{0, 0},
{0, 0},
{0, 0},
{0, 0},
{0, 0},
},
{
{1, 2},
{1, 2},
{2, 4},
{2, 4},
{4, 8},
},
{
{1, 1},
{1, 1},
{1, 1},
{1, 1},
{1, 1},
},
{
{2, 2},
{2, 2},
{2, 2},
{2, 2},
{2, 2},
},
// Size value for every table slot, laid out as four g({...}) groups of five e(s, st) rows.
// NOTE(review): the values are presumably byte counts consumed from the vertex stream -- the
// second group matches sizeof(u8/s8/u16/s16/float) times 1 or 2 -- confirm against the readers.
constexpr Table<u32> s_table_read_tex_coord_vertex_size = {
// First group: nothing is read, so every size is zero.
// The literals carry a u suffix, presumably because a plain 0 is also a null pointer constant
// and e(0, 0) would be ambiguous between the two e overloads.
g({
e(0u, 0u),
e(0u, 0u),
e(0u, 0u),
e(0u, 0u),
e(0u, 0u),
}),
// Second group: size grows with the component width and doubles for the st entry.
g({
e(1, 2),
e(1, 2),
e(2, 4),
e(2, 4),
e(4, 8),
}),
// Third group: always 1, independent of component type and count.
g({
e(1, 1),
e(1, 1),
e(1, 1),
e(1, 1),
e(1, 1),
}),
// Fourth group: always 2, independent of component type and count.
g({
e(2, 2),
e(2, 2),
e(2, 2),
e(2, 2),
e(2, 2),
}),
};
} // Anonymous namespace

// Returns the s_table_read_tex_coord_vertex_size entry selected by type, format and elements.
// NOTE(review): two return statements are present and the second one is unreachable as written.
// They look like the before/after lines of a change (u32-cast indexing vs. indexing directly
// with the enum values) -- confirm which one is meant to remain.
u32 VertexLoader_TextCoord::GetSize(VertexComponentFormat type, ComponentFormat format,
TexComponentCount elements)
{
return s_table_read_tex_coord_vertex_size[u32(type)][u32(format)][u32(elements)];
return s_table_read_tex_coord_vertex_size[type][format][elements];
}

// Returns the s_table_read_tex_coord entry selected by type, format and elements; the entries
// of the table's first group are nullptr.
// NOTE(review): two return statements are present and the second one is unreachable as written.
// They look like the before/after lines of a change (u32-cast indexing vs. indexing directly
// with the enum values) -- confirm which one is meant to remain.
TPipelineFunction VertexLoader_TextCoord::GetFunction(VertexComponentFormat type,
ComponentFormat format,
TexComponentCount elements)
{
return s_table_read_tex_coord[u32(type)][u32(format)][u32(elements)];
return s_table_read_tex_coord[type][format][elements];
}

TPipelineFunction VertexLoader_TextCoord::GetDummyFunction()
@@ -10,6 +10,7 @@
#include "Common/BitSet.h"
#include "Common/ChunkFile.h"
#include "Common/CommonTypes.h"
#include "Common/EnumMap.h"
#include "Common/Logging/Log.h"
#include "Common/MathUtil.h"

@@ -38,8 +39,10 @@

std::unique_ptr<VertexManagerBase> g_vertex_manager;

using OpcodeDecoder::Primitive;

// GX primitive -> RenderState primitive, no primitive restart
constexpr std::array<PrimitiveType, 8> primitive_from_gx{{
constexpr Common::EnumMap<PrimitiveType, Primitive::GX_DRAW_POINTS> primitive_from_gx{
PrimitiveType::Triangles, // GX_DRAW_QUADS
PrimitiveType::Triangles, // GX_DRAW_QUADS_2
PrimitiveType::Triangles, // GX_DRAW_TRIANGLES
@@ -48,10 +51,10 @@ constexpr std::array<PrimitiveType, 8> primitive_from_gx{{
PrimitiveType::Lines, // GX_DRAW_LINES
PrimitiveType::Lines, // GX_DRAW_LINE_STRIP
PrimitiveType::Points, // GX_DRAW_POINTS
}};
};

// GX primitive -> RenderState primitive, using primitive restart
constexpr std::array<PrimitiveType, 8> primitive_from_gx_pr{{
constexpr Common::EnumMap<PrimitiveType, Primitive::GX_DRAW_POINTS> primitive_from_gx_pr{
PrimitiveType::TriangleStrip, // GX_DRAW_QUADS
PrimitiveType::TriangleStrip, // GX_DRAW_QUADS_2
PrimitiveType::TriangleStrip, // GX_DRAW_TRIANGLES
@@ -60,7 +63,7 @@ constexpr std::array<PrimitiveType, 8> primitive_from_gx_pr{{
PrimitiveType::Lines, // GX_DRAW_LINES
PrimitiveType::Lines, // GX_DRAW_LINE_STRIP
PrimitiveType::Points, // GX_DRAW_POINTS
}};
};

// Due to the BT.601 standard which the GameCube is based on being a compromise
// between PAL and NTSC, neither standard gets square pixels. They are each off
@@ -107,13 +110,13 @@ u32 VertexManagerBase::GetRemainingSize() const
return static_cast<u32>(m_end_buffer_pointer - m_cur_buffer_pointer);
}

// Forwards the primitive and the vertex count unchanged to m_index_generator.
// NOTE(review): two signature lines are present (int primitive vs. OpcodeDecoder::Primitive
// primitive); they look like the before/after lines of a change -- confirm which one remains.
void VertexManagerBase::AddIndices(int primitive, u32 num_vertices)
void VertexManagerBase::AddIndices(OpcodeDecoder::Primitive primitive, u32 num_vertices)
{
m_index_generator.AddIndices(primitive, num_vertices);
}

DataReader VertexManagerBase::PrepareForAdditionalData(int primitive, u32 count, u32 stride,
bool cullall)
DataReader VertexManagerBase::PrepareForAdditionalData(OpcodeDecoder::Primitive primitive,
u32 count, u32 stride, bool cullall)
{
// Flush all EFB pokes. Since the buffer is shared, we can't draw pokes+primitives concurrently.
g_framebuffer_manager->FlushEFBPokes();
@@ -185,30 +188,30 @@ void VertexManagerBase::FlushData(u32 count, u32 stride)
m_cur_buffer_pointer += count * stride;
}

u32 VertexManagerBase::GetRemainingIndices(int primitive) const
u32 VertexManagerBase::GetRemainingIndices(OpcodeDecoder::Primitive primitive) const
{
const u32 index_len = MAXIBUFFERSIZE - m_index_generator.GetIndexLen();

if (g_Config.backend_info.bSupportsPrimitiveRestart)
{
switch (primitive)
{
case OpcodeDecoder::GX_DRAW_QUADS:
case OpcodeDecoder::GX_DRAW_QUADS_2:
case Primitive::GX_DRAW_QUADS:
case Primitive::GX_DRAW_QUADS_2:
return index_len / 5 * 4;
case OpcodeDecoder::GX_DRAW_TRIANGLES:
case Primitive::GX_DRAW_TRIANGLES:
return index_len / 4 * 3;
case OpcodeDecoder::GX_DRAW_TRIANGLE_STRIP:
case Primitive::GX_DRAW_TRIANGLE_STRIP:
return index_len / 1 - 1;
case OpcodeDecoder::GX_DRAW_TRIANGLE_FAN:
case Primitive::GX_DRAW_TRIANGLE_FAN:
return index_len / 6 * 4 + 1;

case OpcodeDecoder::GX_DRAW_LINES:
case Primitive::GX_DRAW_LINES:
return index_len;
case OpcodeDecoder::GX_DRAW_LINE_STRIP:
case Primitive::GX_DRAW_LINE_STRIP:
return index_len / 2 + 1;

case OpcodeDecoder::GX_DRAW_POINTS:
case Primitive::GX_DRAW_POINTS:
return index_len;

default:
@@ -219,22 +222,22 @@ u32 VertexManagerBase::GetRemainingIndices(int primitive) const
{
switch (primitive)
{
case OpcodeDecoder::GX_DRAW_QUADS:
case OpcodeDecoder::GX_DRAW_QUADS_2:
case Primitive::GX_DRAW_QUADS:
case Primitive::GX_DRAW_QUADS_2:
return index_len / 6 * 4;
case OpcodeDecoder::GX_DRAW_TRIANGLES:
case Primitive::GX_DRAW_TRIANGLES:
return index_len;
case OpcodeDecoder::GX_DRAW_TRIANGLE_STRIP:
case Primitive::GX_DRAW_TRIANGLE_STRIP:
return index_len / 3 + 2;
case OpcodeDecoder::GX_DRAW_TRIANGLE_FAN:
case Primitive::GX_DRAW_TRIANGLE_FAN:
return index_len / 3 + 2;

case OpcodeDecoder::GX_DRAW_LINES:
case Primitive::GX_DRAW_LINES:
return index_len;
case OpcodeDecoder::GX_DRAW_LINE_STRIP:
case Primitive::GX_DRAW_LINE_STRIP:
return index_len / 2 + 1;

case OpcodeDecoder::GX_DRAW_POINTS:
case Primitive::GX_DRAW_POINTS:
return index_len;

default:
@@ -35,6 +35,11 @@ enum TexelBufferFormat : u32
NUM_TEXEL_BUFFER_FORMATS
};

// Opaque declaration of OpcodeDecoder::Primitive (u8 underlying type); the enumerators are
// defined elsewhere. It lets the declarations below name the type.
namespace OpcodeDecoder
{
enum class Primitive : u8;
}  // namespace OpcodeDecoder

class VertexManagerBase
{
private:
@@ -93,8 +98,9 @@ class VertexManagerBase
virtual bool Initialize();

PrimitiveType GetCurrentPrimitiveType() const { return m_current_primitive_type; }
void AddIndices(int primitive, u32 num_vertices);
DataReader PrepareForAdditionalData(int primitive, u32 count, u32 stride, bool cullall);
void AddIndices(OpcodeDecoder::Primitive primitive, u32 num_vertices);
DataReader PrepareForAdditionalData(OpcodeDecoder::Primitive primitive, u32 count, u32 stride,
bool cullall);
void FlushData(u32 count, u32 stride);

void Flush();
@@ -163,7 +169,7 @@ class VertexManagerBase
virtual void DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_vertex);

u32 GetRemainingSize() const;
u32 GetRemainingIndices(int primitive) const;
u32 GetRemainingIndices(OpcodeDecoder::Primitive primitive) const;

void CalculateZSlope(NativeVertexFormat* format);
void LoadTextures();
@@ -23,6 +23,7 @@
#include "VideoCommon/FreeLookCamera.h"
#include "VideoCommon/RenderBase.h"
#include "VideoCommon/Statistics.h"
#include "VideoCommon/VertexLoaderManager.h"
#include "VideoCommon/VertexManagerBase.h"
#include "VideoCommon/VideoCommon.h"
#include "VideoCommon/VideoConfig.h"
@@ -13,8 +13,6 @@
#include "Common/EnumFormatter.h"
#include "VideoCommon/CPMemory.h"

class DataReader;

constexpr size_t NUM_XF_COLOR_CHANNELS = 2;

// Lighting
@@ -454,10 +452,10 @@ struct XFMemory
u32 unk9[8]; // 0x1048 - 0x104f
PostMtxInfo postMtxInfo[8]; // 0x1050 - 0x1057
};
static_assert(sizeof(XFMemory) == sizeof(u32) * 0x1058);
static_assert(sizeof(XFMemory) == sizeof(u32) * XFMEM_REGISTERS_END);

extern XFMemory xfmem;

void LoadXFReg(u32 transferSize, u32 address, DataReader src);
void LoadIndexedXF(u32 val, int array);
void PreprocessIndexedXF(u32 val, int refarray);
void LoadXFReg(u16 base_address, u8 transfer_size, const u8* data);
void LoadIndexedXF(CPArray array, u32 index, u16 address, u8 size);
void PreprocessIndexedXF(CPArray array, u32 index, u16 address, u8 size);
@@ -12,7 +12,6 @@
#include "Core/HW/Memmap.h"

#include "VideoCommon/CPMemory.h"
#include "VideoCommon/DataReader.h"
#include "VideoCommon/Fifo.h"
#include "VideoCommon/GeometryShaderManager.h"
#include "VideoCommon/PixelShaderManager.h"
@@ -26,16 +25,10 @@ static void XFMemWritten(u32 transferSize, u32 baseAddress)
VertexShaderManager::InvalidateXFRange(baseAddress, baseAddress + transferSize);
}

static void XFRegWritten(int transferSize, u32 baseAddress, DataReader src)
static void XFRegWritten(u32 address, u32 value)
{
u32 address = baseAddress;
u32 dataIndex = 0;

while (transferSize > 0 && address < XFMEM_REGISTERS_END)
if (address >= XFMEM_REGISTERS_START && address < XFMEM_REGISTERS_END)
{
u32 newValue = src.Peek<u32>(dataIndex * sizeof(u32));
u32 nextAddress = address + 1;

switch (address)
{
case XFMEM_ERROR:
@@ -44,12 +37,12 @@ static void XFRegWritten(int transferSize, u32 baseAddress, DataReader src)
case XFMEM_STATE1: // internal state 1
case XFMEM_CLOCK:
case XFMEM_SETGPMETRIC:
nextAddress = 0x1007;
// Not implemented
break;

case XFMEM_CLIPDISABLE:
{
ClipDisable setting{.hex = newValue};
ClipDisable setting{.hex = value};
if (setting.disable_clipping_detection)
DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::SETS_XF_CLIPDISABLE_BIT_0);
if (setting.disable_trivial_rejection)
@@ -63,7 +56,7 @@ static void XFRegWritten(int transferSize, u32 baseAddress, DataReader src)
break;

case XFMEM_SETNUMCHAN:
if (xfmem.numChan.numColorChans != (newValue & 3))
if (xfmem.numChan.numColorChans != (value & 3))
g_vertex_manager->Flush();
VertexShaderManager::SetLightingConfigChanged();
break;
@@ -72,7 +65,7 @@ static void XFRegWritten(int transferSize, u32 baseAddress, DataReader src)
case XFMEM_SETCHAN1_AMBCOLOR:
{
u8 chan = address - XFMEM_SETCHAN0_AMBCOLOR;
if (xfmem.ambColor[chan] != newValue)
if (xfmem.ambColor[chan] != value)
{
g_vertex_manager->Flush();
VertexShaderManager::SetMaterialColorChanged(chan);
@@ -84,7 +77,7 @@ static void XFRegWritten(int transferSize, u32 baseAddress, DataReader src)
case XFMEM_SETCHAN1_MATCOLOR:
{
u8 chan = address - XFMEM_SETCHAN0_MATCOLOR;
if (xfmem.matColor[chan] != newValue)
if (xfmem.matColor[chan] != value)
{
g_vertex_manager->Flush();
VertexShaderManager::SetMaterialColorChanged(chan + 2);
@@ -96,22 +89,22 @@ static void XFRegWritten(int transferSize, u32 baseAddress, DataReader src)
case XFMEM_SETCHAN1_COLOR:
case XFMEM_SETCHAN0_ALPHA: // Channel Alpha
case XFMEM_SETCHAN1_ALPHA:
if (((u32*)&xfmem)[address] != (newValue & 0x7fff))
if (((u32*)&xfmem)[address] != (value & 0x7fff))
g_vertex_manager->Flush();
VertexShaderManager::SetLightingConfigChanged();
break;

case XFMEM_DUALTEX:
if (xfmem.dualTexTrans.enabled != bool(newValue & 1))
if (xfmem.dualTexTrans.enabled != bool(value & 1))
g_vertex_manager->Flush();
VertexShaderManager::SetTexMatrixInfoChanged(-1);
break;

case XFMEM_SETMATRIXINDA:
VertexShaderManager::SetTexMatrixChangedA(newValue);
VertexShaderManager::SetTexMatrixChangedA(value);
break;
case XFMEM_SETMATRIXINDB:
VertexShaderManager::SetTexMatrixChangedB(newValue);
VertexShaderManager::SetTexMatrixChangedB(value);
break;

case XFMEM_SETVIEWPORT:
@@ -124,8 +117,6 @@ static void XFRegWritten(int transferSize, u32 baseAddress, DataReader src)
VertexShaderManager::SetViewportChanged();
PixelShaderManager::SetViewportChanged();
GeometryShaderManager::SetViewportChanged();

nextAddress = XFMEM_SETVIEWPORT + 6;
break;

case XFMEM_SETPROJECTION:
@@ -138,12 +129,10 @@ static void XFRegWritten(int transferSize, u32 baseAddress, DataReader src)
g_vertex_manager->Flush();
VertexShaderManager::SetProjectionChanged();
GeometryShaderManager::SetProjectionChanged();

nextAddress = XFMEM_SETPROJECTION + 7;
break;

case XFMEM_SETNUMTEXGENS: // GXSetNumTexGens
if (xfmem.numTexGen.numTexGens != (newValue & 15))
if (xfmem.numTexGen.numTexGens != (value & 15))
g_vertex_manager->Flush();
break;

@@ -157,8 +146,6 @@ static void XFRegWritten(int transferSize, u32 baseAddress, DataReader src)
case XFMEM_SETTEXMTXINFO + 7:
g_vertex_manager->Flush();
VertexShaderManager::SetTexMatrixInfoChanged(address - XFMEM_SETTEXMTXINFO);

nextAddress = XFMEM_SETTEXMTXINFO + 8;
break;

case XFMEM_SETPOSTMTXINFO:
@@ -171,8 +158,6 @@ static void XFRegWritten(int transferSize, u32 baseAddress, DataReader src)
case XFMEM_SETPOSTMTXINFO + 7:
g_vertex_manager->Flush();
VertexShaderManager::SetTexMatrixInfoChanged(address - XFMEM_SETPOSTMTXINFO);

nextAddress = XFMEM_SETPOSTMTXINFO + 8;
break;

// --------------
@@ -189,7 +174,7 @@ static void XFRegWritten(int transferSize, u32 baseAddress, DataReader src)
case 0x104e:
case 0x104f:
DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_UNKNOWN_XF_COMMAND);
DEBUG_LOG_FMT(VIDEO, "Possible Normal Mtx XF reg?: {:x}={:x}", address, newValue);
DEBUG_LOG_FMT(VIDEO, "Possible Normal Mtx XF reg?: {:x}={:x}", address, value);
break;

case 0x1013:
@@ -200,83 +185,69 @@ static void XFRegWritten(int transferSize, u32 baseAddress, DataReader src)

default:
DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_UNKNOWN_XF_COMMAND);
WARN_LOG_FMT(VIDEO, "Unknown XF Reg: {:x}={:x}", address, newValue);
WARN_LOG_FMT(VIDEO, "Unknown XF Reg: {:x}={:x}", address, value);
break;
}

int transferred = nextAddress - address;
address = nextAddress;

transferSize -= transferred;
dataIndex += transferred;
}
}

// Stores a run of 32-bit words from the supplied data into xfmem, starting at the given base
// address. Words below XFMEM_REGISTERS_START are reported through XFMemWritten, words from
// XFMEM_REGISTERS_START up to XFMEM_REGISTERS_END through XFRegWritten.
// NOTE(review): this span holds two variants of the function interleaved line by line (one taking
// a DataReader, one taking a raw byte pointer), and the body of ExtractIndexedXF is mixed into
// the final loop. It reads like the removed and added sides of a change -- confirm which lines
// are meant to survive before relying on it.
void LoadXFReg(u32 transferSize, u32 baseAddress, DataReader src)
void LoadXFReg(u16 base_address, u8 transfer_size, const u8* data)
{
// do not allow writes past registers
if (baseAddress + transferSize > XFMEM_REGISTERS_END)
if (base_address > XFMEM_REGISTERS_END)
{
WARN_LOG_FMT(VIDEO, "XF load exceeds address space: {:x} {} bytes", baseAddress, transferSize);
DolphinAnalytics::Instance().ReportGameQuirk(GameQuirk::USES_UNKNOWN_XF_COMMAND);
WARN_LOG_FMT(VIDEO, "XF load base address past end of address space: {:x} {} bytes",
base_address, transfer_size);
return;
}

if (baseAddress >= XFMEM_REGISTERS_END)
transferSize = 0;
else
transferSize = XFMEM_REGISTERS_END - baseAddress;
u32 end_address = base_address + transfer_size; // exclusive

// do not allow writes past registers
if (end_address > XFMEM_REGISTERS_END)
{
WARN_LOG_FMT(VIDEO, "XF load ends past end of address space: {:x} {} bytes", base_address,
transfer_size);
// Clamp instead of returning: the words that still fit are written below.
end_address = XFMEM_REGISTERS_END;
}

// write to XF mem
if (baseAddress < XFMEM_REGISTERS_START && transferSize > 0)
if (base_address < XFMEM_REGISTERS_START)
{
u32 end = baseAddress + transferSize;

u32 xfMemBase = baseAddress;
u32 xfMemTransferSize = transferSize;

if (end >= XFMEM_REGISTERS_START)
{
xfMemTransferSize = XFMEM_REGISTERS_START - baseAddress;
const u32 xf_mem_base = base_address;
u32 xf_mem_transfer_size = transfer_size;

baseAddress = XFMEM_REGISTERS_START;
transferSize = end - XFMEM_REGISTERS_START;
}
else
if (end_address > XFMEM_REGISTERS_START)
{
transferSize = 0;
// The transfer crosses into the register range: only the words below
// XFMEM_REGISTERS_START are handled here, the register loop below takes over from there.
xf_mem_transfer_size = XFMEM_REGISTERS_START - base_address;
base_address = XFMEM_REGISTERS_START;
}

// The notification is issued before the words are stored.
XFMemWritten(xfMemTransferSize, xfMemBase);
for (u32 i = 0; i < xfMemTransferSize; i++)
XFMemWritten(xf_mem_transfer_size, xf_mem_base);
for (u32 i = 0; i < xf_mem_transfer_size; i++)
{
((u32*)&xfmem)[xfMemBase + i] = src.Read<u32>();
((u32*)&xfmem)[xf_mem_base + i] = Common::swap32(data);
data += 4;
}
}

// write to XF regs
if (transferSize > 0)
if (base_address >= XFMEM_REGISTERS_START)
{
XFRegWritten(transferSize, baseAddress, src);
for (u32 i = 0; i < transferSize; i++)
for (u32 address = base_address; address < end_address; address++)
{
((u32*)&xfmem)[baseAddress + i] = src.Read<u32>();
}
}
}
const u32 value = Common::swap32(data);

constexpr std::tuple<u32, u32, u32> ExtractIndexedXF(u32 val)
{
const u32 index = val >> 16;
const u32 address = val & 0xFFF; // check mask
const u32 size = ((val >> 12) & 0xF) + 1;
// XFRegWritten is called before the store, while xfmem still holds the previous value.
XFRegWritten(address, value);
((u32*)&xfmem)[address] = value;

return {index, address, size};
data += 4;
}
}
}

// TODO - verify that it is correct. Seems to work, though.
void LoadIndexedXF(u32 val, int refarray)
void LoadIndexedXF(CPArray array, u32 index, u16 address, u8 size)
{
const auto [index, address, size] = ExtractIndexedXF(val);
// load stuff from array to address in xf mem

u32* currData = (u32*)(&xfmem) + address;
@@ -287,8 +258,8 @@ void LoadIndexedXF(u32 val, int refarray)
}
else
{
newData = (u32*)Memory::GetPointer(g_main_cp_state.array_bases[refarray] +
g_main_cp_state.array_strides[refarray] * index);
newData = (u32*)Memory::GetPointer(g_main_cp_state.array_bases[array] +
g_main_cp_state.array_strides[array] * index);
}
bool changed = false;
for (u32 i = 0; i < size; ++i)
@@ -307,12 +278,10 @@ void LoadIndexedXF(u32 val, int refarray)
}
}

void PreprocessIndexedXF(u32 val, int refarray)
void PreprocessIndexedXF(CPArray array, u32 index, u16 address, u8 size)
{
const auto [index, address, size] = ExtractIndexedXF(val);

const u8* new_data = Memory::GetPointer(g_preprocess_cp_state.array_bases[refarray] +
g_preprocess_cp_state.array_strides[refarray] * index);
const u8* new_data = Memory::GetPointer(g_preprocess_cp_state.array_bases[array] +
g_preprocess_cp_state.array_strides[array] * index);

const size_t buf_size = size * sizeof(u32);
Fifo::PushFifoAuxBuffer(new_data, buf_size);
@@ -581,13 +550,9 @@ std::string GetXFMemDescription(u32 address, u32 value)
}
}

std::pair<std::string, std::string> GetXFTransferInfo(const u8* data)
std::pair<std::string, std::string> GetXFTransferInfo(u16 base_address, u8 transfer_size,
const u8* data)
{
const u32 cmd = Common::swap32(data);
data += 4;
u32 base_address = cmd & 0xFFFF;
const u32 transfer_size = ((cmd >> 16) & 15) + 1;

if (base_address > XFMEM_REGISTERS_END)
{
return std::make_pair("Invalid XF Transfer", "Base address past end of address space");
@@ -655,10 +620,9 @@ std::pair<std::string, std::string> GetXFTransferInfo(const u8* data)
return std::make_pair(fmt::to_string(name), fmt::to_string(desc));
}

std::pair<std::string, std::string> GetXFIndexedLoadInfo(u8 array, u32 value)
std::pair<std::string, std::string> GetXFIndexedLoadInfo(CPArray array, u32 index, u16 address,
u8 size)
{
const auto [index, address, size] = ExtractIndexedXF(value);

const auto desc = fmt::format("Load {} bytes to XF address {:03x} from CP array {} row {}", size,
address, array, index);
fmt::memory_buffer written;
@@ -11,5 +11,7 @@
std::pair<std::string, std::string> GetXFRegInfo(u32 address, u32 value);
std::string GetXFMemName(u32 address);
std::string GetXFMemDescription(u32 address, u32 value);
std::pair<std::string, std::string> GetXFTransferInfo(const u8* data);
std::pair<std::string, std::string> GetXFIndexedLoadInfo(u8 array, u32 value);
std::pair<std::string, std::string> GetXFTransferInfo(u16 base_address, u8 transfer_size,
const u8* data);
std::pair<std::string, std::string> GetXFIndexedLoadInfo(CPArray array, u32 index, u16 address,
u8 size);
@@ -46,6 +46,12 @@ TEST(EnumUtil, Enum1)
EXPECT_EQ(fmt::format("{:s}", Enum1::C), "0x2u /* C */");
EXPECT_EQ(fmt::format("{:s}", static_cast<Enum1>(3)), "0x3u /* Invalid */");
EXPECT_EQ(fmt::format("{:s}", static_cast<Enum1>(4)), "0x4u /* Invalid */");

EXPECT_EQ(fmt::format("{:n}", Enum1::A), "A");
EXPECT_EQ(fmt::format("{:n}", Enum1::B), "B");
EXPECT_EQ(fmt::format("{:n}", Enum1::C), "C");
EXPECT_EQ(fmt::format("{:n}", static_cast<Enum1>(3)), "Invalid (3)");
EXPECT_EQ(fmt::format("{:n}", static_cast<Enum1>(4)), "Invalid (4)");
}

TEST(EnumUtil, Enum2)
@@ -63,4 +69,11 @@ TEST(EnumUtil, Enum2)
EXPECT_EQ(fmt::format("{:s}", Enum2::F), "0x3u /* F */");
EXPECT_EQ(fmt::format("{:s}", static_cast<Enum2>(4)), "0x4u /* Invalid */");
EXPECT_EQ(fmt::format("{:s}", static_cast<Enum2>(-1)), "0xffffffffu /* Invalid */");

EXPECT_EQ(fmt::format("{:n}", Enum2::D), "D");
EXPECT_EQ(fmt::format("{:n}", Enum2::E), "E");
EXPECT_EQ(fmt::format("{:n}", static_cast<Enum2>(2)), "Invalid (2)");
EXPECT_EQ(fmt::format("{:n}", Enum2::F), "F");
EXPECT_EQ(fmt::format("{:n}", static_cast<Enum2>(4)), "Invalid (4)");
EXPECT_EQ(fmt::format("{:n}", static_cast<Enum2>(-1)), "Invalid (-1)");
}
@@ -174,8 +174,8 @@ TEST_P(VertexLoaderParamTest, PositionAll)
Input<u8>(i);
else
Input<u16>(i);
VertexLoaderManager::cached_arraybases[ARRAY_POSITION] = m_src.GetPointer();
g_main_cp_state.array_strides[ARRAY_POSITION] = elem_count * elem_size;
VertexLoaderManager::cached_arraybases[CPArray::Position] = m_src.GetPointer();
g_main_cp_state.array_strides[CPArray::Position] = elem_count * elem_size;
}
CreateAndCheckSizes(input_size, elem_count * sizeof(float));
for (float value : values)
@@ -243,8 +243,8 @@ TEST_F(VertexLoaderTest, PositionIndex16FloatXY)
CreateAndCheckSizes(sizeof(u16), 2 * sizeof(float));
Input<u16>(1);
Input<u16>(0);
VertexLoaderManager::cached_arraybases[ARRAY_POSITION] = m_src.GetPointer();
g_main_cp_state.array_strides[ARRAY_POSITION] = sizeof(float); // ;)
VertexLoaderManager::cached_arraybases[CPArray::Position] = m_src.GetPointer();
g_main_cp_state.array_strides[CPArray::Position] = sizeof(float); // ;)
Input(1.f);
Input(2.f);
Input(3.f);
@@ -357,8 +357,8 @@ TEST_F(VertexLoaderTest, LargeFloatVertexSpeed)

for (int i = 0; i < NUM_VERTEX_COMPONENT_ARRAYS; i++)
{
VertexLoaderManager::cached_arraybases[i] = m_src.GetPointer();
g_main_cp_state.array_strides[i] = 129;
VertexLoaderManager::cached_arraybases[static_cast<CPArray>(i)] = m_src.GetPointer();
g_main_cp_state.array_strides[static_cast<CPArray>(i)] = 129;
}

// This test is only done 100x in a row since it's ~20x slower using the