@@ -403,27 +403,27 @@ static void GenVertexShaderTexGens(APIType api_type, u32 num_texgen, ShaderCode&
out.Write(" float4 coord = float4(0.0, 0.0, 1.0, 1.0);\n"
" uint texMtxInfo = xfmem_texMtxInfo(texgen);\n");
out.Write(" switch ({}) {{\n", BitfieldExtract("texMtxInfo", TexMtxInfo().sourcerow));
out.Write(" case {}u: // XF_SRCGEOM_INROW\n", XF_SRCGEOM_INROW);
out.Write(" case {:s}:\n", SourceRow::Geom);
out.Write(" coord.xyz = rawpos.xyz;\n");
out.Write(" break;\n\n");
out.Write(" case {}u: // XF_SRCNORMAL_INROW\n", XF_SRCNORMAL_INROW);
out.Write(" case {:s}:\n", SourceRow::Normal);
out.Write(
" coord.xyz = ((components & {}u /* VB_HAS_NRM0 */) != 0u) ? rawnorm0.xyz : coord.xyz;",
VB_HAS_NRM0);
out.Write(" break;\n\n");
out.Write(" case {}u: // XF_SRCBINORMAL_T_INROW\n", XF_SRCBINORMAL_T_INROW);
out.Write(" case {:s}:\n", SourceRow::BinormalT);
out.Write(
" coord.xyz = ((components & {}u /* VB_HAS_NRM1 */) != 0u) ? rawnorm1.xyz : coord.xyz;",
VB_HAS_NRM1);
out.Write(" break;\n\n");
out.Write(" case {}u: // XF_SRCBINORMAL_B_INROW\n", XF_SRCBINORMAL_B_INROW);
out.Write(" case {:s}:\n", SourceRow::BinormalB);
out.Write(
" coord.xyz = ((components & {}u /* VB_HAS_NRM2 */) != 0u) ? rawnorm2.xyz : coord.xyz;",
VB_HAS_NRM2);
out.Write(" break;\n\n");
for (u32 i = 0; i < 8; i++)
{
out.Write(" case {}u: // XF_SRCTEX{}_INROW\n", XF_SRCTEX0_INROW + i, i);
out.Write(" case {:s}:\n", static_cast<SourceRow>(static_cast<u32>(SourceRow::Tex0) + i));
out.Write(
" coord = ((components & {}u /* VB_HAS_UV{} */) != 0u) ? float4(rawtex{}.x, rawtex{}.y, "
"1.0, 1.0) : coord;\n",
@@ -434,8 +434,8 @@ static void GenVertexShaderTexGens(APIType api_type, u32 num_texgen, ShaderCode&
"\n");

out.Write(" // Input form of AB11 sets z element to 1.0\n");
out.Write(" if ({} == {}u) // inputform == XF_TEXINPUT_AB11\n",
BitfieldExtract("texMtxInfo", TexMtxInfo().inputform), XF_TEXINPUT_AB11);
out.Write(" if ({} == {:s}) // inputform == AB11\n",
BitfieldExtract("texMtxInfo", TexMtxInfo().inputform), TexInputForm::AB11);
out.Write(" coord.z = 1.0f;\n"
"\n");

@@ -444,7 +444,7 @@ static void GenVertexShaderTexGens(APIType api_type, u32 num_texgen, ShaderCode&
out.Write(" float3 output_tex;\n"
" switch (texgentype)\n"
" {{\n");
out.Write(" case {}u: // XF_TEXGEN_EMBOSS_MAP\n", XF_TEXGEN_EMBOSS_MAP);
out.Write(" case {:s}:\n", TexGenType::EmbossMap);
out.Write(" {{\n");
out.Write(" uint light = {};\n",
BitfieldExtract("texMtxInfo", TexMtxInfo().embosslightshift));
@@ -462,13 +462,14 @@ static void GenVertexShaderTexGens(APIType api_type, u32 num_texgen, ShaderCode&
" }}\n"
" }}\n"
" break;\n\n");
out.Write(" case {}u: // XF_TEXGEN_COLOR_STRGBC0\n", XF_TEXGEN_COLOR_STRGBC0);
out.Write(" case {:s}:\n", TexGenType::Color0);
out.Write(" output_tex.xyz = float3(o.colors_0.x, o.colors_0.y, 1.0);\n"
" break;\n\n");
out.Write(" case {}u: // XF_TEXGEN_COLOR_STRGBC1\n", XF_TEXGEN_COLOR_STRGBC1);
out.Write(" case {:s}:\n", TexGenType::Color1);
out.Write(" output_tex.xyz = float3(o.colors_1.x, o.colors_1.y, 1.0);\n"
" break;\n\n");
out.Write(" default: // Also XF_TEXGEN_REGULAR\n"
out.Write(" case {:s}:\n", TexGenType::Regular);
out.Write(" default:\n"
" {{\n");
out.Write(" if ((components & ({}u /* VB_HAS_TEXMTXIDX0 */ << texgen)) != 0u) {{\n",
VB_HAS_TEXMTXIDX0);
@@ -480,8 +481,8 @@ static void GenVertexShaderTexGens(APIType api_type, u32 num_texgen, ShaderCode&
out.Write(" case {}u: tmp = int(rawtex{}.z); break;\n", i, i);
out.Write(" }}\n"
"\n");
out.Write(" if ({} == {}u) {{\n", BitfieldExtract("texMtxInfo", TexMtxInfo().projection),
XF_TEXPROJ_STQ);
out.Write(" if ({} == {:s}) {{\n", BitfieldExtract("texMtxInfo", TexMtxInfo().projection),
TexSize::STQ);
out.Write(" output_tex.xyz = float3(dot(coord, " I_TRANSFORMMATRICES "[tmp]),\n"
" dot(coord, " I_TRANSFORMMATRICES "[tmp + 1]),\n"
" dot(coord, " I_TRANSFORMMATRICES "[tmp + 2]));\n"
@@ -491,8 +492,8 @@ static void GenVertexShaderTexGens(APIType api_type, u32 num_texgen, ShaderCode&
" 1.0);\n"
" }}\n"
" }} else {{\n");
out.Write(" if ({} == {}u) {{\n", BitfieldExtract("texMtxInfo", TexMtxInfo().projection),
XF_TEXPROJ_STQ);
out.Write(" if ({} == {:s}) {{\n", BitfieldExtract("texMtxInfo", TexMtxInfo().projection),
TexSize::STQ);
out.Write(" output_tex.xyz = float3(dot(coord, " I_TEXMATRICES "[3u * texgen]),\n"
" dot(coord, " I_TEXMATRICES "[3u * texgen + 1u]),\n"
" dot(coord, " I_TEXMATRICES "[3u * texgen + 2u]));\n"
@@ -526,8 +527,7 @@ static void GenVertexShaderTexGens(APIType api_type, u32 num_texgen, ShaderCode&
// When q is 0, the GameCube appears to have a special case
// This can be seen in devkitPro's neheGX Lesson08 example for Wii
// Makes differences in Rogue Squadron 3 (Hoth sky) and The Last Story (shadow culling)
out.Write(" if (texgentype == {}u && output_tex.z == 0.0) // XF_TEXGEN_REGULAR\n",
XF_TEXGEN_REGULAR);
out.Write(" if (texgentype == {:s} && output_tex.z == 0.0)\n", TexGenType::Regular);
out.Write(
" output_tex.xy = clamp(output_tex.xy / 2.0f, float2(-1.0f,-1.0f), float2(1.0f,1.0f));\n"
"\n");
@@ -2,11 +2,12 @@
// Licensed under GPLv2+
// Refer to the license.txt file included.

#include "VideoCommon/VertexLoader.h"

#include "Common/Assert.h"
#include "Common/CommonTypes.h"

#include "VideoCommon/DataReader.h"
#include "VideoCommon/VertexLoader.h"
#include "VideoCommon/VertexLoaderManager.h"
#include "VideoCommon/VertexLoaderUtils.h"
#include "VideoCommon/VertexLoader_Color.h"
@@ -84,20 +85,13 @@ void VertexLoader::CompileVertexTranslator()
// Reset pipeline
m_numPipelineStages = 0;

// Colors
const u64 col[2] = {m_VtxDesc.Color0, m_VtxDesc.Color1};
// TextureCoord
const u64 tc[8] = {m_VtxDesc.Tex0Coord, m_VtxDesc.Tex1Coord, m_VtxDesc.Tex2Coord,
m_VtxDesc.Tex3Coord, m_VtxDesc.Tex4Coord, m_VtxDesc.Tex5Coord,
m_VtxDesc.Tex6Coord, m_VtxDesc.Tex7Coord};

u32 components = 0;

// Position in pc vertex format.
int nat_offset = 0;

// Position Matrix Index
if (m_VtxDesc.PosMatIdx)
if (m_VtxDesc.low.PosMatIdx)
{
WriteCall(PosMtx_ReadDirect_UByte);
components |= VB_HAS_POSMTXIDX;
@@ -110,62 +104,62 @@ void VertexLoader::CompileVertexTranslator()
m_VertexSize += 1;
}

if (m_VtxDesc.Tex0MatIdx)
if (m_VtxDesc.low.Tex0MatIdx)
{
m_VertexSize += 1;
components |= VB_HAS_TEXMTXIDX0;
WriteCall(TexMtx_ReadDirect_UByte);
}
if (m_VtxDesc.Tex1MatIdx)
if (m_VtxDesc.low.Tex1MatIdx)
{
m_VertexSize += 1;
components |= VB_HAS_TEXMTXIDX1;
WriteCall(TexMtx_ReadDirect_UByte);
}
if (m_VtxDesc.Tex2MatIdx)
if (m_VtxDesc.low.Tex2MatIdx)
{
m_VertexSize += 1;
components |= VB_HAS_TEXMTXIDX2;
WriteCall(TexMtx_ReadDirect_UByte);
}
if (m_VtxDesc.Tex3MatIdx)
if (m_VtxDesc.low.Tex3MatIdx)
{
m_VertexSize += 1;
components |= VB_HAS_TEXMTXIDX3;
WriteCall(TexMtx_ReadDirect_UByte);
}
if (m_VtxDesc.Tex4MatIdx)
if (m_VtxDesc.low.Tex4MatIdx)
{
m_VertexSize += 1;
components |= VB_HAS_TEXMTXIDX4;
WriteCall(TexMtx_ReadDirect_UByte);
}
if (m_VtxDesc.Tex5MatIdx)
if (m_VtxDesc.low.Tex5MatIdx)
{
m_VertexSize += 1;
components |= VB_HAS_TEXMTXIDX5;
WriteCall(TexMtx_ReadDirect_UByte);
}
if (m_VtxDesc.Tex6MatIdx)
if (m_VtxDesc.low.Tex6MatIdx)
{
m_VertexSize += 1;
components |= VB_HAS_TEXMTXIDX6;
WriteCall(TexMtx_ReadDirect_UByte);
}
if (m_VtxDesc.Tex7MatIdx)
if (m_VtxDesc.low.Tex7MatIdx)
{
m_VertexSize += 1;
components |= VB_HAS_TEXMTXIDX7;
WriteCall(TexMtx_ReadDirect_UByte);
}

// Write vertex position loader
WriteCall(VertexLoader_Position::GetFunction(m_VtxDesc.Position, m_VtxAttr.PosFormat,
WriteCall(VertexLoader_Position::GetFunction(m_VtxDesc.low.Position, m_VtxAttr.PosFormat,
m_VtxAttr.PosElements));

m_VertexSize += VertexLoader_Position::GetSize(m_VtxDesc.Position, m_VtxAttr.PosFormat,
m_VertexSize += VertexLoader_Position::GetSize(m_VtxDesc.low.Position, m_VtxAttr.PosFormat,
m_VtxAttr.PosElements);
int pos_elements = m_VtxAttr.PosElements + 2;
int pos_elements = m_VtxAttr.PosElements == CoordComponentCount::XY ? 2 : 3;
m_native_vtx_decl.position.components = pos_elements;
m_native_vtx_decl.position.enable = true;
m_native_vtx_decl.position.offset = nat_offset;
@@ -174,23 +168,24 @@ void VertexLoader::CompileVertexTranslator()
nat_offset += pos_elements * sizeof(float);

// Normals
if (m_VtxDesc.Normal != NOT_PRESENT)
if (m_VtxDesc.low.Normal != VertexComponentFormat::NotPresent)
{
m_VertexSize += VertexLoader_Normal::GetSize(m_VtxDesc.Normal, m_VtxAttr.NormalFormat,
m_VertexSize += VertexLoader_Normal::GetSize(m_VtxDesc.low.Normal, m_VtxAttr.NormalFormat,
m_VtxAttr.NormalElements, m_VtxAttr.NormalIndex3);

TPipelineFunction pFunc = VertexLoader_Normal::GetFunction(
m_VtxDesc.Normal, m_VtxAttr.NormalFormat, m_VtxAttr.NormalElements, m_VtxAttr.NormalIndex3);
TPipelineFunction pFunc =
VertexLoader_Normal::GetFunction(m_VtxDesc.low.Normal, m_VtxAttr.NormalFormat,
m_VtxAttr.NormalElements, m_VtxAttr.NormalIndex3);

if (pFunc == nullptr)
{
PanicAlertFmt("VertexLoader_Normal::GetFunction({} {} {} {}) returned zero!",
m_VtxDesc.Normal, m_VtxAttr.NormalFormat, m_VtxAttr.NormalElements,
m_VtxDesc.low.Normal, m_VtxAttr.NormalFormat, m_VtxAttr.NormalElements,
m_VtxAttr.NormalIndex3);
}
WriteCall(pFunc);

for (int i = 0; i < (vtx_attr.NormalElements ? 3 : 1); i++)
for (int i = 0; i < (vtx_attr.NormalElements == NormalComponentCount::NBT ? 3 : 1); i++)
{
m_native_vtx_decl.normals[i].components = 3;
m_native_vtx_decl.normals[i].enable = true;
@@ -201,43 +196,43 @@ void VertexLoader::CompileVertexTranslator()
}

components |= VB_HAS_NRM0;
if (m_VtxAttr.NormalElements == 1)
if (m_VtxAttr.NormalElements == NormalComponentCount::NBT)
components |= VB_HAS_NRM1 | VB_HAS_NRM2;
}

for (int i = 0; i < 2; i++)
for (size_t i = 0; i < m_VtxDesc.low.Color.Size(); i++)
{
m_native_vtx_decl.colors[i].components = 4;
m_native_vtx_decl.colors[i].type = VAR_UNSIGNED_BYTE;
m_native_vtx_decl.colors[i].integer = false;
switch (col[i])
switch (m_VtxDesc.low.Color[i])
{
case NOT_PRESENT:
case VertexComponentFormat::NotPresent:
break;
case DIRECT:
case VertexComponentFormat::Direct:
switch (m_VtxAttr.color[i].Comp)
{
case FORMAT_16B_565:
case ColorFormat::RGB565:
m_VertexSize += 2;
WriteCall(Color_ReadDirect_16b_565);
break;
case FORMAT_24B_888:
case ColorFormat::RGB888:
m_VertexSize += 3;
WriteCall(Color_ReadDirect_24b_888);
break;
case FORMAT_32B_888x:
case ColorFormat::RGB888x:
m_VertexSize += 4;
WriteCall(Color_ReadDirect_32b_888x);
break;
case FORMAT_16B_4444:
case ColorFormat::RGBA4444:
m_VertexSize += 2;
WriteCall(Color_ReadDirect_16b_4444);
break;
case FORMAT_24B_6666:
case ColorFormat::RGBA6666:
m_VertexSize += 3;
WriteCall(Color_ReadDirect_24b_6666);
break;
case FORMAT_32B_8888:
case ColorFormat::RGBA8888:
m_VertexSize += 4;
WriteCall(Color_ReadDirect_32b_8888);
break;
@@ -246,53 +241,53 @@ void VertexLoader::CompileVertexTranslator()
break;
}
break;
case INDEX8:
case VertexComponentFormat::Index8:
m_VertexSize += 1;
switch (m_VtxAttr.color[i].Comp)
{
case FORMAT_16B_565:
case ColorFormat::RGB565:
WriteCall(Color_ReadIndex8_16b_565);
break;
case FORMAT_24B_888:
case ColorFormat::RGB888:
WriteCall(Color_ReadIndex8_24b_888);
break;
case FORMAT_32B_888x:
case ColorFormat::RGB888x:
WriteCall(Color_ReadIndex8_32b_888x);
break;
case FORMAT_16B_4444:
case ColorFormat::RGBA4444:
WriteCall(Color_ReadIndex8_16b_4444);
break;
case FORMAT_24B_6666:
case ColorFormat::RGBA6666:
WriteCall(Color_ReadIndex8_24b_6666);
break;
case FORMAT_32B_8888:
case ColorFormat::RGBA8888:
WriteCall(Color_ReadIndex8_32b_8888);
break;
default:
ASSERT(0);
break;
}
break;
case INDEX16:
case VertexComponentFormat::Index16:
m_VertexSize += 2;
switch (m_VtxAttr.color[i].Comp)
{
case FORMAT_16B_565:
case ColorFormat::RGB565:
WriteCall(Color_ReadIndex16_16b_565);
break;
case FORMAT_24B_888:
case ColorFormat::RGB888:
WriteCall(Color_ReadIndex16_24b_888);
break;
case FORMAT_32B_888x:
case ColorFormat::RGB888x:
WriteCall(Color_ReadIndex16_32b_888x);
break;
case FORMAT_16B_4444:
case ColorFormat::RGBA4444:
WriteCall(Color_ReadIndex16_16b_4444);
break;
case FORMAT_24B_6666:
case ColorFormat::RGBA6666:
WriteCall(Color_ReadIndex16_24b_6666);
break;
case FORMAT_32B_8888:
case ColorFormat::RGBA8888:
WriteCall(Color_ReadIndex16_32b_8888);
break;
default:
@@ -302,7 +297,7 @@ void VertexLoader::CompileVertexTranslator()
break;
}
// Common for the three bottom cases
if (col[i] != NOT_PRESENT)
if (m_VtxDesc.low.Color[i] != VertexComponentFormat::NotPresent)
{
components |= VB_HAS_COL0 << i;
m_native_vtx_decl.colors[i].offset = nat_offset;
@@ -312,38 +307,40 @@ void VertexLoader::CompileVertexTranslator()
}

// Texture matrix indices (remove if corresponding texture coordinate isn't enabled)
for (int i = 0; i < 8; i++)
for (size_t i = 0; i < m_VtxDesc.high.TexCoord.Size(); i++)
{
m_native_vtx_decl.texcoords[i].offset = nat_offset;
m_native_vtx_decl.texcoords[i].type = VAR_FLOAT;
m_native_vtx_decl.texcoords[i].integer = false;

const int format = m_VtxAttr.texCoord[i].Format;
const int elements = m_VtxAttr.texCoord[i].Elements;
const auto tc = m_VtxDesc.high.TexCoord[i].Value();
const auto format = m_VtxAttr.texCoord[i].Format;
const auto elements = m_VtxAttr.texCoord[i].Elements;

if (tc[i] != NOT_PRESENT)
if (tc != VertexComponentFormat::NotPresent)
{
ASSERT_MSG(VIDEO, DIRECT <= tc[i] && tc[i] <= INDEX16,
"Invalid texture coordinates!\n(tc[i] = %d)", (u32)tc[i]);
ASSERT_MSG(VIDEO, FORMAT_UBYTE <= format && format <= FORMAT_FLOAT,
"Invalid texture coordinates format!\n(format = %d)", format);
ASSERT_MSG(VIDEO, 0 <= elements && elements <= 1,
"Invalid number of texture coordinates elements!\n(elements = %d)", elements);
ASSERT_MSG(VIDEO, VertexComponentFormat::Direct <= tc && tc <= VertexComponentFormat::Index16,
"Invalid texture coordinates!\n(tc = %d)", (u32)tc);
ASSERT_MSG(VIDEO, ComponentFormat::UByte <= format && format <= ComponentFormat::Float,
"Invalid texture coordinates format!\n(format = %d)", (u32)format);
ASSERT_MSG(VIDEO, elements == TexComponentCount::S || elements == TexComponentCount::ST,
"Invalid number of texture coordinates elements!\n(elements = %d)", (u32)elements);

components |= VB_HAS_UV0 << i;
WriteCall(VertexLoader_TextCoord::GetFunction(tc[i], format, elements));
m_VertexSize += VertexLoader_TextCoord::GetSize(tc[i], format, elements);
WriteCall(VertexLoader_TextCoord::GetFunction(tc, format, elements));
m_VertexSize += VertexLoader_TextCoord::GetSize(tc, format, elements);
}

if (components & (VB_HAS_TEXMTXIDX0 << i))
{
m_native_vtx_decl.texcoords[i].enable = true;
if (tc[i] != NOT_PRESENT)
if (tc != VertexComponentFormat::NotPresent)
{
// if texmtx is included, texcoord will always be 3 floats, z will be the texmtx index
m_native_vtx_decl.texcoords[i].components = 3;
nat_offset += 12;
WriteCall(m_VtxAttr.texCoord[i].Elements ? TexMtx_Write_Float : TexMtx_Write_Float2);
WriteCall(m_VtxAttr.texCoord[i].Elements == TexComponentCount::ST ? TexMtx_Write_Float :
TexMtx_Write_Float2);
}
else
{
@@ -354,21 +351,22 @@ void VertexLoader::CompileVertexTranslator()
}
else
{
if (tc[i] != NOT_PRESENT)
if (tc != VertexComponentFormat::NotPresent)
{
m_native_vtx_decl.texcoords[i].enable = true;
m_native_vtx_decl.texcoords[i].components = vtx_attr.texCoord[i].Elements ? 2 : 1;
nat_offset += 4 * (vtx_attr.texCoord[i].Elements ? 2 : 1);
m_native_vtx_decl.texcoords[i].components =
vtx_attr.texCoord[i].Elements == TexComponentCount::ST ? 2 : 1;
nat_offset += 4 * (vtx_attr.texCoord[i].Elements == TexComponentCount::ST ? 2 : 1);
}
}

if (tc[i] == NOT_PRESENT)
if (tc == VertexComponentFormat::NotPresent)
{
// if there's more tex coords later, have to write a dummy call
int j = i + 1;
for (; j < 8; ++j)
size_t j = i + 1;
for (; j < m_VtxDesc.high.TexCoord.Size(); ++j)
{
if (tc[j] != NOT_PRESENT)
if (m_VtxDesc.high.TexCoord[j] != VertexComponentFormat::NotPresent)
{
WriteCall(VertexLoader_TextCoord::GetDummyFunction()); // important to get indices right!
break;
@@ -383,8 +381,8 @@ void VertexLoader::CompileVertexTranslator()
}
}

// indexed position formats may skip a the vertex
if (m_VtxDesc.Position & 2)
// indexed position formats may skip the vertex
if (IsIndexed(m_VtxDesc.low.Position))
{
WriteCall(SkipVertex);
}
@@ -3,6 +3,9 @@
// Refer to the license.txt file included.

#include "VideoCommon/VertexLoaderARM64.h"

#include <array>

#include "Common/CommonTypes.h"
#include "VideoCommon/DataReader.h"
#include "VideoCommon/VertexLoaderManager.h"
@@ -45,11 +48,11 @@ VertexLoaderARM64::VertexLoaderARM64(const TVtxDesc& vtx_desc, const VAT& vtx_at
WriteProtect();
}

void VertexLoaderARM64::GetVertexAddr(int array, u64 attribute, ARM64Reg reg)
void VertexLoaderARM64::GetVertexAddr(int array, VertexComponentFormat attribute, ARM64Reg reg)
{
if (attribute & MASK_INDEXED)
if (IsIndexed(attribute))
{
if (attribute == INDEX8)
if (attribute == VertexComponentFormat::Index8)
{
if (m_src_ofs < 4096)
{
@@ -83,7 +86,8 @@ void VertexLoaderARM64::GetVertexAddr(int array, u64 attribute, ARM64Reg reg)

if (array == ARRAY_POSITION)
{
EOR(scratch2_reg, scratch1_reg, 0, attribute == INDEX8 ? 7 : 15); // 0xFF : 0xFFFF
EOR(scratch2_reg, scratch1_reg, 0,
attribute == VertexComponentFormat::Index8 ? 7 : 15); // 0xFF : 0xFFFF
m_skip_vertex = CBZ(scratch2_reg);
}

@@ -97,23 +101,24 @@ void VertexLoaderARM64::GetVertexAddr(int array, u64 attribute, ARM64Reg reg)
ADD(reg, src_reg, m_src_ofs);
}

// Decides how the next attribute of the given array should be addressed.
//
// When the attribute is indexed, or when the running source offset m_src_ofs is
// larger than 255 and has any of the low (align - 1) bits set, GetVertexAddr()
// is called with `reg` and -1 is returned. In every other case the current
// m_src_ofs is returned unchanged.
//
// NOTE(review): the (align - 1) mask only works as an alignment test if `align`
// is a power of two - confirm against the callers.
s32 VertexLoaderARM64::GetAddressImm(int array, u64 attribute, Arm64Gen::ARM64Reg reg, u32 align)
s32 VertexLoaderARM64::GetAddressImm(int array, VertexComponentFormat attribute,
Arm64Gen::ARM64Reg reg, u32 align)
{
// Indexed attributes, and large offsets that are not align-aligned, go through
// GetVertexAddr() instead of being handed back as a plain offset.
if (attribute & MASK_INDEXED || (m_src_ofs > 255 && (m_src_ofs & (align - 1))))
if (IsIndexed(attribute) || (m_src_ofs > 255 && (m_src_ofs & (align - 1))))
GetVertexAddr(array, attribute, reg);
else
return m_src_ofs;
// -1 is the sentinel the readers in this file test for (offset == -1) before
// loading through the scratch register rather than src_reg + offset.
return -1;
}

int VertexLoaderARM64::ReadVertex(u64 attribute, int format, int count_in, int count_out,
bool dequantize, u8 scaling_exponent,
int VertexLoaderARM64::ReadVertex(VertexComponentFormat attribute, ComponentFormat format,
int count_in, int count_out, bool dequantize, u8 scaling_exponent,
AttributeFormat* native_format, s32 offset)
{
ARM64Reg coords = count_in == 3 ? Q31 : D31;
ARM64Reg scale = count_in == 3 ? Q30 : D30;

int elem_size = 1 << (format / 2);
int elem_size = GetElementSize(format);
int load_bytes = elem_size * count_in;
int load_size =
load_bytes == 1 ? 1 : load_bytes <= 2 ? 2 : load_bytes <= 4 ? 4 : load_bytes <= 8 ? 8 : 16;
@@ -136,24 +141,24 @@ int VertexLoaderARM64::ReadVertex(u64 attribute, int format, int count_in, int c
m_float_emit.LDR(load_size, IndexType::Unsigned, coords, src_reg, offset);
}

if (format != FORMAT_FLOAT)
if (format != ComponentFormat::Float)
{
// Extend and convert to float
switch (format)
{
case FORMAT_UBYTE:
case ComponentFormat::UByte:
m_float_emit.UXTL(8, EncodeRegToDouble(coords), EncodeRegToDouble(coords));
m_float_emit.UXTL(16, EncodeRegToDouble(coords), EncodeRegToDouble(coords));
break;
case FORMAT_BYTE:
case ComponentFormat::Byte:
m_float_emit.SXTL(8, EncodeRegToDouble(coords), EncodeRegToDouble(coords));
m_float_emit.SXTL(16, EncodeRegToDouble(coords), EncodeRegToDouble(coords));
break;
case FORMAT_USHORT:
case ComponentFormat::UShort:
m_float_emit.REV16(8, EncodeRegToDouble(coords), EncodeRegToDouble(coords));
m_float_emit.UXTL(16, EncodeRegToDouble(coords), EncodeRegToDouble(coords));
break;
case FORMAT_SHORT:
case ComponentFormat::Short:
m_float_emit.REV16(8, EncodeRegToDouble(coords), EncodeRegToDouble(coords));
m_float_emit.SXTL(16, EncodeRegToDouble(coords), EncodeRegToDouble(coords));
break;
@@ -207,34 +212,34 @@ int VertexLoaderARM64::ReadVertex(u64 attribute, int format, int count_in, int c
native_format->integer = false;
m_dst_ofs += sizeof(float) * count_out;

if (attribute == DIRECT)
if (attribute == VertexComponentFormat::Direct)
m_src_ofs += load_bytes;

return load_bytes;
}

void VertexLoaderARM64::ReadColor(u64 attribute, int format, s32 offset)
void VertexLoaderARM64::ReadColor(VertexComponentFormat attribute, ColorFormat format, s32 offset)
{
int load_bytes = 0;
switch (format)
{
case FORMAT_24B_888:
case FORMAT_32B_888x:
case FORMAT_32B_8888:
case ColorFormat::RGB888:
case ColorFormat::RGB888x:
case ColorFormat::RGBA8888:
if (offset == -1)
LDR(IndexType::Unsigned, scratch2_reg, EncodeRegTo64(scratch1_reg), 0);
else if (offset & 3) // Not aligned - unscaled
LDUR(scratch2_reg, src_reg, offset);
else
LDR(IndexType::Unsigned, scratch2_reg, src_reg, offset);

if (format != FORMAT_32B_8888)
if (format != ColorFormat::RGBA8888)
ORRI2R(scratch2_reg, scratch2_reg, 0xFF000000);
STR(IndexType::Unsigned, scratch2_reg, dst_reg, m_dst_ofs);
load_bytes = 3 + (format != FORMAT_24B_888);
load_bytes = format == ColorFormat::RGB888 ? 3 : 4;
break;

case FORMAT_16B_565:
case ColorFormat::RGB565:
// RRRRRGGG GGGBBBBB
// AAAAAAAA BBBBBBBB GGGGGGGG RRRRRRRR
if (offset == -1)
@@ -270,7 +275,7 @@ void VertexLoaderARM64::ReadColor(u64 attribute, int format, s32 offset)
load_bytes = 2;
break;

case FORMAT_16B_4444:
case ColorFormat::RGBA4444:
// BBBBAAAA RRRRGGGG
// REV16 - RRRRGGGG BBBBAAAA
// AAAAAAAA BBBBBBBB GGGGGGGG RRRRRRRR
@@ -303,7 +308,7 @@ void VertexLoaderARM64::ReadColor(u64 attribute, int format, s32 offset)
load_bytes = 2;
break;

case FORMAT_24B_6666:
case ColorFormat::RGBA6666:
// RRRRRRGG GGGGBBBB BBAAAAAA
// AAAAAAAA BBBBBBBB GGGGGGGG RRRRRRRR
if (offset == -1)
@@ -349,7 +354,7 @@ void VertexLoaderARM64::ReadColor(u64 attribute, int format, s32 offset)
load_bytes = 3;
break;
}
if (attribute == DIRECT)
if (attribute == VertexComponentFormat::Direct)
m_src_ofs += load_bytes;
}

@@ -370,24 +375,19 @@ void VertexLoaderARM64::GenerateVertexLoader()
// We can touch all except v8-v15
// If we need to use those, we need to retain the lower 64bits(!) of the register

const u64 tc[8] = {
m_VtxDesc.Tex0Coord, m_VtxDesc.Tex1Coord, m_VtxDesc.Tex2Coord, m_VtxDesc.Tex3Coord,
m_VtxDesc.Tex4Coord, m_VtxDesc.Tex5Coord, m_VtxDesc.Tex6Coord, m_VtxDesc.Tex7Coord,
};

bool has_tc = false;
bool has_tc_scale = false;
for (int i = 0; i < 8; i++)
for (size_t i = 0; i < m_VtxDesc.high.TexCoord.Size(); i++)
{
has_tc |= tc[i] != 0;
has_tc |= m_VtxDesc.high.TexCoord[i] != VertexComponentFormat::NotPresent;
has_tc_scale |= !!m_VtxAttr.texCoord[i].Frac;
}

bool need_scale =
(m_VtxAttr.ByteDequant && m_VtxAttr.PosFrac) || (has_tc && has_tc_scale) || m_VtxDesc.Normal;
bool need_scale = (m_VtxAttr.ByteDequant && m_VtxAttr.PosFrac) || (has_tc && has_tc_scale) ||
(m_VtxDesc.low.Normal != VertexComponentFormat::NotPresent);

AlignCode16();
if (m_VtxDesc.Position & MASK_INDEXED)
if (IsIndexed(m_VtxDesc.low.Position))
MOV(skipped_reg, WZR);
MOV(saved_count, count_reg);

@@ -399,7 +399,7 @@ void VertexLoaderARM64::GenerateVertexLoader()

const u8* loop_start = GetCodePtr();

if (m_VtxDesc.PosMatIdx)
if (m_VtxDesc.low.PosMatIdx)
{
LDRB(IndexType::Unsigned, scratch1_reg, src_reg, m_src_ofs);
AND(scratch1_reg, scratch1_reg, 0, 5);
@@ -422,58 +422,55 @@ void VertexLoaderARM64::GenerateVertexLoader()
m_dst_ofs += sizeof(u32);
}

u32 texmatidx_ofs[8];
const u64 tm[8] = {
m_VtxDesc.Tex0MatIdx, m_VtxDesc.Tex1MatIdx, m_VtxDesc.Tex2MatIdx, m_VtxDesc.Tex3MatIdx,
m_VtxDesc.Tex4MatIdx, m_VtxDesc.Tex5MatIdx, m_VtxDesc.Tex6MatIdx, m_VtxDesc.Tex7MatIdx,
};
for (int i = 0; i < 8; i++)
std::array<u32, 8> texmatidx_ofs;
for (size_t i = 0; i < m_VtxDesc.low.TexMatIdx.Size(); i++)
{
if (tm[i])
if (m_VtxDesc.low.TexMatIdx[i])
texmatidx_ofs[i] = m_src_ofs++;
}

// Position
{
int elem_size = 1 << (m_VtxAttr.PosFormat / 2);
int load_bytes = elem_size * (m_VtxAttr.PosElements + 2);
int elem_size = GetElementSize(m_VtxAttr.PosFormat);
int pos_elements = m_VtxAttr.PosElements == CoordComponentCount::XY ? 2 : 3;
int load_bytes = elem_size * pos_elements;
int load_size =
load_bytes == 1 ? 1 : load_bytes <= 2 ? 2 : load_bytes <= 4 ? 4 : load_bytes <= 8 ? 8 : 16;
load_size <<= 3;

s32 offset =
GetAddressImm(ARRAY_POSITION, m_VtxDesc.Position, EncodeRegTo64(scratch1_reg), load_size);
int pos_elements = m_VtxAttr.PosElements + 2;
ReadVertex(m_VtxDesc.Position, m_VtxAttr.PosFormat, pos_elements, pos_elements,
s32 offset = GetAddressImm(ARRAY_POSITION, m_VtxDesc.low.Position, EncodeRegTo64(scratch1_reg),
load_size);
ReadVertex(m_VtxDesc.low.Position, m_VtxAttr.PosFormat, pos_elements, pos_elements,
m_VtxAttr.ByteDequant, m_VtxAttr.PosFrac, &m_native_vtx_decl.position, offset);
}

if (m_VtxDesc.Normal)
if (m_VtxDesc.low.Normal != VertexComponentFormat::NotPresent)
{
static const u8 map[8] = {7, 6, 15, 14};
u8 scaling_exponent = map[m_VtxAttr.NormalFormat];
const u8 scaling_exponent = map[u32(m_VtxAttr.NormalFormat)];
const int limit = m_VtxAttr.NormalElements == NormalComponentCount::NBT ? 3 : 1;

s32 offset = -1;
for (int i = 0; i < (m_VtxAttr.NormalElements ? 3 : 1); i++)
for (int i = 0; i < (m_VtxAttr.NormalElements == NormalComponentCount::NBT ? 3 : 1); i++)
{
if (!i || m_VtxAttr.NormalIndex3)
{
int elem_size = 1 << (m_VtxAttr.NormalFormat / 2);
int elem_size = GetElementSize(m_VtxAttr.NormalFormat);

int load_bytes = elem_size * 3;
int load_size = load_bytes == 1 ?
1 :
load_bytes <= 2 ? 2 : load_bytes <= 4 ? 4 : load_bytes <= 8 ? 8 : 16;

offset = GetAddressImm(ARRAY_NORMAL, m_VtxDesc.Normal, EncodeRegTo64(scratch1_reg),
offset = GetAddressImm(ARRAY_NORMAL, m_VtxDesc.low.Normal, EncodeRegTo64(scratch1_reg),
load_size << 3);

if (offset == -1)
ADD(EncodeRegTo64(scratch1_reg), EncodeRegTo64(scratch1_reg), i * elem_size * 3);
else
offset += i * elem_size * 3;
}
int bytes_read = ReadVertex(m_VtxDesc.Normal, m_VtxAttr.NormalFormat, 3, 3, true,
int bytes_read = ReadVertex(m_VtxDesc.low.Normal, m_VtxAttr.NormalFormat, 3, 3, true,
scaling_exponent, &m_native_vtx_decl.normals[i], offset);

if (offset == -1)
@@ -483,25 +480,26 @@ void VertexLoaderARM64::GenerateVertexLoader()
}

m_native_components |= VB_HAS_NRM0;
if (m_VtxAttr.NormalElements)
if (m_VtxAttr.NormalElements == NormalComponentCount::NBT)
m_native_components |= VB_HAS_NRM1 | VB_HAS_NRM2;
}

const u64 col[2] = {m_VtxDesc.Color0, m_VtxDesc.Color1};
for (int i = 0; i < 2; i++)
for (size_t i = 0; i < m_VtxDesc.low.Color.Size(); i++)
{
m_native_vtx_decl.colors[i].components = 4;
m_native_vtx_decl.colors[i].type = VAR_UNSIGNED_BYTE;
m_native_vtx_decl.colors[i].integer = false;

if (col[i])
if (m_VtxDesc.low.Color[i] != VertexComponentFormat::NotPresent)
{
u32 align = 4;
if (m_VtxAttr.color[i].Comp == FORMAT_16B_565 || m_VtxAttr.color[i].Comp == FORMAT_16B_4444)
if (m_VtxAttr.color[i].Comp == ColorFormat::RGB565 ||
m_VtxAttr.color[i].Comp == ColorFormat::RGBA4444)
align = 2;

s32 offset = GetAddressImm(ARRAY_COLOR + i, col[i], EncodeRegTo64(scratch1_reg), align);
ReadColor(col[i], m_VtxAttr.color[i].Comp, offset);
s32 offset = GetAddressImm(ARRAY_COLOR + int(i), m_VtxDesc.low.Color[i],
EncodeRegTo64(scratch1_reg), align);
ReadColor(m_VtxDesc.low.Color[i], m_VtxAttr.color[i].Comp, offset);
m_native_components |= VB_HAS_COL0 << i;
m_native_vtx_decl.colors[i].components = 4;
m_native_vtx_decl.colors[i].enable = true;
@@ -512,31 +510,32 @@ void VertexLoaderARM64::GenerateVertexLoader()
}
}

for (int i = 0; i < 8; i++)
for (size_t i = 0; i < m_VtxDesc.high.TexCoord.Size(); i++)
{
m_native_vtx_decl.texcoords[i].offset = m_dst_ofs;
m_native_vtx_decl.texcoords[i].type = VAR_FLOAT;
m_native_vtx_decl.texcoords[i].integer = false;

int elements = m_VtxAttr.texCoord[i].Elements + 1;
if (tc[i])
int elements = m_VtxAttr.texCoord[i].Elements == TexComponentCount::S ? 1 : 2;
if (m_VtxDesc.high.TexCoord[i] != VertexComponentFormat::NotPresent)
{
m_native_components |= VB_HAS_UV0 << i;

int elem_size = 1 << (m_VtxAttr.texCoord[i].Format / 2);
int elem_size = GetElementSize(m_VtxAttr.texCoord[i].Format);
int load_bytes = elem_size * (elements + 2);
int load_size = load_bytes == 1 ?
1 :
load_bytes <= 2 ? 2 : load_bytes <= 4 ? 4 : load_bytes <= 8 ? 8 : 16;
load_size <<= 3;

s32 offset =
GetAddressImm(ARRAY_TEXCOORD0 + i, tc[i], EncodeRegTo64(scratch1_reg), load_size);
s32 offset = GetAddressImm(ARRAY_TEXCOORD0 + int(i), m_VtxDesc.high.TexCoord[i],
EncodeRegTo64(scratch1_reg), load_size);
u8 scaling_exponent = m_VtxAttr.texCoord[i].Frac;
ReadVertex(tc[i], m_VtxAttr.texCoord[i].Format, elements, tm[i] ? 2 : elements,
m_VtxAttr.ByteDequant, scaling_exponent, &m_native_vtx_decl.texcoords[i], offset);
ReadVertex(m_VtxDesc.high.TexCoord[i], m_VtxAttr.texCoord[i].Format, elements,
m_VtxDesc.low.TexMatIdx[i] ? 2 : elements, m_VtxAttr.ByteDequant, scaling_exponent,
&m_native_vtx_decl.texcoords[i], offset);
}
if (tm[i])
if (m_VtxDesc.low.TexMatIdx[i])
{
m_native_components |= VB_HAS_TEXMTXIDX0 << i;
m_native_vtx_decl.texcoords[i].components = 3;
@@ -547,7 +546,7 @@ void VertexLoaderARM64::GenerateVertexLoader()
LDRB(IndexType::Unsigned, scratch2_reg, src_reg, texmatidx_ofs[i]);
m_float_emit.UCVTF(S31, scratch2_reg);

if (tc[i])
if (m_VtxDesc.high.TexCoord[i] != VertexComponentFormat::NotPresent)
{
m_float_emit.STR(32, IndexType::Unsigned, D31, dst_reg, m_dst_ofs);
m_dst_ofs += sizeof(float);
@@ -587,7 +586,7 @@ void VertexLoaderARM64::GenerateVertexLoader()
SUB(count_reg, count_reg, 1);
CBNZ(count_reg, loop_start);

if (m_VtxDesc.Position & MASK_INDEXED)
if (IsIndexed(m_VtxDesc.low.Position))
{
SUB(W0, saved_count, skipped_reg);
RET(X30);
@@ -9,6 +9,9 @@
#include "VideoCommon/VertexLoaderBase.h"

class DataReader;
enum class VertexComponentFormat;
enum class ComponentFormat;
enum class ColorFormat;

class VertexLoaderARM64 : public VertexLoaderBase, public Arm64Gen::ARM64CodeBlock
{
@@ -25,10 +28,11 @@ class VertexLoaderARM64 : public VertexLoaderBase, public Arm64Gen::ARM64CodeBlo
u32 m_dst_ofs = 0;
Arm64Gen::FixupBranch m_skip_vertex;
Arm64Gen::ARM64FloatEmitter m_float_emit;
void GetVertexAddr(int array, u64 attribute, Arm64Gen::ARM64Reg reg);
s32 GetAddressImm(int array, u64 attribute, Arm64Gen::ARM64Reg reg, u32 align);
int ReadVertex(u64 attribute, int format, int count_in, int count_out, bool dequantize,
u8 scaling_exponent, AttributeFormat* native_format, s32 offset = -1);
void ReadColor(u64 attribute, int format, s32 offset);
void GetVertexAddr(int array, VertexComponentFormat attribute, Arm64Gen::ARM64Reg reg);
s32 GetAddressImm(int array, VertexComponentFormat attribute, Arm64Gen::ARM64Reg reg, u32 align);
int ReadVertex(VertexComponentFormat attribute, ComponentFormat format, int count_in,
int count_out, bool dequantize, u8 scaling_exponent,
AttributeFormat* native_format, s32 offset = -1);
void ReadColor(VertexComponentFormat attribute, ColorFormat format, s32 offset);
void GenerateVertexLoader();
};
@@ -81,64 +81,33 @@ std::string VertexLoaderBase::ToString() const
dest += GetName();
dest += ": ";

static constexpr std::array<const char*, 4> pos_mode{{
"Inv",
"Dir",
"I8",
"I16",
}};
static constexpr std::array<const char*, 8> pos_formats{{
"u8",
"s8",
"u16",
"s16",
"flt",
"Inv",
"Inv",
"Inv",
}};
static constexpr std::array<const char*, 8> color_format{{
"565",
"888",
"888x",
"4444",
"6666",
"8888",
"Inv",
"Inv",
}};

dest += fmt::format("{}b skin: {} P: {} {}-{} ", m_VertexSize, m_VtxDesc.PosMatIdx,
m_VtxAttr.PosElements ? 3 : 2, pos_mode[m_VtxDesc.Position],
pos_formats[m_VtxAttr.PosFormat]);

if (m_VtxDesc.Normal)
dest += fmt::format("{}b skin: {} P: {} {}-{} ", m_VertexSize, m_VtxDesc.low.PosMatIdx,
m_VtxAttr.PosElements, m_VtxDesc.low.Position, m_VtxAttr.PosFormat);

if (m_VtxDesc.low.Normal != VertexComponentFormat::NotPresent)
{
dest += fmt::format("Nrm: {} {}-{} ", m_VtxAttr.NormalElements, pos_mode[m_VtxDesc.Normal],
pos_formats[m_VtxAttr.NormalFormat]);
dest += fmt::format("Nrm: {} {}-{} ", m_VtxAttr.NormalElements, m_VtxDesc.low.Normal,
m_VtxAttr.NormalFormat);
}

const std::array<u64, 2> color_mode{{m_VtxDesc.Color0, m_VtxDesc.Color1}};
for (size_t i = 0; i < color_mode.size(); i++)
for (size_t i = 0; i < g_main_cp_state.vtx_desc.low.Color.Size(); i++)
{
if (color_mode[i] == 0)
if (g_main_cp_state.vtx_desc.low.Color[i] == VertexComponentFormat::NotPresent)
continue;

const auto& color = m_VtxAttr.color[i];
dest += fmt::format("C{}: {} {}-{} ", i, color.Elements, pos_mode[color_mode[i]],
color_format[color.Comp]);
dest += fmt::format("C{}: {} {}-{} ", i, color.Elements, g_main_cp_state.vtx_desc.low.Color[i],
color.Comp);
}
const std::array<u64, 8> tex_mode{{m_VtxDesc.Tex0Coord, m_VtxDesc.Tex1Coord, m_VtxDesc.Tex2Coord,
m_VtxDesc.Tex3Coord, m_VtxDesc.Tex4Coord, m_VtxDesc.Tex5Coord,
m_VtxDesc.Tex6Coord, m_VtxDesc.Tex7Coord}};
for (size_t i = 0; i < tex_mode.size(); i++)

for (size_t i = 0; i < g_main_cp_state.vtx_desc.high.TexCoord.Size(); i++)
{
if (tex_mode[i] == 0)
if (g_main_cp_state.vtx_desc.high.TexCoord[i] == VertexComponentFormat::NotPresent)
continue;

const auto& tex_coord = m_VtxAttr.texCoord[i];
dest += fmt::format("T{}: {} {}-{} ", i, tex_coord.Elements, pos_mode[tex_mode[i]],
pos_formats[tex_coord.Format]);
dest += fmt::format("T{}: {} {}-{} ", i, tex_coord.Elements,
g_main_cp_state.vtx_desc.high.TexCoord[i], tex_coord.Format);
}
dest += fmt::format(" - {} v", m_numLoadedVertices);
return dest;
@@ -200,8 +169,9 @@ class VertexLoaderTester : public VertexLoaderBase
{
ERROR_LOG_FMT(VIDEO,
"The two vertex loaders have loaded different data "
"(guru meditation {:#018x}, {:#010x}, {:#010x}, {:#010x}).",
m_VtxDesc.Hex, m_vat.g0.Hex, m_vat.g1.Hex, m_vat.g2.Hex);
"(guru meditation {:#010x}, {:#010x}, {:#010x}, {:#010x}, {:#010x}).",
m_VtxDesc.low.Hex, m_VtxDesc.high.Hex, m_vat.g0.Hex, m_vat.g1.Hex,
m_vat.g2.Hex);
}

memcpy(dst.GetPointer(), buffer_a.data(), count_a * m_native_vtx_decl.stride);
@@ -23,8 +23,8 @@ class VertexLoaderUID
VertexLoaderUID() {}
VertexLoaderUID(const TVtxDesc& vtx_desc, const VAT& vat)
{
vid[0] = vtx_desc.Hex & 0xFFFFFFFF;
vid[1] = vtx_desc.Hex >> 32;
vid[0] = vtx_desc.GetLegacyHex0();
vid[1] = vtx_desc.GetLegacyHex1();
vid[2] = vat.g0.Hex;
vid[3] = vat.g1.Hex;
vid[4] = vat.g2.Hex;
@@ -15,9 +15,11 @@

#include "Common/Assert.h"
#include "Common/CommonTypes.h"
#include "Common/Logging/Log.h"
#include "Core/HW/Memmap.h"

#include "VideoCommon/BPMemory.h"
#include "VideoCommon/CPMemory.h"
#include "VideoCommon/CommandProcessor.h"
#include "VideoCommon/DataReader.h"
#include "VideoCommon/IndexGenerator.h"
@@ -75,11 +77,26 @@ void UpdateVertexArrayPointers()
// But the vertex arrays with invalid addresses aren't actually enabled.
// Note: Only array bases 0 through 11 are used by the Vertex loaders.
// 12 through 15 are used for loading data into xfmem.
for (int i = 0; i < 12; i++)
// We also only update the array base if the vertex description states we are going to use it.
if (IsIndexed(g_main_cp_state.vtx_desc.low.Position))
cached_arraybases[ARRAY_POSITION] =
Memory::GetPointer(g_main_cp_state.array_bases[ARRAY_POSITION]);

if (IsIndexed(g_main_cp_state.vtx_desc.low.Normal))
cached_arraybases[ARRAY_NORMAL] = Memory::GetPointer(g_main_cp_state.array_bases[ARRAY_NORMAL]);

for (size_t i = 0; i < g_main_cp_state.vtx_desc.low.Color.Size(); i++)
{
if (IsIndexed(g_main_cp_state.vtx_desc.low.Color[i]))
cached_arraybases[ARRAY_COLOR + i] =
Memory::GetPointer(g_main_cp_state.array_bases[ARRAY_COLOR + i]);
}

for (size_t i = 0; i < g_main_cp_state.vtx_desc.high.TexCoord.Size(); i++)
{
// Only update the array base if the vertex description states we are going to use it.
if (g_main_cp_state.vtx_desc.GetVertexArrayStatus(i) & MASK_INDEXED)
cached_arraybases[i] = Memory::GetPointer(g_main_cp_state.array_bases[i]);
if (IsIndexed(g_main_cp_state.vtx_desc.high.TexCoord[i]))
cached_arraybases[ARRAY_TEXCOORD0 + i] =
Memory::GetPointer(g_main_cp_state.array_bases[ARRAY_TEXCOORD0 + i]);
}

g_main_cp_state.bases_dirty = false;
@@ -276,7 +293,7 @@ int RunVertices(int vtx_attr_group, int primitive, int count, DataReader src, bo
// if cull mode is CULL_ALL, tell VertexManager to skip triangles and quads.
// They still need to go through vertex loading, because we need to calculate a zfreeze reference
// slope.
bool cullall = (bpmem.genMode.cullmode == GenMode::CULL_ALL && primitive < 5);
bool cullall = (bpmem.genMode.cullmode == CullMode::All && primitive < 5);

DataReader dst = g_vertex_manager->PrepareForAdditionalData(
primitive, count, loader->m_native_vtx_decl.stride, cullall);
@@ -302,79 +319,84 @@ void LoadCPReg(u32 sub_cmd, u32 value, bool is_preprocess)
{
// Handles a write of `value` to the CP register addressed by `sub_cmd`.
// `is_preprocess` selects which CPState instance is modified (g_preprocess_cp_state
// or g_main_cp_state); calls into VertexShaderManager are only made when it is false.
bool update_global_state = !is_preprocess;
CPState* state = is_preprocess ? &g_preprocess_cp_state : &g_main_cp_state;
// The masked sub_cmd picks the register group; the remaining low bits are used
// further down as an index within the group.
switch (sub_cmd & 0xF0)
switch (sub_cmd & CP_COMMAND_MASK)
{
// Matrix index A: nothing is stored in `state` here, the vertex shader manager is
// simply notified of the new value.
case 0x30:
case MATINDEX_A:
if (update_global_state)
VertexShaderManager::SetTexMatrixChangedA(value);
break;

// Matrix index B: same handling as matrix index A.
case 0x40:
case MATINDEX_B:
if (update_global_state)
VertexShaderManager::SetTexMatrixChangedB(value);
break;

// Vertex descriptor, low part. A descriptor change marks every VAT slot and the
// cached array bases as dirty.
case 0x50:
state->vtx_desc.Hex &= ~0x1FFFF; // keep the Upper bits
state->vtx_desc.Hex |= value;
state->attr_dirty = BitSet32::AllTrue(8);
case VCD_LO:
state->vtx_desc.low.Hex = value;
state->attr_dirty = BitSet32::AllTrue(CP_NUM_VAT_REG);
state->bases_dirty = true;
break;

// Vertex descriptor, high part. Same dirty-marking as the low part.
case 0x60:
state->vtx_desc.Hex &= 0x1FFFF; // keep the lower 17Bits
state->vtx_desc.Hex |= (u64)value << 17;
state->attr_dirty = BitSet32::AllTrue(8);
case VCD_HI:
state->vtx_desc.high.Hex = value;
state->attr_dirty = BitSet32::AllTrue(CP_NUM_VAT_REG);
state->bases_dirty = true;
break;

// Vertex attribute table, group 0. With the CP_VAT_REG_A form an out-of-range
// register offset is only logged; the write still happens, using the masked index.
case 0x70:
ASSERT((sub_cmd & 0x0F) < 8);
state->vtx_attr[sub_cmd & 7].g0.Hex = value;
state->attr_dirty[sub_cmd & 7] = true;
case CP_VAT_REG_A:
if ((sub_cmd - CP_VAT_REG_A) >= CP_NUM_VAT_REG)
WARN_LOG_FMT(VIDEO, "CP_VAT_REG_A: Invalid VAT {}", sub_cmd - CP_VAT_REG_A);
state->vtx_attr[sub_cmd & CP_VAT_MASK].g0.Hex = value;
state->attr_dirty[sub_cmd & CP_VAT_MASK] = true;
break;

// Vertex attribute table, group 1 (same pattern as group 0).
case 0x80:
ASSERT((sub_cmd & 0x0F) < 8);
state->vtx_attr[sub_cmd & 7].g1.Hex = value;
state->attr_dirty[sub_cmd & 7] = true;
case CP_VAT_REG_B:
if ((sub_cmd - CP_VAT_REG_B) >= CP_NUM_VAT_REG)
WARN_LOG_FMT(VIDEO, "CP_VAT_REG_B: Invalid VAT {}", sub_cmd - CP_VAT_REG_B);
state->vtx_attr[sub_cmd & CP_VAT_MASK].g1.Hex = value;
state->attr_dirty[sub_cmd & CP_VAT_MASK] = true;
break;

// Vertex attribute table, group 2 (same pattern as group 0).
case 0x90:
ASSERT((sub_cmd & 0x0F) < 8);
state->vtx_attr[sub_cmd & 7].g2.Hex = value;
state->attr_dirty[sub_cmd & 7] = true;
case CP_VAT_REG_C:
if ((sub_cmd - CP_VAT_REG_C) >= CP_NUM_VAT_REG)
WARN_LOG_FMT(VIDEO, "CP_VAT_REG_C: Invalid VAT {}", sub_cmd - CP_VAT_REG_C);
state->vtx_attr[sub_cmd & CP_VAT_MASK].g2.Hex = value;
state->attr_dirty[sub_cmd & CP_VAT_MASK] = true;
break;

// Pointers to vertex arrays in GC RAM
// The value is masked with the physical address mask before being stored, and the
// cached base pointers are flagged for refresh.
case 0xA0:
state->array_bases[sub_cmd & 0xF] = value & CommandProcessor::GetPhysicalAddressMask();
case ARRAY_BASE:
state->array_bases[sub_cmd & CP_ARRAY_MASK] =
value & CommandProcessor::GetPhysicalAddressMask();
state->bases_dirty = true;
break;

// Array strides: only the low 8 bits of the value are kept.
case 0xB0:
state->array_strides[sub_cmd & 0xF] = value & 0xFF;
case ARRAY_STRIDE:
state->array_strides[sub_cmd & CP_ARRAY_MASK] = value & 0xFF;
break;

// Any other register group is reported and otherwise ignored.
default:
WARN_LOG_FMT(VIDEO, "Unknown CP register {:02x} set to {:08x}", sub_cmd, value);
}
}

// Copies the register values held in g_main_cp_state into `memory`, using each
// register's number as the array index. Only the main CP state is read; the
// preprocess state is not consulted.
// NOTE(review): the required length of `memory` is not visible here - it presumably
// has to cover the highest index written below (the last array stride entry);
// confirm against the callers.
void FillCPMemoryArray(u32* memory)
{
// Matrix indices and the two halves of the vertex descriptor.
memory[0x30] = g_main_cp_state.matrix_index_a.Hex;
memory[0x40] = g_main_cp_state.matrix_index_b.Hex;
memory[0x50] = (u32)g_main_cp_state.vtx_desc.Hex;
memory[0x60] = (u32)(g_main_cp_state.vtx_desc.Hex >> 17);
memory[MATINDEX_A] = g_main_cp_state.matrix_index_a.Hex;
memory[MATINDEX_B] = g_main_cp_state.matrix_index_b.Hex;
memory[VCD_LO] = g_main_cp_state.vtx_desc.low.Hex;
memory[VCD_HI] = g_main_cp_state.vtx_desc.high.Hex;

// One entry per vertex attribute table slot, for each of the three groups (g0/g1/g2).
for (int i = 0; i < 8; ++i)
for (int i = 0; i < CP_NUM_VAT_REG; ++i)
{
memory[0x70 + i] = g_main_cp_state.vtx_attr[i].g0.Hex;
memory[0x80 + i] = g_main_cp_state.vtx_attr[i].g1.Hex;
memory[0x90 + i] = g_main_cp_state.vtx_attr[i].g2.Hex;
memory[CP_VAT_REG_A + i] = g_main_cp_state.vtx_attr[i].g0.Hex;
memory[CP_VAT_REG_B + i] = g_main_cp_state.vtx_attr[i].g1.Hex;
memory[CP_VAT_REG_C + i] = g_main_cp_state.vtx_attr[i].g2.Hex;
}

// Base address and stride of every vertex array.
for (int i = 0; i < 16; ++i)
for (int i = 0; i < CP_NUM_ARRAYS; ++i)
{
memory[0xA0 + i] = g_main_cp_state.array_bases[i];
memory[0xB0 + i] = g_main_cp_state.array_strides[i];
memory[ARRAY_BASE + i] = g_main_cp_state.array_bases[i];
memory[ARRAY_STRIDE + i] = g_main_cp_state.array_strides[i];
}
}
@@ -2,6 +2,9 @@
// Licensed under GPLv2+
// Refer to the license.txt file included.

#include "VideoCommon/VertexLoaderX64.h"

#include <array>
#include <cstring>
#include <string>

@@ -15,7 +18,6 @@
#include "Common/x64Emitter.h"
#include "VideoCommon/DataReader.h"
#include "VideoCommon/VertexLoaderManager.h"
#include "VideoCommon/VertexLoaderX64.h"

using namespace Gen;

@@ -55,12 +57,12 @@ VertexLoaderX64::VertexLoaderX64(const TVtxDesc& vtx_desc, const VAT& vtx_att)
JitRegister::Register(region, GetCodePtr(), name.c_str());
}

OpArg VertexLoaderX64::GetVertexAddr(int array, u64 attribute)
OpArg VertexLoaderX64::GetVertexAddr(int array, VertexComponentFormat attribute)
{
OpArg data = MDisp(src_reg, m_src_ofs);
if (attribute & MASK_INDEXED)
if (IsIndexed(attribute))
{
int bits = attribute == INDEX8 ? 8 : 16;
int bits = attribute == VertexComponentFormat::Index8 ? 8 : 16;
LoadAndSwap(bits, scratch1, data);
m_src_ofs += bits / 8;
if (array == ARRAY_POSITION)
@@ -78,8 +80,8 @@ OpArg VertexLoaderX64::GetVertexAddr(int array, u64 attribute)
}
}

int VertexLoaderX64::ReadVertex(OpArg data, u64 attribute, int format, int count_in, int count_out,
bool dequantize, u8 scaling_exponent,
int VertexLoaderX64::ReadVertex(OpArg data, VertexComponentFormat attribute, ComponentFormat format,
int count_in, int count_out, bool dequantize, u8 scaling_exponent,
AttributeFormat* native_format)
{
static const __m128i shuffle_lut[5][3] = {
@@ -115,7 +117,7 @@ int VertexLoaderX64::ReadVertex(OpArg data, u64 attribute, int format, int count

X64Reg coords = XMM0;

int elem_size = 1 << (format / 2);
int elem_size = GetElementSize(format);
int load_bytes = elem_size * count_in;
OpArg dest = MDisp(dst_reg, m_dst_ofs);

@@ -127,7 +129,7 @@ int VertexLoaderX64::ReadVertex(OpArg data, u64 attribute, int format, int count

m_dst_ofs += sizeof(float) * count_out;

if (attribute == DIRECT)
if (attribute == VertexComponentFormat::Direct)
m_src_ofs += load_bytes;

if (cpu_info.bSSSE3)
@@ -139,12 +141,12 @@ int VertexLoaderX64::ReadVertex(OpArg data, u64 attribute, int format, int count
else
MOVD_xmm(coords, data);

PSHUFB(coords, MPIC(&shuffle_lut[format][count_in - 1]));
PSHUFB(coords, MPIC(&shuffle_lut[u32(format)][count_in - 1]));

// Sign-extend.
if (format == FORMAT_BYTE)
if (format == ComponentFormat::Byte)
PSRAD(coords, 24);
if (format == FORMAT_SHORT)
if (format == ComponentFormat::Short)
PSRAD(coords, 16);
}
else
@@ -153,20 +155,20 @@ int VertexLoaderX64::ReadVertex(OpArg data, u64 attribute, int format, int count
X64Reg temp = XMM1;
switch (format)
{
case FORMAT_UBYTE:
case ComponentFormat::UByte:
MOVD_xmm(coords, data);
PXOR(temp, R(temp));
PUNPCKLBW(coords, R(temp));
PUNPCKLWD(coords, R(temp));
break;
case FORMAT_BYTE:
case ComponentFormat::Byte:
MOVD_xmm(coords, data);
PUNPCKLBW(coords, R(coords));
PUNPCKLWD(coords, R(coords));
PSRAD(coords, 24);
break;
case FORMAT_USHORT:
case FORMAT_SHORT:
case ComponentFormat::UShort:
case ComponentFormat::Short:
switch (count_in)
{
case 1:
@@ -185,12 +187,12 @@ int VertexLoaderX64::ReadVertex(OpArg data, u64 attribute, int format, int count
PSHUFLW(coords, R(coords), 0xAC); // ..Z.Y.X.
break;
}
if (format == FORMAT_SHORT)
if (format == ComponentFormat::Short)
PSRAD(coords, 16);
else
PSRLD(coords, 16);
break;
case FORMAT_FLOAT:
case ComponentFormat::Float:
// Floats don't need to be scaled or converted,
// so we can just load/swap/store them directly
// and return early.
@@ -231,7 +233,7 @@ int VertexLoaderX64::ReadVertex(OpArg data, u64 attribute, int format, int count
}
}

if (format != FORMAT_FLOAT)
if (format != ComponentFormat::Float)
{
CVTDQ2PS(coords, R(coords));

@@ -265,22 +267,22 @@ int VertexLoaderX64::ReadVertex(OpArg data, u64 attribute, int format, int count
return load_bytes;
}

void VertexLoaderX64::ReadColor(OpArg data, u64 attribute, int format)
void VertexLoaderX64::ReadColor(OpArg data, VertexComponentFormat attribute, ColorFormat format)
{
int load_bytes = 0;
switch (format)
{
case FORMAT_24B_888:
case FORMAT_32B_888x:
case FORMAT_32B_8888:
case ColorFormat::RGB888:
case ColorFormat::RGB888x:
case ColorFormat::RGBA8888:
MOV(32, R(scratch1), data);
if (format != FORMAT_32B_8888)
if (format != ColorFormat::RGBA8888)
OR(32, R(scratch1), Imm32(0xFF000000));
MOV(32, MDisp(dst_reg, m_dst_ofs), R(scratch1));
load_bytes = 3 + (format != FORMAT_24B_888);
load_bytes = format == ColorFormat::RGB888 ? 3 : 4;
break;

case FORMAT_16B_565:
case ColorFormat::RGB565:
// RRRRRGGG GGGBBBBB
// AAAAAAAA BBBBBBBB GGGGGGGG RRRRRRRR
LoadAndSwap(16, scratch1, data);
@@ -320,7 +322,7 @@ void VertexLoaderX64::ReadColor(OpArg data, u64 attribute, int format)
load_bytes = 2;
break;

case FORMAT_16B_4444:
case ColorFormat::RGBA4444:
// RRRRGGGG BBBBAAAA
// AAAAAAAA BBBBBBBB GGGGGGGG RRRRRRRR
LoadAndSwap(16, scratch1, data);
@@ -348,7 +350,7 @@ void VertexLoaderX64::ReadColor(OpArg data, u64 attribute, int format)
load_bytes = 2;
break;

case FORMAT_24B_6666:
case ColorFormat::RGBA6666:
// RRRRRRGG GGGGBBBB BBAAAAAA
// AAAAAAAA BBBBBBBB GGGGGGGG RRRRRRRR
data.AddMemOffset(-1); // subtract one from address so we can use a 32bit load and bswap
@@ -380,7 +382,7 @@ void VertexLoaderX64::ReadColor(OpArg data, u64 attribute, int format)
load_bytes = 3;
break;
}
if (attribute == DIRECT)
if (attribute == VertexComponentFormat::Direct)
m_src_ofs += load_bytes;
}

@@ -399,14 +401,14 @@ void VertexLoaderX64::GenerateVertexLoader()

MOV(64, R(base_reg), R(ABI_PARAM4));

if (m_VtxDesc.Position & MASK_INDEXED)
if (IsIndexed(m_VtxDesc.low.Position))
XOR(32, R(skipped_reg), R(skipped_reg));

// TODO: load constants into registers outside the main loop

const u8* loop_start = GetCodePtr();

if (m_VtxDesc.PosMatIdx)
if (m_VtxDesc.low.PosMatIdx)
{
MOVZX(32, 8, scratch1, MDisp(src_reg, m_src_ofs));
AND(32, R(scratch1), Imm8(0x3F));
@@ -428,51 +430,47 @@ void VertexLoaderX64::GenerateVertexLoader()
m_dst_ofs += sizeof(u32);
}

u32 texmatidx_ofs[8];
const u64 tm[8] = {
m_VtxDesc.Tex0MatIdx, m_VtxDesc.Tex1MatIdx, m_VtxDesc.Tex2MatIdx, m_VtxDesc.Tex3MatIdx,
m_VtxDesc.Tex4MatIdx, m_VtxDesc.Tex5MatIdx, m_VtxDesc.Tex6MatIdx, m_VtxDesc.Tex7MatIdx,
};
for (int i = 0; i < 8; i++)
std::array<u32, 8> texmatidx_ofs;
for (size_t i = 0; i < m_VtxDesc.low.TexMatIdx.Size(); i++)
{
if (tm[i])
if (m_VtxDesc.low.TexMatIdx[i])
texmatidx_ofs[i] = m_src_ofs++;
}

OpArg data = GetVertexAddr(ARRAY_POSITION, m_VtxDesc.Position);
int pos_elements = 2 + m_VtxAttr.PosElements;
ReadVertex(data, m_VtxDesc.Position, m_VtxAttr.PosFormat, pos_elements, pos_elements,
OpArg data = GetVertexAddr(ARRAY_POSITION, m_VtxDesc.low.Position);
int pos_elements = m_VtxAttr.PosElements == CoordComponentCount::XY ? 2 : 3;
ReadVertex(data, m_VtxDesc.low.Position, m_VtxAttr.PosFormat, pos_elements, pos_elements,
m_VtxAttr.ByteDequant, m_VtxAttr.PosFrac, &m_native_vtx_decl.position);

if (m_VtxDesc.Normal)
if (m_VtxDesc.low.Normal != VertexComponentFormat::NotPresent)
{
static const u8 map[8] = {7, 6, 15, 14};
u8 scaling_exponent = map[m_VtxAttr.NormalFormat];
const u8 scaling_exponent = map[u32(m_VtxAttr.NormalFormat)];
const int limit = m_VtxAttr.NormalElements == NormalComponentCount::NBT ? 3 : 1;

for (int i = 0; i < (m_VtxAttr.NormalElements ? 3 : 1); i++)
for (int i = 0; i < limit; i++)
{
if (!i || m_VtxAttr.NormalIndex3)
{
data = GetVertexAddr(ARRAY_NORMAL, m_VtxDesc.Normal);
int elem_size = 1 << (m_VtxAttr.NormalFormat / 2);
data = GetVertexAddr(ARRAY_NORMAL, m_VtxDesc.low.Normal);
int elem_size = GetElementSize(m_VtxAttr.NormalFormat);
data.AddMemOffset(i * elem_size * 3);
}
data.AddMemOffset(ReadVertex(data, m_VtxDesc.Normal, m_VtxAttr.NormalFormat, 3, 3, true,
data.AddMemOffset(ReadVertex(data, m_VtxDesc.low.Normal, m_VtxAttr.NormalFormat, 3, 3, true,
scaling_exponent, &m_native_vtx_decl.normals[i]));
}

m_native_components |= VB_HAS_NRM0;
if (m_VtxAttr.NormalElements)
if (m_VtxAttr.NormalElements == NormalComponentCount::NBT)
m_native_components |= VB_HAS_NRM1 | VB_HAS_NRM2;
}

const u64 col[2] = {m_VtxDesc.Color0, m_VtxDesc.Color1};
for (int i = 0; i < 2; i++)
for (size_t i = 0; i < m_VtxDesc.low.Color.Size(); i++)
{
if (col[i])
if (m_VtxDesc.low.Color[i] != VertexComponentFormat::NotPresent)
{
data = GetVertexAddr(ARRAY_COLOR + i, col[i]);
ReadColor(data, col[i], m_VtxAttr.color[i].Comp);
data = GetVertexAddr(ARRAY_COLOR + int(i), m_VtxDesc.low.Color[i]);
ReadColor(data, m_VtxDesc.low.Color[i], m_VtxAttr.color[i].Comp);
m_native_components |= VB_HAS_COL0 << i;
m_native_vtx_decl.colors[i].components = 4;
m_native_vtx_decl.colors[i].enable = true;
@@ -483,30 +481,27 @@ void VertexLoaderX64::GenerateVertexLoader()
}
}

const u64 tc[8] = {
m_VtxDesc.Tex0Coord, m_VtxDesc.Tex1Coord, m_VtxDesc.Tex2Coord, m_VtxDesc.Tex3Coord,
m_VtxDesc.Tex4Coord, m_VtxDesc.Tex5Coord, m_VtxDesc.Tex6Coord, m_VtxDesc.Tex7Coord,
};
for (int i = 0; i < 8; i++)
for (size_t i = 0; i < m_VtxDesc.high.TexCoord.Size(); i++)
{
int elements = m_VtxAttr.texCoord[i].Elements + 1;
if (tc[i])
int elements = m_VtxAttr.texCoord[i].Elements == TexComponentCount::ST ? 2 : 1;
if (m_VtxDesc.high.TexCoord[i] != VertexComponentFormat::NotPresent)
{
data = GetVertexAddr(ARRAY_TEXCOORD0 + i, tc[i]);
data = GetVertexAddr(ARRAY_TEXCOORD0 + int(i), m_VtxDesc.high.TexCoord[i]);
u8 scaling_exponent = m_VtxAttr.texCoord[i].Frac;
ReadVertex(data, tc[i], m_VtxAttr.texCoord[i].Format, elements, tm[i] ? 2 : elements,
m_VtxAttr.ByteDequant, scaling_exponent, &m_native_vtx_decl.texcoords[i]);
ReadVertex(data, m_VtxDesc.high.TexCoord[i], m_VtxAttr.texCoord[i].Format, elements,
m_VtxDesc.low.TexMatIdx[i] ? 2 : elements, m_VtxAttr.ByteDequant, scaling_exponent,
&m_native_vtx_decl.texcoords[i]);
m_native_components |= VB_HAS_UV0 << i;
}
if (tm[i])
if (m_VtxDesc.low.TexMatIdx[i])
{
m_native_components |= VB_HAS_TEXMTXIDX0 << i;
m_native_vtx_decl.texcoords[i].components = 3;
m_native_vtx_decl.texcoords[i].enable = true;
m_native_vtx_decl.texcoords[i].type = VAR_FLOAT;
m_native_vtx_decl.texcoords[i].integer = false;
MOVZX(64, 8, scratch1, MDisp(src_reg, texmatidx_ofs[i]));
if (tc[i])
if (m_VtxDesc.high.TexCoord[i] != VertexComponentFormat::NotPresent)
{
CVTSI2SS(XMM0, R(scratch1));
MOVSS(MDisp(dst_reg, m_dst_ofs), XMM0);
@@ -537,7 +532,7 @@ void VertexLoaderX64::GenerateVertexLoader()

ABI_PopRegistersAndAdjustStack(regs, 0);

if (m_VtxDesc.Position & MASK_INDEXED)
if (IsIndexed(m_VtxDesc.low.Position))
{
SUB(32, R(ABI_RETURN), R(skipped_reg));
RET();
@@ -8,6 +8,10 @@
#include "Common/x64Emitter.h"
#include "VideoCommon/VertexLoaderBase.h"

enum class VertexComponentFormat;
enum class ComponentFormat;
enum class ColorFormat;

class VertexLoaderX64 : public VertexLoaderBase, public Gen::X64CodeBlock
{
public:
@@ -22,9 +26,10 @@ class VertexLoaderX64 : public VertexLoaderBase, public Gen::X64CodeBlock
u32 m_src_ofs = 0;
u32 m_dst_ofs = 0;
Gen::FixupBranch m_skip_vertex;
Gen::OpArg GetVertexAddr(int array, u64 attribute);
int ReadVertex(Gen::OpArg data, u64 attribute, int format, int count_in, int count_out,
bool dequantize, u8 scaling_exponent, AttributeFormat* native_format);
void ReadColor(Gen::OpArg data, u64 attribute, int format);
Gen::OpArg GetVertexAddr(int array, VertexComponentFormat attribute);
int ReadVertex(Gen::OpArg data, VertexComponentFormat attribute, ComponentFormat format,
int count_in, int count_out, bool dequantize, u8 scaling_exponent,
AttributeFormat* native_format);
void ReadColor(Gen::OpArg data, VertexComponentFormat attribute, ColorFormat format);
void GenerateVertexLoader();
};
@@ -230,12 +230,15 @@ constexpr Types InitializeTable()
constexpr Types s_table = InitializeTable();
} // Anonymous namespace

// Returns the `gc_size` field of the s_table entry selected by the four arguments.
// Note the table's index order (type, index3, elements, format) differs from the
// parameter order. The arguments are used as raw indices with no range check here.
// NOTE(review): `gc_size` is presumably the byte size of the normal data in the
// source vertex stream - confirm against the table definition.
u32 VertexLoader_Normal::GetSize(u64 type, u32 format, u32 elements, u32 index3)
u32 VertexLoader_Normal::GetSize(VertexComponentFormat type, ComponentFormat format,
NormalComponentCount elements, u32 index3)
{
return s_table[type][index3][elements][format].gc_size;
return s_table[u32(type)][index3][u32(elements)][u32(format)].gc_size;
}

// Returns the `function` field of the s_table entry selected by the four arguments.
// The table is indexed in the order (type, index3, elements, format); the arguments
// are used as raw indices with no range check here.
TPipelineFunction VertexLoader_Normal::GetFunction(u64 type, u32 format, u32 elements, u32 index3)
TPipelineFunction VertexLoader_Normal::GetFunction(VertexComponentFormat type,
ComponentFormat format,
NormalComponentCount elements, u32 index3)
{
return s_table[type][index3][elements][format].function;
return s_table[u32(type)][index3][u32(elements)][u32(format)].function;
}
@@ -7,10 +7,16 @@
#include "Common/CommonTypes.h"
#include "VideoCommon/VertexLoader.h"

enum class VertexComponentFormat;
enum class ComponentFormat;
enum class NormalComponentCount;

// Static-only lookup interface for vertex normal attributes. Both members take the
// same selector arguments (component presence/indexing mode, component format,
// component count and the index3 flag).
class VertexLoader_Normal
{
public:
// Returns a u32 size for the given combination.
static u32 GetSize(u64 type, u32 format, u32 elements, u32 index3);
static u32 GetSize(VertexComponentFormat type, ComponentFormat format,
NormalComponentCount elements, u32 index3);

// Returns the pipeline function for the given combination.
static TPipelineFunction GetFunction(u64 type, u32 format, u32 elements, u32 index3);
static TPipelineFunction GetFunction(VertexComponentFormat type, ComponentFormat format,
NormalComponentCount elements, u32 index3);
};
@@ -200,12 +200,15 @@ constexpr u32 s_table_read_position_vertex_size[4][8][2] = {
};
} // Anonymous namespace

// Looks up the entry for (type, format, elements) in
// s_table_read_position_vertex_size, which is declared with dimensions [4][8][2].
// The arguments are used as raw indices; no range check is performed here.
u32 VertexLoader_Position::GetSize(u64 type, u32 format, u32 elements)
u32 VertexLoader_Position::GetSize(VertexComponentFormat type, ComponentFormat format,
CoordComponentCount elements)
{
return s_table_read_position_vertex_size[type][format][elements];
return s_table_read_position_vertex_size[u32(type)][u32(format)][u32(elements)];
}

// Looks up the pipeline function for (type, format, elements) in
// s_table_read_position. The arguments are used as raw indices; no range check is
// performed here.
TPipelineFunction VertexLoader_Position::GetFunction(u64 type, u32 format, u32 elements)
TPipelineFunction VertexLoader_Position::GetFunction(VertexComponentFormat type,
ComponentFormat format,
CoordComponentCount elements)
{
return s_table_read_position[type][format][elements];
return s_table_read_position[u32(type)][u32(format)][u32(elements)];
}
@@ -7,10 +7,16 @@
#include "Common/CommonTypes.h"
#include "VideoCommon/VertexLoader.h"

enum class VertexComponentFormat;
enum class ComponentFormat;
enum class CoordComponentCount;

// Static-only lookup interface for vertex position attributes. Both members take
// the same selector arguments (component presence/indexing mode, component format
// and component count).
class VertexLoader_Position
{
public:
// Returns a u32 size for the given combination.
static u32 GetSize(u64 type, u32 format, u32 elements);
static u32 GetSize(VertexComponentFormat type, ComponentFormat format,
CoordComponentCount elements);

// Returns the pipeline function for the given combination.
static TPipelineFunction GetFunction(u64 type, u32 format, u32 elements);
static TPipelineFunction GetFunction(VertexComponentFormat type, ComponentFormat format,
CoordComponentCount elements);
};
@@ -191,14 +191,17 @@ constexpr u32 s_table_read_tex_coord_vertex_size[4][8][2] = {
};
} // Anonymous namespace

// Looks up the entry for (type, format, elements) in
// s_table_read_tex_coord_vertex_size, which is declared with dimensions [4][8][2].
// The arguments are used as raw indices; no range check is performed here.
u32 VertexLoader_TextCoord::GetSize(u64 type, u32 format, u32 elements)
u32 VertexLoader_TextCoord::GetSize(VertexComponentFormat type, ComponentFormat format,
TexComponentCount elements)
{
return s_table_read_tex_coord_vertex_size[type][format][elements];
return s_table_read_tex_coord_vertex_size[u32(type)][u32(format)][u32(elements)];
}

// Looks up the pipeline function for (type, format, elements) in
// s_table_read_tex_coord. The arguments are used as raw indices; no range check is
// performed here.
TPipelineFunction VertexLoader_TextCoord::GetFunction(u64 type, u32 format, u32 elements)
TPipelineFunction VertexLoader_TextCoord::GetFunction(VertexComponentFormat type,
ComponentFormat format,
TexComponentCount elements)
{
return s_table_read_tex_coord[type][format][elements];
return s_table_read_tex_coord[u32(type)][u32(format)][u32(elements)];
}

TPipelineFunction VertexLoader_TextCoord::GetDummyFunction()
@@ -7,12 +7,18 @@
#include "Common/CommonTypes.h"
#include "VideoCommon/VertexLoader.h"

enum class VertexComponentFormat;
enum class ComponentFormat;
enum class TexComponentCount;

class VertexLoader_TextCoord
{
public:
static u32 GetSize(u64 type, u32 format, u32 elements);
static u32 GetSize(VertexComponentFormat type, ComponentFormat format,
TexComponentCount elements);

static TPipelineFunction GetFunction(u64 type, u32 format, u32 elements);
static TPipelineFunction GetFunction(VertexComponentFormat type, ComponentFormat format,
TexComponentCount elements);

// It is important to synchronize tcIndex.
static TPipelineFunction GetDummyFunction();
@@ -408,10 +408,10 @@ void VertexManagerBase::Flush()
for (u32 i = 0; i < xfmem.numTexGen.numTexGens; ++i)
{
TexMtxInfo tinfo = xfmem.texMtxInfo[i];
if (tinfo.texgentype != XF_TEXGEN_EMBOSS_MAP)
if (tinfo.texgentype != TexGenType::EmbossMap)
tinfo.hex &= 0x7ff;
if (tinfo.texgentype != XF_TEXGEN_REGULAR)
tinfo.projection = 0;
if (tinfo.texgentype != TexGenType::Regular)
tinfo.projection = TexSize::ST;

PRIM_LOG("txgen{}: proj={}, input={}, gentype={}, srcrow={}, embsrc={}, emblght={}, "
"postmtx={}, postnorm={}",
@@ -430,7 +430,7 @@ void VertexManagerBase::Flush()
// Track some stats used elsewhere by the anamorphic widescreen heuristic.
if (!SConfig::GetInstance().bWii)
{
const bool is_perspective = xfmem.projection.type == GX_PERSPECTIVE;
const bool is_perspective = xfmem.projection.type == ProjectionType::Perspective;

auto& counts =
is_perspective ? m_flush_statistics.perspective : m_flush_statistics.orthographic;
@@ -39,7 +39,7 @@ VertexShaderUid GetVertexShaderUid()
// first transformation
switch (texinfo.texgentype)
{
case XF_TEXGEN_EMBOSS_MAP: // calculate tex coords into bump map
case TexGenType::EmbossMap: // calculate tex coords into bump map
if ((uid_data->components & (VB_HAS_NRM1 | VB_HAS_NRM2)) != 0)
{
// transform the light dir into tangent space
@@ -51,18 +51,19 @@ VertexShaderUid GetVertexShaderUid()
texinfo.embosssourceshift = xfmem.texMtxInfo[i].embosssourceshift;
}
break;
case XF_TEXGEN_COLOR_STRGBC0:
case XF_TEXGEN_COLOR_STRGBC1:
case TexGenType::Color0:
case TexGenType::Color1:
break;
case XF_TEXGEN_REGULAR:
case TexGenType::Regular:
default:
uid_data->texMtxInfo_n_projection |= xfmem.texMtxInfo[i].projection << i;
uid_data->texMtxInfo_n_projection |= static_cast<u32>(xfmem.texMtxInfo[i].projection.Value())
<< i;
break;
}

uid_data->dualTexTrans_enabled = xfmem.dualTexTrans.enabled;
// CHECKME: does this only work for regular tex gen types?
if (uid_data->dualTexTrans_enabled && texinfo.texgentype == XF_TEXGEN_REGULAR)
if (uid_data->dualTexTrans_enabled && texinfo.texgentype == TexGenType::Regular)
{
auto& postInfo = uid_data->postMtxInfo[i];
postInfo.index = xfmem.postMtxInfo[i].index;
@@ -297,49 +298,48 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
out.Write("coord = float4(0.0, 0.0, 1.0, 1.0);\n");
switch (texinfo.sourcerow)
{
case XF_SRCGEOM_INROW:
case SourceRow::Geom:
out.Write("coord.xyz = rawpos.xyz;\n");
break;
case XF_SRCNORMAL_INROW:
case SourceRow::Normal:
if ((uid_data->components & VB_HAS_NRM0) != 0)
{
out.Write("coord.xyz = rawnorm0.xyz;\n");
}
break;
case XF_SRCCOLORS_INROW:
ASSERT(texinfo.texgentype == XF_TEXGEN_COLOR_STRGBC0 ||
texinfo.texgentype == XF_TEXGEN_COLOR_STRGBC1);
case SourceRow::Colors:
ASSERT(texinfo.texgentype == TexGenType::Color0 || texinfo.texgentype == TexGenType::Color1);
break;
case XF_SRCBINORMAL_T_INROW:
case SourceRow::BinormalT:
if ((uid_data->components & VB_HAS_NRM1) != 0)
{
out.Write("coord.xyz = rawnorm1.xyz;\n");
}
break;
case XF_SRCBINORMAL_B_INROW:
case SourceRow::BinormalB:
if ((uid_data->components & VB_HAS_NRM2) != 0)
{
out.Write("coord.xyz = rawnorm2.xyz;\n");
}
break;
default:
ASSERT(texinfo.sourcerow <= XF_SRCTEX7_INROW);
if ((uid_data->components & (VB_HAS_UV0 << (texinfo.sourcerow - XF_SRCTEX0_INROW))) != 0)
ASSERT(texinfo.sourcerow >= SourceRow::Tex0 && texinfo.sourcerow <= SourceRow::Tex7);
u32 texnum = static_cast<u32>(texinfo.sourcerow) - static_cast<u32>(SourceRow::Tex0);
if ((uid_data->components & (VB_HAS_UV0 << (texnum))) != 0)
{
out.Write("coord = float4(rawtex{}.x, rawtex{}.y, 1.0, 1.0);\n",
texinfo.sourcerow - XF_SRCTEX0_INROW, texinfo.sourcerow - XF_SRCTEX0_INROW);
out.Write("coord = float4(rawtex{}.x, rawtex{}.y, 1.0, 1.0);\n", texnum, texnum);
}
break;
}
// Input form of AB11 sets z element to 1.0

if (texinfo.inputform == XF_TEXINPUT_AB11)
if (texinfo.inputform == TexInputForm::AB11)
out.Write("coord.z = 1.0;\n");

// first transformation
switch (texinfo.texgentype)
{
case XF_TEXGEN_EMBOSS_MAP: // calculate tex coords into bump map
case TexGenType::EmbossMap: // calculate tex coords into bump map

if ((uid_data->components & (VB_HAS_NRM1 | VB_HAS_NRM2)) != 0)
{
@@ -359,18 +359,18 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
}

break;
case XF_TEXGEN_COLOR_STRGBC0:
case TexGenType::Color0:
out.Write("o.tex{}.xyz = float3(o.colors_0.x, o.colors_0.y, 1);\n", i);
break;
case XF_TEXGEN_COLOR_STRGBC1:
case TexGenType::Color1:
out.Write("o.tex{}.xyz = float3(o.colors_1.x, o.colors_1.y, 1);\n", i);
break;
case XF_TEXGEN_REGULAR:
case TexGenType::Regular:
default:
if ((uid_data->components & (VB_HAS_TEXMTXIDX0 << i)) != 0)
{
out.Write("int tmp = int(rawtex{}.z);\n", i);
if (((uid_data->texMtxInfo_n_projection >> i) & 1) == XF_TEXPROJ_STQ)
if (static_cast<TexSize>((uid_data->texMtxInfo_n_projection >> i) & 1) == TexSize::STQ)
{
out.Write("o.tex{}.xyz = float3(dot(coord, " I_TRANSFORMMATRICES
"[tmp]), dot(coord, " I_TRANSFORMMATRICES
@@ -386,7 +386,7 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
}
else
{
if (((uid_data->texMtxInfo_n_projection >> i) & 1) == XF_TEXPROJ_STQ)
if (static_cast<TexSize>((uid_data->texMtxInfo_n_projection >> i) & 1) == TexSize::STQ)
{
out.Write("o.tex{}.xyz = float3(dot(coord, " I_TEXMATRICES
"[{}]), dot(coord, " I_TEXMATRICES "[{}]), dot(coord, " I_TEXMATRICES
@@ -404,7 +404,7 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
}

// CHECKME: does this only work for regular tex gen types?
if (uid_data->dualTexTrans_enabled && texinfo.texgentype == XF_TEXGEN_REGULAR)
if (uid_data->dualTexTrans_enabled && texinfo.texgentype == TexGenType::Regular)
{
auto& postInfo = uid_data->postMtxInfo[i];

@@ -427,7 +427,7 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
// This can be seen in devkitPro's neheGX Lesson08 example for Wii
// Makes differences in Rogue Squadron 3 (Hoth sky) and The Last Story (shadow culling)
// TODO: check if this only affects XF_TEXGEN_REGULAR
if (texinfo.texgentype == XF_TEXGEN_REGULAR)
if (texinfo.texgentype == TexGenType::Regular)
{
out.Write(
"if(o.tex{0}.z == 0.0f)\n"
@@ -9,6 +9,9 @@
#include "VideoCommon/ShaderGenCommon.h"

enum class APIType;
enum class TexInputForm : u32;
enum class TexGenType : u32;
enum class SourceRow : u32;

// TODO should be reordered
enum : int
@@ -47,9 +50,9 @@ struct vertex_shader_uid_data

struct
{
u32 inputform : 2;
u32 texgentype : 3;
u32 sourcerow : 5;
TexInputForm inputform : 2;
TexGenType texgentype : 3;
SourceRow sourcerow : 5;
u32 embosssourceshift : 3;
u32 embosslightshift : 3;
} texMtxInfo[8];
@@ -353,7 +353,7 @@ void VertexShaderManager::SetConstants()

switch (xfmem.projection.type)
{
case GX_PERSPECTIVE:
case ProjectionType::Perspective:
{
const Common::Vec2 fov =
g_freelook_camera.IsActive() ? g_freelook_camera.GetFieldOfView() : Common::Vec2{1, 1};
@@ -382,7 +382,7 @@ void VertexShaderManager::SetConstants()
}
break;

case GX_ORTHOGRAPHIC:
case ProjectionType::Orthographic:
{
g_fProjectionMatrix[0] = rawProjection[0];
g_fProjectionMatrix[1] = 0.0f;
@@ -419,7 +419,7 @@ void VertexShaderManager::SetConstants()

auto corrected_matrix = s_viewportCorrection * Common::Matrix44::FromArray(g_fProjectionMatrix);

if (g_freelook_camera.IsActive() && xfmem.projection.type == GX_PERSPECTIVE)
if (g_freelook_camera.IsActive() && xfmem.projection.type == ProjectionType::Perspective)
corrected_matrix *= g_freelook_camera.GetView();

memcpy(constants.projection.data(), corrected_matrix.data.data(), 4 * sizeof(float4));
@@ -618,9 +618,9 @@ void VertexShaderManager::SetVertexFormat(u32 components)

// The default alpha channel seems to depend on the number of components in the vertex format.
// If the vertex attribute has an alpha channel, zero is used, otherwise one.
const u32 color_chan_alpha =
(g_main_cp_state.vtx_attr[g_main_cp_state.last_id].g0.Color0Elements ^ 1) |
((g_main_cp_state.vtx_attr[g_main_cp_state.last_id].g0.Color1Elements ^ 1) << 1);
const auto g0 = g_main_cp_state.vtx_attr[g_main_cp_state.last_id].g0;
const u32 color_chan_alpha = (g0.Color0Elements == ColorComponentCount::RGB ? 1 : 0) |
(g0.Color1Elements == ColorComponentCount::RGB ? 2 : 0);
if (color_chan_alpha != constants.color_chan_alpha)
{
constants.color_chan_alpha = color_chan_alpha;

Large diffs are not rendered by default.

Large diffs are not rendered by default.

@@ -4,4 +4,12 @@

#pragma once

#include <string>
#include <utility>

#include "VideoCommon/XFMemory.h"

std::pair<std::string, std::string> GetXFRegInfo(u32 address, u32 value);
std::string GetXFMemName(u32 address);
std::string GetXFMemDescription(u32 address, u32 value);
std::pair<std::string, std::string> GetXFTransferInfo(const u8* data);

Large diffs are not rendered by default.

@@ -5,6 +5,7 @@ add_dolphin_test(BlockingLoopTest BlockingLoopTest.cpp)
add_dolphin_test(BusyLoopTest BusyLoopTest.cpp)
add_dolphin_test(CommonFuncsTest CommonFuncsTest.cpp)
add_dolphin_test(CryptoEcTest Crypto/EcTest.cpp)
add_dolphin_test(EnumFormatterTest EnumFormatterTest.cpp)
add_dolphin_test(EventTest EventTest.cpp)
add_dolphin_test(FixedSizeQueueTest FixedSizeQueueTest.cpp)
add_dolphin_test(FlagTest FlagTest.cpp)
@@ -0,0 +1,67 @@
// Copyright 2021 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.

#include <gtest/gtest.h>

#include "Common/CommonTypes.h"
#include "Common/EnumFormatter.h"

// Fixture enum with an unsigned underlying type and contiguous values 0..2:
// every value from A through C has an enumerator.
enum class Enum1 : u32
{
A = 0,
B = 1,
C = 2,
};

// fmt formatter for Enum1, built on EnumFormatter with Enum1::C as the template
// argument (presumably the last valid member - the tests in this file expect
// values above C to format as "Invalid").
// The name table is passed to the EnumFormatter constructor as an inline braced
// list, one name per enumerator value in order.
template <>
struct fmt::formatter<Enum1> : EnumFormatter<Enum1::C>
{
formatter() : EnumFormatter({"A", "B", "C"}) {}
};

// Fixture enum with a signed underlying type and a gap in its values:
// no enumerator has the value 2.
enum class Enum2 : s32
{
D = 0,
E = 1,
F = 3,
};

// fmt formatter for Enum2, built on EnumFormatter with Enum2::F as the template
// argument. Here the names are kept in a static constexpr table that is handed to
// the EnumFormatter constructor, rather than being written inline.
template <>
struct fmt::formatter<Enum2> : EnumFormatter<Enum2::F>
{
// Index 2 is nullptr because Enum2 defines no enumerator with that value; the
// tests in this file expect that value to format as "Invalid".
static constexpr array_type names = {"D", "E", nullptr, "F"};
formatter() : EnumFormatter(names) {}
};

// Verifies both presentations of Enum1: the default "Name (value)" text and the
// "{:s}" form, which produces an unsigned hex literal followed by the name in a
// C-style comment. Values without an enumerator must be reported as "Invalid".
TEST(EnumUtil, Enum1)
{
  // Values past Enum1::C have no enumerator.
  const auto unnamed_3 = static_cast<Enum1>(3);
  const auto unnamed_4 = static_cast<Enum1>(4);

  // Default presentation: "Name (decimal value)".
  EXPECT_EQ("A (0)", fmt::to_string(Enum1::A));
  EXPECT_EQ("B (1)", fmt::to_string(Enum1::B));
  EXPECT_EQ("C (2)", fmt::to_string(Enum1::C));
  EXPECT_EQ("Invalid (3)", fmt::to_string(unnamed_3));
  EXPECT_EQ("Invalid (4)", fmt::to_string(unnamed_4));

  // "{:s}" presentation: "0x<hex>u /* Name */".
  EXPECT_EQ("0x0u /* A */", fmt::format("{:s}", Enum1::A));
  EXPECT_EQ("0x1u /* B */", fmt::format("{:s}", Enum1::B));
  EXPECT_EQ("0x2u /* C */", fmt::format("{:s}", Enum1::C));
  EXPECT_EQ("0x3u /* Invalid */", fmt::format("{:s}", unnamed_3));
  EXPECT_EQ("0x4u /* Invalid */", fmt::format("{:s}", unnamed_4));
}

// Verifies both presentations of Enum2, an enum with a signed underlying type and
// a gap at value 2. The gap, a value past the last enumerator, and a negative
// value must all be reported as "Invalid".
TEST(EnumUtil, Enum2)
{
  // Values that Enum2 does not define.
  const auto gap_2 = static_cast<Enum2>(2);
  const auto past_end_4 = static_cast<Enum2>(4);
  const auto negative_1 = static_cast<Enum2>(-1);

  // Default presentation: "Name (decimal value)", keeping the sign.
  EXPECT_EQ("D (0)", fmt::to_string(Enum2::D));
  EXPECT_EQ("E (1)", fmt::to_string(Enum2::E));
  EXPECT_EQ("Invalid (2)", fmt::to_string(gap_2));
  EXPECT_EQ("F (3)", fmt::to_string(Enum2::F));
  EXPECT_EQ("Invalid (4)", fmt::to_string(past_end_4));
  EXPECT_EQ("Invalid (-1)", fmt::to_string(negative_1));

  // "{:s}" presentation: "0x<hex>u /* Name */"; -1 is expected as 0xffffffffu.
  EXPECT_EQ("0x0u /* D */", fmt::format("{:s}", Enum2::D));
  EXPECT_EQ("0x1u /* E */", fmt::format("{:s}", Enum2::E));
  EXPECT_EQ("0x2u /* Invalid */", fmt::format("{:s}", gap_2));
  EXPECT_EQ("0x3u /* F */", fmt::format("{:s}", Enum2::F));
  EXPECT_EQ("0x4u /* Invalid */", fmt::format("{:s}", past_end_4));
  EXPECT_EQ("0xffffffffu /* Invalid */", fmt::format("{:s}", negative_1));
}
@@ -49,6 +49,7 @@
<ClCompile Include="Common\BusyLoopTest.cpp" />
<ClCompile Include="Common\CommonFuncsTest.cpp" />
<ClCompile Include="Common\Crypto\EcTest.cpp" />
<ClCompile Include="Common\EnumFormatterTest.cpp" />
<ClCompile Include="Common\EventTest.cpp" />
<ClCompile Include="Common\FixedSizeQueueTest.cpp" />
<ClCompile Include="Common\FlagTest.cpp" />
@@ -28,7 +28,7 @@ TEST(VertexLoaderUID, UniqueEnough)
memset(&vat, 0, sizeof(vat));
uids.insert(VertexLoaderUID(vtx_desc, vat));

vtx_desc.Hex = 0xFEDCBA9876543210ull;
vtx_desc.SetLegacyHex(0xFEDCBA9876543210ull);
EXPECT_EQ(uids.end(), uids.find(VertexLoaderUID(vtx_desc, vat)));
uids.insert(VertexLoaderUID(vtx_desc, vat));

@@ -106,29 +106,37 @@ class VertexLoaderTest : public testing::Test
std::unique_ptr<VertexLoaderBase> m_loader;
};

class VertexLoaderParamTest : public VertexLoaderTest,
public ::testing::WithParamInterface<std::tuple<int, int, int, int>>
class VertexLoaderParamTest
: public VertexLoaderTest,
public ::testing::WithParamInterface<
std::tuple<VertexComponentFormat, ComponentFormat, CoordComponentCount, int>>
{
};
INSTANTIATE_TEST_CASE_P(AllCombinations, VertexLoaderParamTest,
::testing::Combine(::testing::Values(DIRECT, INDEX8, INDEX16),
::testing::Values(FORMAT_UBYTE, FORMAT_BYTE,
FORMAT_USHORT, FORMAT_SHORT,
FORMAT_FLOAT),
::testing::Values(0, 1), // elements
::testing::Values(0, 1, 31) // frac
));
INSTANTIATE_TEST_CASE_P(
AllCombinations, VertexLoaderParamTest,
::testing::Combine(
::testing::Values(VertexComponentFormat::Direct, VertexComponentFormat::Index8,
VertexComponentFormat::Index16),
::testing::Values(ComponentFormat::UByte, ComponentFormat::Byte, ComponentFormat::UShort,
ComponentFormat::Short, ComponentFormat::Float),
::testing::Values(CoordComponentCount::XY, CoordComponentCount::XYZ),
::testing::Values(0, 1, 31) // frac
));

TEST_P(VertexLoaderParamTest, PositionAll)
{
int addr, format, elements, frac;
VertexComponentFormat addr;
ComponentFormat format;
CoordComponentCount elements;
int frac;
std::tie(addr, format, elements, frac) = GetParam();
this->m_vtx_desc.Position = addr;
this->m_vtx_desc.low.Position = addr;
this->m_vtx_attr.g0.PosFormat = format;
this->m_vtx_attr.g0.PosElements = elements;
this->m_vtx_attr.g0.PosFrac = frac;
this->m_vtx_attr.g0.ByteDequant = true;
elements += 2;
const u32 elem_size = GetElementSize(format);
const u32 elem_count = elements == CoordComponentCount::XY ? 2 : 3;

std::vector<float> values = {
std::numeric_limits<float>::lowest(),
@@ -153,68 +161,67 @@ TEST_P(VertexLoaderParamTest, PositionAll)
ASSERT_EQ(0u, values.size() % 2);
ASSERT_EQ(0u, values.size() % 3);

int count = (int)values.size() / elements;
u32 elem_size = 1 << (format / 2);
size_t input_size = elements * elem_size;
if (addr & MASK_INDEXED)
int count = (int)values.size() / elem_count;
size_t input_size = elem_count * elem_size;
if (IsIndexed(addr))
{
input_size = addr - 1;
input_size = addr == VertexComponentFormat::Index8 ? 1 : 2;
for (int i = 0; i < count; i++)
if (addr == INDEX8)
if (addr == VertexComponentFormat::Index8)
Input<u8>(i);
else
Input<u16>(i);
VertexLoaderManager::cached_arraybases[ARRAY_POSITION] = m_src.GetPointer();
g_main_cp_state.array_strides[ARRAY_POSITION] = elements * elem_size;
g_main_cp_state.array_strides[ARRAY_POSITION] = elem_count * elem_size;
}
CreateAndCheckSizes(input_size, elements * sizeof(float));
CreateAndCheckSizes(input_size, elem_count * sizeof(float));
for (float value : values)
{
switch (format)
{
case FORMAT_UBYTE:
case ComponentFormat::UByte:
Input((u8)value);
break;
case FORMAT_BYTE:
case ComponentFormat::Byte:
Input((s8)value);
break;
case FORMAT_USHORT:
case ComponentFormat::UShort:
Input((u16)value);
break;
case FORMAT_SHORT:
case ComponentFormat::Short:
Input((s16)value);
break;
case FORMAT_FLOAT:
case ComponentFormat::Float:
Input(value);
break;
}
}

RunVertices(count);

float scale = 1.f / (1u << (format == FORMAT_FLOAT ? 0 : frac));
float scale = 1.f / (1u << (format == ComponentFormat::Float ? 0 : frac));
for (auto iter = values.begin(); iter != values.end();)
{
float f, g;
switch (format)
{
case FORMAT_UBYTE:
case ComponentFormat::UByte:
f = (u8)*iter++;
g = (u8)*iter++;
break;
case FORMAT_BYTE:
case ComponentFormat::Byte:
f = (s8)*iter++;
g = (s8)*iter++;
break;
case FORMAT_USHORT:
case ComponentFormat::UShort:
f = (u16)*iter++;
g = (u16)*iter++;
break;
case FORMAT_SHORT:
case ComponentFormat::Short:
f = (s16)*iter++;
g = (s16)*iter++;
break;
case FORMAT_FLOAT:
case ComponentFormat::Float:
f = *iter++;
g = *iter++;
break;
@@ -228,8 +235,8 @@ TEST_P(VertexLoaderParamTest, PositionAll)

TEST_F(VertexLoaderTest, PositionIndex16FloatXY)
{
m_vtx_desc.Position = INDEX16;
m_vtx_attr.g0.PosFormat = FORMAT_FLOAT;
m_vtx_desc.low.Position = VertexComponentFormat::Index16;
m_vtx_attr.g0.PosFormat = ComponentFormat::Float;
CreateAndCheckSizes(sizeof(u16), 2 * sizeof(float));
Input<u16>(1);
Input<u16>(0);
@@ -246,100 +253,102 @@ TEST_F(VertexLoaderTest, PositionIndex16FloatXY)
}

class VertexLoaderSpeedTest : public VertexLoaderTest,
public ::testing::WithParamInterface<std::tuple<int, int>>
public ::testing::WithParamInterface<std::tuple<ComponentFormat, int>>
{
};
INSTANTIATE_TEST_CASE_P(FormatsAndElements, VertexLoaderSpeedTest,
::testing::Combine(::testing::Values(FORMAT_UBYTE, FORMAT_BYTE,
FORMAT_USHORT, FORMAT_SHORT,
FORMAT_FLOAT),
::testing::Values(0, 1) // elements
));
INSTANTIATE_TEST_CASE_P(
FormatsAndElements, VertexLoaderSpeedTest,
::testing::Combine(::testing::Values(ComponentFormat::UByte, ComponentFormat::Byte,
ComponentFormat::UShort, ComponentFormat::Short,
ComponentFormat::Float),
::testing::Values(0, 1)));

TEST_P(VertexLoaderSpeedTest, PositionDirectAll)
{
int format, elements;
std::tie(format, elements) = GetParam();
const char* map[] = {"u8", "s8", "u16", "s16", "float"};
printf("format: %s, elements: %d\n", map[format], elements);
m_vtx_desc.Position = DIRECT;
ComponentFormat format;
int elements_i;
std::tie(format, elements_i) = GetParam();
CoordComponentCount elements = static_cast<CoordComponentCount>(elements_i);
fmt::print("format: {}, elements: {}\n", format, elements);
const u32 elem_count = elements == CoordComponentCount::XY ? 2 : 3;
m_vtx_desc.low.Position = VertexComponentFormat::Direct;
m_vtx_attr.g0.PosFormat = format;
m_vtx_attr.g0.PosElements = elements;
elements += 2;
size_t elem_size = static_cast<size_t>(1) << (format / 2);
CreateAndCheckSizes(elements * elem_size, elements * sizeof(float));
const size_t elem_size = GetElementSize(format);
CreateAndCheckSizes(elem_count * elem_size, elem_count * sizeof(float));
for (int i = 0; i < 1000; ++i)
RunVertices(100000);
}

TEST_P(VertexLoaderSpeedTest, TexCoordSingleElement)
{
int format, elements;
std::tie(format, elements) = GetParam();
const char* map[] = {"u8", "s8", "u16", "s16", "float"};
printf("format: %s, elements: %d\n", map[format], elements);
m_vtx_desc.Position = DIRECT;
m_vtx_attr.g0.PosFormat = FORMAT_BYTE;
m_vtx_desc.Tex0Coord = DIRECT;
ComponentFormat format;
int elements_i;
std::tie(format, elements_i) = GetParam();
TexComponentCount elements = static_cast<TexComponentCount>(elements_i);
fmt::print("format: {}, elements: {}\n", format, elements);
const u32 elem_count = elements == TexComponentCount::S ? 1 : 2;
m_vtx_desc.low.Position = VertexComponentFormat::Direct;
m_vtx_attr.g0.PosFormat = ComponentFormat::Byte;
m_vtx_desc.high.Tex0Coord = VertexComponentFormat::Direct;
m_vtx_attr.g0.Tex0CoordFormat = format;
m_vtx_attr.g0.Tex0CoordElements = elements;
elements += 1;
size_t elem_size = static_cast<size_t>(1) << (format / 2);
CreateAndCheckSizes(2 * sizeof(s8) + elements * elem_size,
2 * sizeof(float) + elements * sizeof(float));
const size_t elem_size = GetElementSize(format);
CreateAndCheckSizes(2 * sizeof(s8) + elem_count * elem_size,
2 * sizeof(float) + elem_count * sizeof(float));
for (int i = 0; i < 1000; ++i)
RunVertices(100000);
}

TEST_F(VertexLoaderTest, LargeFloatVertexSpeed)
{
// Enables most attributes in floating point indexed mode to test speed.
m_vtx_desc.PosMatIdx = 1;
m_vtx_desc.Tex0MatIdx = 1;
m_vtx_desc.Tex1MatIdx = 1;
m_vtx_desc.Tex2MatIdx = 1;
m_vtx_desc.Tex3MatIdx = 1;
m_vtx_desc.Tex4MatIdx = 1;
m_vtx_desc.Tex5MatIdx = 1;
m_vtx_desc.Tex6MatIdx = 1;
m_vtx_desc.Tex7MatIdx = 1;
m_vtx_desc.Position = INDEX16;
m_vtx_desc.Normal = INDEX16;
m_vtx_desc.Color0 = INDEX16;
m_vtx_desc.Color1 = INDEX16;
m_vtx_desc.Tex0Coord = INDEX16;
m_vtx_desc.Tex1Coord = INDEX16;
m_vtx_desc.Tex2Coord = INDEX16;
m_vtx_desc.Tex3Coord = INDEX16;
m_vtx_desc.Tex4Coord = INDEX16;
m_vtx_desc.Tex5Coord = INDEX16;
m_vtx_desc.Tex6Coord = INDEX16;
m_vtx_desc.Tex7Coord = INDEX16;

m_vtx_attr.g0.PosElements = 1; // XYZ
m_vtx_attr.g0.PosFormat = FORMAT_FLOAT;
m_vtx_attr.g0.NormalElements = 1; // NBT
m_vtx_attr.g0.NormalFormat = FORMAT_FLOAT;
m_vtx_attr.g0.Color0Elements = 1; // Has Alpha
m_vtx_attr.g0.Color0Comp = FORMAT_32B_8888;
m_vtx_attr.g0.Color1Elements = 1; // Has Alpha
m_vtx_attr.g0.Color1Comp = FORMAT_32B_8888;
m_vtx_attr.g0.Tex0CoordElements = 1; // ST
m_vtx_attr.g0.Tex0CoordFormat = FORMAT_FLOAT;
m_vtx_attr.g1.Tex1CoordElements = 1; // ST
m_vtx_attr.g1.Tex1CoordFormat = FORMAT_FLOAT;
m_vtx_attr.g1.Tex2CoordElements = 1; // ST
m_vtx_attr.g1.Tex2CoordFormat = FORMAT_FLOAT;
m_vtx_attr.g1.Tex3CoordElements = 1; // ST
m_vtx_attr.g1.Tex3CoordFormat = FORMAT_FLOAT;
m_vtx_attr.g1.Tex4CoordElements = 1; // ST
m_vtx_attr.g1.Tex4CoordFormat = FORMAT_FLOAT;
m_vtx_attr.g2.Tex5CoordElements = 1; // ST
m_vtx_attr.g2.Tex5CoordFormat = FORMAT_FLOAT;
m_vtx_attr.g2.Tex6CoordElements = 1; // ST
m_vtx_attr.g2.Tex6CoordFormat = FORMAT_FLOAT;
m_vtx_attr.g2.Tex7CoordElements = 1; // ST
m_vtx_attr.g2.Tex7CoordFormat = FORMAT_FLOAT;
m_vtx_desc.low.PosMatIdx = 1;
m_vtx_desc.low.Tex0MatIdx = 1;
m_vtx_desc.low.Tex1MatIdx = 1;
m_vtx_desc.low.Tex2MatIdx = 1;
m_vtx_desc.low.Tex3MatIdx = 1;
m_vtx_desc.low.Tex4MatIdx = 1;
m_vtx_desc.low.Tex5MatIdx = 1;
m_vtx_desc.low.Tex6MatIdx = 1;
m_vtx_desc.low.Tex7MatIdx = 1;
m_vtx_desc.low.Position = VertexComponentFormat::Index16;
m_vtx_desc.low.Normal = VertexComponentFormat::Index16;
m_vtx_desc.low.Color0 = VertexComponentFormat::Index16;
m_vtx_desc.low.Color1 = VertexComponentFormat::Index16;
m_vtx_desc.high.Tex0Coord = VertexComponentFormat::Index16;
m_vtx_desc.high.Tex1Coord = VertexComponentFormat::Index16;
m_vtx_desc.high.Tex2Coord = VertexComponentFormat::Index16;
m_vtx_desc.high.Tex3Coord = VertexComponentFormat::Index16;
m_vtx_desc.high.Tex4Coord = VertexComponentFormat::Index16;
m_vtx_desc.high.Tex5Coord = VertexComponentFormat::Index16;
m_vtx_desc.high.Tex6Coord = VertexComponentFormat::Index16;
m_vtx_desc.high.Tex7Coord = VertexComponentFormat::Index16;

m_vtx_attr.g0.PosElements = CoordComponentCount::XYZ;
m_vtx_attr.g0.PosFormat = ComponentFormat::Float;
m_vtx_attr.g0.NormalElements = NormalComponentCount::NBT;
m_vtx_attr.g0.NormalFormat = ComponentFormat::Float;
m_vtx_attr.g0.Color0Elements = ColorComponentCount::RGBA;
m_vtx_attr.g0.Color0Comp = ColorFormat::RGBA8888;
m_vtx_attr.g0.Color1Elements = ColorComponentCount::RGBA;
m_vtx_attr.g0.Color1Comp = ColorFormat::RGBA8888;
m_vtx_attr.g0.Tex0CoordElements = TexComponentCount::ST;
m_vtx_attr.g0.Tex0CoordFormat = ComponentFormat::Float;
m_vtx_attr.g1.Tex1CoordElements = TexComponentCount::ST;
m_vtx_attr.g1.Tex1CoordFormat = ComponentFormat::Float;
m_vtx_attr.g1.Tex2CoordElements = TexComponentCount::ST;
m_vtx_attr.g1.Tex2CoordFormat = ComponentFormat::Float;
m_vtx_attr.g1.Tex3CoordElements = TexComponentCount::ST;
m_vtx_attr.g1.Tex3CoordFormat = ComponentFormat::Float;
m_vtx_attr.g1.Tex4CoordElements = TexComponentCount::ST;
m_vtx_attr.g1.Tex4CoordFormat = ComponentFormat::Float;
m_vtx_attr.g2.Tex5CoordElements = TexComponentCount::ST;
m_vtx_attr.g2.Tex5CoordFormat = ComponentFormat::Float;
m_vtx_attr.g2.Tex6CoordElements = TexComponentCount::ST;
m_vtx_attr.g2.Tex6CoordFormat = ComponentFormat::Float;
m_vtx_attr.g2.Tex7CoordElements = TexComponentCount::ST;
m_vtx_attr.g2.Tex7CoordFormat = ComponentFormat::Float;

CreateAndCheckSizes(33, 156);