@@ -14,7 +14,7 @@ using namespace Arm64Gen;

constexpr ARM64Reg src_reg = ARM64Reg::X0;
constexpr ARM64Reg dst_reg = ARM64Reg::X1;
constexpr ARM64Reg count_reg = ARM64Reg::W2;
constexpr ARM64Reg remaining_reg = ARM64Reg::W2;
constexpr ARM64Reg skipped_reg = ARM64Reg::W17;
constexpr ARM64Reg scratch1_reg = ARM64Reg::W16;
constexpr ARM64Reg scratch2_reg = ARM64Reg::W15;
@@ -209,12 +209,24 @@ int VertexLoaderARM64::ReadVertex(VertexComponentFormat attribute, ComponentForm
// Z-Freeze
if (native_format == &m_native_vtx_decl.position)
{
CMP(count_reg, 3);
FixupBranch dont_store = B(CC_GT);
MOVP2R(EncodeRegTo64(scratch2_reg), VertexLoaderManager::position_cache);
ADD(EncodeRegTo64(scratch1_reg), EncodeRegTo64(scratch2_reg), EncodeRegTo64(count_reg),
ArithOption(EncodeRegTo64(count_reg), ShiftType::LSL, 4));
m_float_emit.STUR(write_size, coords, EncodeRegTo64(scratch1_reg), -16);
CMP(remaining_reg, 3);
FixupBranch dont_store = B(CC_GE);
MOVP2R(EncodeRegTo64(scratch2_reg), VertexLoaderManager::position_cache.data());
m_float_emit.STR(128, coords, EncodeRegTo64(scratch2_reg), ArithOption(remaining_reg, true));
SetJumpTarget(dont_store);
}
else if (native_format == &m_native_vtx_decl.normals[1])
{
FixupBranch dont_store = CBNZ(remaining_reg);
MOVP2R(EncodeRegTo64(scratch2_reg), VertexLoaderManager::tangent_cache.data());
m_float_emit.STR(128, IndexType::Unsigned, coords, EncodeRegTo64(scratch2_reg), 0);
SetJumpTarget(dont_store);
}
else if (native_format == &m_native_vtx_decl.normals[2])
{
FixupBranch dont_store = CBNZ(remaining_reg);
MOVP2R(EncodeRegTo64(scratch2_reg), VertexLoaderManager::binormal_cache.data());
m_float_emit.STR(128, IndexType::Unsigned, coords, EncodeRegTo64(scratch2_reg), 0);
SetJumpTarget(dont_store);
}

@@ -403,7 +415,7 @@ void VertexLoaderARM64::GenerateVertexLoader()
AlignCode16();
if (IsIndexed(m_VtxDesc.low.Position))
MOV(skipped_reg, ARM64Reg::WZR);
MOV(saved_count, count_reg);
ADD(saved_count, remaining_reg, 1);

MOVP2R(stride_reg, g_main_cp_state.array_strides.data());
MOVP2R(arraybase_reg, VertexLoaderManager::cached_arraybases.data());
@@ -420,10 +432,10 @@ void VertexLoaderARM64::GenerateVertexLoader()
STR(IndexType::Unsigned, scratch1_reg, dst_reg, m_dst_ofs);

// Z-Freeze
CMP(count_reg, 3);
FixupBranch dont_store = B(CC_GT);
MOVP2R(EncodeRegTo64(scratch2_reg), VertexLoaderManager::position_matrix_index);
STR(IndexType::Unsigned, scratch1_reg, EncodeRegTo64(scratch2_reg), 0);
CMP(remaining_reg, 3);
FixupBranch dont_store = B(CC_GE);
MOVP2R(EncodeRegTo64(scratch2_reg), VertexLoaderManager::position_matrix_index_cache.data());
STR(scratch1_reg, EncodeRegTo64(scratch2_reg), ArithOption(remaining_reg, true));
SetJumpTarget(dont_store);

m_native_vtx_decl.posmtx.components = 4;
@@ -583,8 +595,8 @@ void VertexLoaderARM64::GenerateVertexLoader()
const u8* cont = GetCodePtr();
ADD(src_reg, src_reg, m_src_ofs);

SUB(count_reg, count_reg, 1);
CBNZ(count_reg, loop_start);
SUBS(remaining_reg, remaining_reg, 1);
B(CCFlags::CC_GE, loop_start);

if (IsIndexed(m_VtxDesc.low.Position))
{
@@ -611,5 +623,5 @@ int VertexLoaderARM64::RunVertices(DataReader src, DataReader dst, int count)
{
m_numLoadedVertices += count;
return ((int (*)(u8 * src, u8 * dst, int count)) region)(src.GetPointer(), dst.GetPointer(),
count);
count - 1);
}
@@ -151,9 +151,9 @@ u32 VertexLoaderBase::GetVertexComponents(const TVtxDesc& vtx_desc, const VAT& v
// Vertices always have positions; thus there is no VB_HAS_POS as it would always be set
if (vtx_desc.low.Normal != VertexComponentFormat::NotPresent)
{
components |= VB_HAS_NRM0;
components |= VB_HAS_NORMAL;
if (vtx_attr.g0.NormalElements == NormalComponentCount::NBT)
components |= VB_HAS_NRM1 | VB_HAS_NRM2;
components |= VB_HAS_TANGENT | VB_HAS_BINORMAL;
}
for (u32 i = 0; i < vtx_desc.low.Color.Size(); i++)
{
@@ -31,11 +31,12 @@

namespace VertexLoaderManager
{
float position_cache[3][4];

// The counter added to the address of the array is 1, 2, or 3, but never zero.
// So only index 1 - 3 are used.
u32 position_matrix_index[4];
// Used by zfreeze
std::array<u32, 3> position_matrix_index_cache;
// 3 vertices, 4 floats each to allow SIMD overwrite
alignas(sizeof(std::array<float, 4>)) std::array<std::array<float, 4>, 3> position_cache;
alignas(sizeof(std::array<float, 4>)) std::array<float, 4> tangent_cache;
alignas(sizeof(std::array<float, 4>)) std::array<float, 4> binormal_cache;

static NativeVertexFormatMap s_native_vertex_map;
static NativeVertexFormat* s_current_vtx_fmt;
@@ -251,8 +252,9 @@ static VertexLoaderBase* RefreshLoader(int vtx_attr_group, bool preprocess = fal
int RunVertices(int vtx_attr_group, OpcodeDecoder::Primitive primitive, int count, DataReader src,
bool is_preprocess)
{
if (!count)
if (count == 0)
return 0;
ASSERT(count > 0);

VertexLoaderBase* loader = RefreshLoader(vtx_attr_group, is_preprocess);

@@ -53,8 +53,12 @@ void UpdateVertexArrayPointers();

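// Position cache for zfreeze (3 vertices, 4 floats each to allow SIMD overwrite).
// These arrays are stored in reverse order: they are indexed by the number of vertices still
// remaining, so index 0 holds the last vertex loaded and index 2 the third-to-last.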
extern float position_cache[3][4];
extern u32 position_matrix_index[4];
extern std::array<std::array<float, 4>, 3> position_cache;
extern std::array<u32, 3> position_matrix_index_cache;
// Store the tangent and binormal vectors for games that use emboss texgens when the vertex format
// doesn't include them (e.g. RS2 and RS3). These too are 4 floats each for SIMD overwrites.
extern std::array<float, 4> tangent_cache;
extern std::array<float, 4> binormal_cache;

// VB_HAS_X. Bitmask telling what vertex components are present.
extern u32 g_current_components;
@@ -26,7 +26,9 @@ static const X64Reg dst_reg = ABI_PARAM2;
static const X64Reg scratch1 = RAX;
static const X64Reg scratch2 = ABI_PARAM3;
static const X64Reg scratch3 = ABI_PARAM4;
static const X64Reg count_reg = R10;
// The number of vertices still to be processed after the current one. Starts at count - 1 and is
// 0 during the final loop iteration.
static const X64Reg remaining_reg = R10;
static const X64Reg skipped_reg = R11;
static const X64Reg base_reg = RBX;

@@ -114,6 +116,35 @@ int VertexLoaderX64::ReadVertex(OpArg data, VertexComponentFormat attribute, Com

X64Reg coords = XMM0;

// Emits the code that records the just-loaded vector (held in `coords`) in the caches used by
// zfreeze and by emboss texgens. Which cache is written depends on the attribute being loaded:
//  - position:   the last three vertices go to position_cache, indexed by remaining_reg.
//  - normals[1]: the last vertex's value goes to tangent_cache.
//  - normals[2]: the last vertex's value goes to binormal_cache.
// Any other attribute emits nothing. Every store is a full 16-byte MOVUPS, which is why the
// caches are 4 floats wide.
const auto write_zfreeze = [&]() {
  if (native_format == &m_native_vtx_decl.position)
  {
    // Unsigned compare: skip the store unless remaining_reg is 0, 1 or 2.
    CMP(32, R(remaining_reg), Imm8(3));
    FixupBranch dont_store = J_CC(CC_AE);
    // The position cache is composed of 3 rows of 4 floats each; since each float is 4 bytes,
    // we need to scale by 4 twice to cover the 4 floats.
    LEA(32, scratch3, MScaled(remaining_reg, SCALE_4, 0));
    MOVUPS(MPIC(VertexLoaderManager::position_cache.data(), scratch3, SCALE_4), coords);
    SetJumpTarget(dont_store);
  }
  else if (native_format == &m_native_vtx_decl.normals[1])
  {
    // Only the final vertex (remaining_reg == 0) updates the cached tangent.
    TEST(32, R(remaining_reg), R(remaining_reg));
    FixupBranch dont_store = J_CC(CC_NZ);
    // For similar reasons, the cached tangent and binormal are 4 floats each
    MOVUPS(MPIC(VertexLoaderManager::tangent_cache.data()), coords);
    SetJumpTarget(dont_store);
  }
  else if (native_format == &m_native_vtx_decl.normals[2])
  {
    // Only the final vertex (remaining_reg == 0) updates the cached binormal.
    // This must be TEST, not CMP: comparing a register with itself always sets ZF, so the NZ
    // branch would never be taken and the store would run for every vertex.
    TEST(32, R(remaining_reg), R(remaining_reg));
    FixupBranch dont_store = J_CC(CC_NZ);
    // For similar reasons, the cached tangent and binormal are 4 floats each
    MOVUPS(MPIC(VertexLoaderManager::binormal_cache.data()), coords);
    SetJumpTarget(dont_store);
  }
};

int elem_size = GetElementSize(format);
int load_bytes = elem_size * count_in;
OpArg dest = MDisp(dst_reg, m_dst_ofs);
@@ -202,7 +233,9 @@ int VertexLoaderX64::ReadVertex(OpArg data, VertexComponentFormat attribute, Com
dest.AddMemOffset(sizeof(float));

// zfreeze
if (native_format == &m_native_vtx_decl.position)
if (native_format == &m_native_vtx_decl.position ||
native_format == &m_native_vtx_decl.normals[1] ||
native_format == &m_native_vtx_decl.normals[2])
{
if (cpu_info.bSSE4_1)
{
@@ -217,16 +250,7 @@ int VertexLoaderX64::ReadVertex(OpArg data, VertexComponentFormat attribute, Com
}
}

// zfreeze
if (native_format == &m_native_vtx_decl.position)
{
CMP(32, R(count_reg), Imm8(3));
FixupBranch dont_store = J_CC(CC_A);
LEA(32, scratch3, MScaled(count_reg, SCALE_4, -4));
MOVUPS(MPIC(VertexLoaderManager::position_cache, scratch3, SCALE_4), coords);
SetJumpTarget(dont_store);
}
return load_bytes;
write_zfreeze();
}
}

@@ -251,15 +275,7 @@ int VertexLoaderX64::ReadVertex(OpArg data, VertexComponentFormat attribute, Com
break;
}

// zfreeze
if (native_format == &m_native_vtx_decl.position)
{
CMP(32, R(count_reg), Imm8(3));
FixupBranch dont_store = J_CC(CC_A);
LEA(32, scratch3, MScaled(count_reg, SCALE_4, -4));
MOVUPS(MPIC(VertexLoaderManager::position_cache, scratch3, SCALE_4), coords);
SetJumpTarget(dont_store);
}
write_zfreeze();

return load_bytes;
}
@@ -385,16 +401,18 @@ void VertexLoaderX64::ReadColor(OpArg data, VertexComponentFormat attribute, Col

void VertexLoaderX64::GenerateVertexLoader()
{
BitSet32 regs = {src_reg, dst_reg, scratch1, scratch2,
scratch3, count_reg, skipped_reg, base_reg};
BitSet32 regs = {src_reg, dst_reg, scratch1, scratch2,
scratch3, remaining_reg, skipped_reg, base_reg};
regs &= ABI_ALL_CALLEE_SAVED;
ABI_PushRegistersAndAdjustStack(regs, 0);

// Backup count since we're going to count it down.
PUSH(32, R(ABI_PARAM3));

// ABI_PARAM3 is one of the lower registers, so free it for scratch2.
MOV(32, R(count_reg), R(ABI_PARAM3));
// We also have it end at a value of 0, to simplify indexing for zfreeze;
// this requires subtracting 1 at the start.
LEA(32, remaining_reg, MDisp(ABI_PARAM3, -1));

MOV(64, R(base_reg), R(ABI_PARAM4));

@@ -412,9 +430,10 @@ void VertexLoaderX64::GenerateVertexLoader()
MOV(32, MDisp(dst_reg, m_dst_ofs), R(scratch1));

// zfreeze
CMP(32, R(count_reg), Imm8(3));
FixupBranch dont_store = J_CC(CC_A);
MOV(32, MPIC(VertexLoaderManager::position_matrix_index, count_reg, SCALE_4), R(scratch1));
CMP(32, R(remaining_reg), Imm8(3));
FixupBranch dont_store = J_CC(CC_AE);
MOV(32, MPIC(VertexLoaderManager::position_matrix_index_cache.data(), remaining_reg, SCALE_4),
R(scratch1));
SetJumpTarget(dont_store);

m_native_vtx_decl.posmtx.components = 4;
@@ -513,8 +532,8 @@ void VertexLoaderX64::GenerateVertexLoader()
const u8* cont = GetCodePtr();
ADD(64, R(src_reg), Imm32(m_src_ofs));

SUB(32, R(count_reg), Imm8(1));
J_CC(CC_NZ, loop_start);
SUB(32, R(remaining_reg), Imm8(1));
J_CC(CC_AE, loop_start);

// Get the original count.
POP(32, R(ABI_RETURN));
@@ -40,14 +40,22 @@ constexpr float FracAdjust(float val)
}

template <typename T, u32 N>
void ReadIndirect(const T* data)
void ReadIndirect(VertexLoader* loader, const T* data)
{
static_assert(3 == N || 9 == N, "N is only sane as 3 or 9!");
DataReader dst(g_vertex_manager_write_ptr, nullptr);

for (u32 i = 0; i < N; ++i)
{
dst.Write(FracAdjust(Common::FromBigEndian(data[i])));
const float value = FracAdjust(Common::FromBigEndian(data[i]));
if (loader->m_remaining == 0)
{
if (i >= 3 && i < 6)
VertexLoaderManager::tangent_cache[i - 3] = value;
else if (i >= 6 && i < 9)
VertexLoaderManager::binormal_cache[i - 6] = value;
}
dst.Write(value);
}

g_vertex_manager_write_ptr = dst.GetPointer();
@@ -57,43 +65,43 @@ void ReadIndirect(const T* data)
// Loader for normals stored inline in the vertex data: hands N * 3 components of type T, starting
// at the address returned by DataGetPosition(), to ReadIndirect.
template <typename T, u32 N>
struct Normal_Direct
{
// NOTE(review): two signature lines precede a single body here, and ReadIndirect is called both
// without and with `loader` below. This looks like the before/after variants of one change left
// side by side - confirm which one is intended.
static void function([[maybe_unused]] VertexLoader* loader)
static void function(VertexLoader* loader)
{
const auto source = reinterpret_cast<const T*>(DataGetPosition());
ReadIndirect<T, N * 3>(source);
ReadIndirect<T, N * 3>(loader, source);
// DataSkip is given exactly the byte size of the N * 3 components passed to ReadIndirect.
DataSkip<N * 3 * sizeof(T)>();
}

// Same byte count that function() passes to DataSkip.
static constexpr u32 size = sizeof(T) * N * 3;
};

// Reads one index of type I via DataRead, uses it to locate an entry in the normal array
// (cached_arraybases[CPArray::Normal] + index * stride), and hands N * 3 components of type T to
// ReadIndirect. Offset selects which group of three T components inside the entry to start from.
template <typename I, typename T, u32 N, u32 Offset>
// NOTE(review): the two signature lines and the two ReadIndirect calls below look like the
// before/after variants of one change left side by side - confirm which one is intended.
void Normal_Index_Offset()
void Normal_Index_Offset(VertexLoader* loader)
{
static_assert(std::is_unsigned_v<I>, "Only unsigned I is sane!");

const auto index = DataRead<I>();
// Entry address = array base + index * stride, then skip `Offset` vectors of 3 T components.
const auto data = reinterpret_cast<const T*>(
VertexLoaderManager::cached_arraybases[CPArray::Normal] +
(index * g_main_cp_state.array_strides[CPArray::Normal]) + sizeof(T) * 3 * Offset);
ReadIndirect<T, N * 3>(data);
ReadIndirect<T, N * 3>(loader, data);
}

// Loader for indexed normals where a single index of type I selects all N vectors: forwards to
// Normal_Index_Offset with Offset = 0.
template <typename I, typename T, u32 N>
struct Normal_Index
{
// NOTE(review): `function` is defined twice below (once calling Normal_Index_Offset without
// arguments, once passing `loader`). This looks like the before/after variants of one change left
// side by side - confirm which one is intended.
static void function([[maybe_unused]] VertexLoader* loader) { Normal_Index_Offset<I, T, N, 0>(); }
static void function(VertexLoader* loader) { Normal_Index_Offset<I, T, N, 0>(loader); }
// Size of the single index of type I.
static constexpr u32 size = sizeof(I);
};

template <typename I, typename T>
struct Normal_Index_Indices3
{
static void function([[maybe_unused]] VertexLoader* loader)
static void function(VertexLoader* loader)
{
Normal_Index_Offset<I, T, 1, 0>();
Normal_Index_Offset<I, T, 1, 1>();
Normal_Index_Offset<I, T, 1, 2>();
Normal_Index_Offset<I, T, 1, 0>(loader);
Normal_Index_Offset<I, T, 1, 1>(loader);
Normal_Index_Offset<I, T, 1, 2>(loader);
}

static constexpr u32 size = sizeof(I) * 3;
@@ -41,8 +41,8 @@ void Pos_ReadDirect(VertexLoader* loader)
for (int i = 0; i < N; ++i)
{
const float value = PosScale(src.Read<T>(), scale);
if (loader->m_counter < 3)
VertexLoaderManager::position_cache[loader->m_counter][i] = value;
if (loader->m_remaining < 3)
VertexLoaderManager::position_cache[loader->m_remaining][i] = value;
dst.Write(value);
}

@@ -68,8 +68,8 @@ void Pos_ReadIndex(VertexLoader* loader)
for (int i = 0; i < N; ++i)
{
const float value = PosScale(Common::FromBigEndian(data[i]), scale);
if (loader->m_counter < 3)
VertexLoaderManager::position_cache[loader->m_counter][i] = value;
if (loader->m_remaining < 3)
VertexLoaderManager::position_cache[loader->m_remaining][i] = value;
dst.Write(value);
}

@@ -453,6 +453,7 @@ void VertexManagerBase::Flush()
}
}

CalculateBinormals(VertexLoaderManager::GetCurrentVertexFormat());
// Calculate ZSlope for zfreeze
VertexShaderManager::SetConstants();
if (!bpmem.genMode.zfreeze)
@@ -558,7 +559,7 @@ void VertexManagerBase::CalculateZSlope(NativeVertexFormat* format)
{
// If this vertex format has per-vertex position matrix IDs, look it up.
if (vert_decl.posmtx.enable)
mtxIdx = VertexLoaderManager::position_matrix_index[3 - i];
mtxIdx = VertexLoaderManager::position_matrix_index_cache[2 - i];

if (vert_decl.position.components == 2)
VertexLoaderManager::position_cache[2 - i][2] = 0;
@@ -595,6 +596,31 @@ void VertexManagerBase::CalculateZSlope(NativeVertexFormat* format)
m_zslope.dirty = true;
}

// Copies the cached tangent and binormal into the vertex shader constants when the given vertex
// format does not provide them itself, marking the constants dirty only if a value changed.
//
// format: the vertex format of the draw being flushed; its declaration is inspected to decide
//         whether the cached vectors are needed.
void VertexManagerBase::CalculateBinormals(NativeVertexFormat* format)
{
  // Bind by reference: only one flag is read, so the declaration does not need to be copied.
  const auto& vert_decl = format->GetVertexDeclaration();

  // Only update the binormal/tangent vertex shader constants if the vertex format lacks binormals
  // (VertexLoaderManager::binormal_cache gets updated by the vertex loader when binormals are
  // present, though)
  if (vert_decl.normals[1].enable)
    return;

  // The caches are 4 floats wide but only 3 components are meaningful; force the 4th lane to a
  // known value so it cannot cause a spurious mismatch in the comparisons below.
  VertexLoaderManager::tangent_cache[3] = 0;
  VertexLoaderManager::binormal_cache[3] = 0;

  // Overwrites `constant` with `cache` and flags the shader constants as dirty, but only when the
  // two actually differ.
  const auto sync_constant = [](auto& constant, const auto& cache) {
    if (constant != cache)
    {
      constant = cache;
      VertexShaderManager::dirty = true;
    }
  };

  sync_constant(VertexShaderManager::constants.cached_tangent, VertexLoaderManager::tangent_cache);
  sync_constant(VertexShaderManager::constants.cached_binormal,
                VertexLoaderManager::binormal_cache);
}

void VertexManagerBase::UpdatePipelineConfig()
{
NativeVertexFormat* vertex_format = VertexLoaderManager::GetCurrentVertexFormat();
@@ -172,6 +172,7 @@ class VertexManagerBase
u32 GetRemainingIndices(OpcodeDecoder::Primitive primitive) const;

void CalculateZSlope(NativeVertexFormat* format);
void CalculateBinormals(NativeVertexFormat* format);
void LoadTextures();

u8* m_cur_buffer_pointer = nullptr;
@@ -39,7 +39,7 @@ VertexShaderUid GetVertexShaderUid()
switch (texinfo.texgentype)
{
case TexGenType::EmbossMap: // calculate tex coords into bump map
if ((uid_data->components & (VB_HAS_NRM1 | VB_HAS_NRM2)) != 0)
if ((uid_data->components & (VB_HAS_TANGENT | VB_HAS_BINORMAL)) != 0)
{
// transform the light dir into tangent space
texinfo.embosslightshift = xfmem.texMtxInfo[i].embosslightshift;
@@ -105,12 +105,12 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
out.Write("ATTRIBUTE_LOCATION({}) in float4 rawpos;\n", SHADER_POSITION_ATTRIB);
if ((uid_data->components & VB_HAS_POSMTXIDX) != 0)
out.Write("ATTRIBUTE_LOCATION({}) in uint4 posmtx;\n", SHADER_POSMTX_ATTRIB);
if ((uid_data->components & VB_HAS_NRM0) != 0)
out.Write("ATTRIBUTE_LOCATION({}) in float3 rawnorm0;\n", SHADER_NORM0_ATTRIB);
if ((uid_data->components & VB_HAS_NRM1) != 0)
out.Write("ATTRIBUTE_LOCATION({}) in float3 rawnorm1;\n", SHADER_NORM1_ATTRIB);
if ((uid_data->components & VB_HAS_NRM2) != 0)
out.Write("ATTRIBUTE_LOCATION({}) in float3 rawnorm2;\n", SHADER_NORM2_ATTRIB);
if ((uid_data->components & VB_HAS_NORMAL) != 0)
out.Write("ATTRIBUTE_LOCATION({}) in float3 rawnormal;\n", SHADER_NORMAL_ATTRIB);
if ((uid_data->components & VB_HAS_TANGENT) != 0)
out.Write("ATTRIBUTE_LOCATION({}) in float3 rawtangent;\n", SHADER_TANGENT_ATTRIB);
if ((uid_data->components & VB_HAS_BINORMAL) != 0)
out.Write("ATTRIBUTE_LOCATION({}) in float3 rawbinormal;\n", SHADER_BINORMAL_ATTRIB);

if ((uid_data->components & VB_HAS_COL0) != 0)
out.Write("ATTRIBUTE_LOCATION({}) in float4 rawcolor0;\n", SHADER_COLOR0_ATTRIB);
@@ -169,12 +169,12 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
out.Write("VS_OUTPUT main(\n");

// inputs
if ((uid_data->components & VB_HAS_NRM0) != 0)
out.Write(" float3 rawnorm0 : NORMAL0,\n");
if ((uid_data->components & VB_HAS_NRM1) != 0)
out.Write(" float3 rawnorm1 : NORMAL1,\n");
if ((uid_data->components & VB_HAS_NRM2) != 0)
out.Write(" float3 rawnorm2 : NORMAL2,\n");
if ((uid_data->components & VB_HAS_NORMAL) != 0)
out.Write(" float3 rawnormal : NORMAL,\n");
if ((uid_data->components & VB_HAS_TANGENT) != 0)
out.Write(" float3 rawtangent : TANGENT,\n");
if ((uid_data->components & VB_HAS_BINORMAL) != 0)
out.Write(" float3 rawbinormal : BINORMAL,\n");
if ((uid_data->components & VB_HAS_COL0) != 0)
out.Write(" float4 rawcolor0 : COLOR0,\n");
if ((uid_data->components & VB_HAS_COL1) != 0)
@@ -222,60 +222,60 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
// transforms
if ((uid_data->components & VB_HAS_POSMTXIDX) != 0)
{
// Vertex format has a per-vertex matrix
out.Write("int posidx = int(posmtx.r);\n"
"float4 pos = float4(dot(" I_TRANSFORMMATRICES
"[posidx], rawpos), dot(" I_TRANSFORMMATRICES
"[posidx+1], rawpos), dot(" I_TRANSFORMMATRICES "[posidx+2], rawpos), 1);\n");

if ((uid_data->components & VB_HAS_NRMALL) != 0)
"float4 P0 = " I_TRANSFORMMATRICES "[posidx];\n"
"float4 P1 = " I_TRANSFORMMATRICES "[posidx + 1];\n"
"float4 P2 = " I_TRANSFORMMATRICES "[posidx + 2];\n");
if ((uid_data->components & VB_HAS_NORMAL) != 0)
{
out.Write("int normidx = posidx & 31;\n"
"float3 N0 = " I_NORMALMATRICES "[normidx].xyz, N1 = " I_NORMALMATRICES
"[normidx+1].xyz, N2 = " I_NORMALMATRICES "[normidx+2].xyz;\n");
}

if ((uid_data->components & VB_HAS_NRM0) != 0)
{
out.Write("float3 _norm0 = normalize(float3(dot(N0, rawnorm0), dot(N1, rawnorm0), dot(N2, "
"rawnorm0)));\n");
}
if ((uid_data->components & VB_HAS_NRM1) != 0)
{
out.Write(
"float3 _norm1 = float3(dot(N0, rawnorm1), dot(N1, rawnorm1), dot(N2, rawnorm1));\n");
}
if ((uid_data->components & VB_HAS_NRM2) != 0)
{
out.Write(
"float3 _norm2 = float3(dot(N0, rawnorm2), dot(N1, rawnorm2), dot(N2, rawnorm2));\n");
"float3 N0 = " I_NORMALMATRICES "[normidx].xyz;\n"
"float3 N1 = " I_NORMALMATRICES "[normidx + 1].xyz;\n"
"float3 N2 = " I_NORMALMATRICES "[normidx + 2].xyz;\n");
}
}
else
{
out.Write("float4 pos = float4(dot(" I_POSNORMALMATRIX "[0], rawpos), dot(" I_POSNORMALMATRIX
"[1], rawpos), dot(" I_POSNORMALMATRIX "[2], rawpos), 1.0);\n");
if ((uid_data->components & VB_HAS_NRM0) != 0)
{
out.Write("float3 _norm0 = normalize(float3(dot(" I_POSNORMALMATRIX
"[3].xyz, rawnorm0), dot(" I_POSNORMALMATRIX
"[4].xyz, rawnorm0), dot(" I_POSNORMALMATRIX "[5].xyz, rawnorm0)));\n");
}
if ((uid_data->components & VB_HAS_NRM1) != 0)
{
out.Write("float3 _norm1 = float3(dot(" I_POSNORMALMATRIX
"[3].xyz, rawnorm1), dot(" I_POSNORMALMATRIX
"[4].xyz, rawnorm1), dot(" I_POSNORMALMATRIX "[5].xyz, rawnorm1));\n");
}
if ((uid_data->components & VB_HAS_NRM2) != 0)
// One shared matrix
out.Write("float4 P0 = " I_POSNORMALMATRIX "[0];\n"
"float4 P1 = " I_POSNORMALMATRIX "[1];\n"
"float4 P2 = " I_POSNORMALMATRIX "[2];\n");
if ((uid_data->components & VB_HAS_NORMAL) != 0)
{
out.Write("float3 _norm2 = float3(dot(" I_POSNORMALMATRIX
"[3].xyz, rawnorm2), dot(" I_POSNORMALMATRIX
"[4].xyz, rawnorm2), dot(" I_POSNORMALMATRIX "[5].xyz, rawnorm2));\n");
out.Write("float3 N0 = " I_POSNORMALMATRIX "[3].xyz;\n"
"float3 N1 = " I_POSNORMALMATRIX "[4].xyz;\n"
"float3 N2 = " I_POSNORMALMATRIX "[5].xyz;\n");
}
}

if ((uid_data->components & VB_HAS_NRM0) == 0)
out.Write("float3 _norm0 = float3(0.0, 0.0, 0.0);\n");
out.Write("// Multiply the position vector by the position matrix\n"
"float4 pos = float4(dot(P0, rawpos), dot(P1, rawpos), dot(P2, rawpos), 1.0);\n");
if ((uid_data->components & VB_HAS_NORMAL) != 0)
{
if ((uid_data->components & VB_HAS_TANGENT) == 0)
out.Write("float3 rawtangent = " I_CACHED_TANGENT ".xyz;\n");
if ((uid_data->components & VB_HAS_BINORMAL) == 0)
out.Write("float3 rawbinormal = " I_CACHED_BINORMAL ".xyz;\n");

// The scale of the transform matrix is used to control the size of the emboss map effect, by
// changing the scale of the transformed binormals (which only get used by emboss map texgens).
// By normalising the first transformed normal (which is used by lighting calculations and needs
// to be unit length), the same transform matrix can do double duty, scaling for emboss mapping,
// and not scaling for lighting.
out.Write("float3 _normal = normalize(float3(dot(N0, rawnormal), dot(N1, rawnormal), dot(N2, "
"rawnormal)));\n"
"float3 _tangent = float3(dot(N0, rawtangent), dot(N1, rawtangent), dot(N2, "
"rawtangent));\n"
"float3 _binormal = float3(dot(N0, rawbinormal), dot(N1, rawbinormal), dot(N2, "
"rawbinormal));\n");
}
else
{
out.Write("float3 _normal = float3(0.0, 0.0, 0.0);\n");
out.Write("float3 _binormal = float3(0.0, 0.0, 0.0);\n");
out.Write("float3 _tangent = float3(0.0, 0.0, 0.0);\n");
}

out.Write("o.pos = float4(dot(" I_PROJECTION "[0], pos), dot(" I_PROJECTION
"[1], pos), dot(" I_PROJECTION "[2], pos), dot(" I_PROJECTION "[3], pos));\n");
@@ -300,24 +300,24 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
out.Write("coord.xyz = rawpos.xyz;\n");
break;
case SourceRow::Normal:
if ((uid_data->components & VB_HAS_NRM0) != 0)
if ((uid_data->components & VB_HAS_NORMAL) != 0)
{
out.Write("coord.xyz = rawnorm0.xyz;\n");
out.Write("coord.xyz = rawnormal.xyz;\n");
}
break;
case SourceRow::Colors:
ASSERT(texinfo.texgentype == TexGenType::Color0 || texinfo.texgentype == TexGenType::Color1);
break;
case SourceRow::BinormalT:
if ((uid_data->components & VB_HAS_NRM1) != 0)
if ((uid_data->components & VB_HAS_TANGENT) != 0)
{
out.Write("coord.xyz = rawnorm1.xyz;\n");
out.Write("coord.xyz = rawtangent.xyz;\n");
}
break;
case SourceRow::BinormalB:
if ((uid_data->components & VB_HAS_NRM2) != 0)
if ((uid_data->components & VB_HAS_BINORMAL) != 0)
{
out.Write("coord.xyz = rawnorm2.xyz;\n");
out.Write("coord.xyz = rawbinormal.xyz;\n");
}
break;
default:
@@ -346,22 +346,12 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
{
case TexGenType::EmbossMap: // calculate tex coords into bump map

if ((uid_data->components & (VB_HAS_NRM1 | VB_HAS_NRM2)) != 0)
{
// transform the light dir into tangent space
out.Write("ldir = normalize(" LIGHT_POS ".xyz - pos.xyz);\n",
LIGHT_POS_PARAMS(texinfo.embosslightshift));
out.Write(
"o.tex{}.xyz = o.tex{}.xyz + float3(dot(ldir, _norm1), dot(ldir, _norm2), 0.0);\n", i,
texinfo.embosssourceshift);
}
else
{
// The following assert was triggered in House of the Dead Overkill and Star Wars Rogue
// Squadron 2
// ASSERT(0); // should have normals
out.Write("o.tex{}.xyz = o.tex{}.xyz;\n", i, texinfo.embosssourceshift);
}
// transform the light dir into tangent space
out.Write("ldir = normalize(" LIGHT_POS ".xyz - pos.xyz);\n",
LIGHT_POS_PARAMS(texinfo.embosslightshift));
out.Write(
"o.tex{}.xyz = o.tex{}.xyz + float3(dot(ldir, _tangent), dot(ldir, _binormal), 0.0);\n",
i, texinfo.embosssourceshift);

break;
case TexGenType::Color0:
@@ -471,7 +461,7 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho

if (per_pixel_lighting)
{
out.Write("o.Normal = _norm0;\n"
out.Write("o.Normal = _normal;\n"
"o.WorldPos = pos.xyz;\n");
}

@@ -17,9 +17,9 @@ enum : int
{
SHADER_POSITION_ATTRIB = 0,
SHADER_POSMTX_ATTRIB = 1,
SHADER_NORM0_ATTRIB = 2,
SHADER_NORM1_ATTRIB = 3,
SHADER_NORM2_ATTRIB = 4,
SHADER_NORMAL_ATTRIB = 2,
SHADER_TANGENT_ATTRIB = 3,
SHADER_BINORMAL_ATTRIB = 4,
SHADER_COLOR0_ATTRIB = 5,
SHADER_COLOR1_ATTRIB = 6,