@@ -220,13 +220,10 @@ PixelShaderUid GetPixelShaderUid()

// indirect texture map lookup
int nIndirectStagesUsed = 0;
if (uid_data->genMode_numindstages > 0)
for (unsigned int i = 0; i < numStages; ++i)
{
for (unsigned int i = 0; i < numStages; ++i)
{
if (bpmem.tevind[i].IsActive() && bpmem.tevind[i].bt < uid_data->genMode_numindstages)
nIndirectStagesUsed |= 1 << bpmem.tevind[i].bt;
}
if (bpmem.tevind[i].IsActive())
nIndirectStagesUsed |= 1 << bpmem.tevind[i].bt;
}

uid_data->nIndirectStagesUsed = nIndirectStagesUsed;
@@ -238,16 +235,14 @@ PixelShaderUid GetPixelShaderUid()

for (unsigned int n = 0; n < numStages; n++)
{
int texcoord = bpmem.tevorders[n / 2].getTexCoord(n & 1);
bool bHasTexCoord = (u32)texcoord < bpmem.genMode.numtexgens;
// HACK to handle cases where the tex gen is not enabled
if (!bHasTexCoord)
texcoord = bpmem.genMode.numtexgens;
uid_data->stagehash[n].tevorders_texcoord = bpmem.tevorders[n / 2].getTexCoord(n & 1);

// hasindstage previously was used as a criterion to set tevind to 0, but there are variables in
// tevind that are used even if the indirect stage is disabled, so now it is only left in to
// avoid breaking existing UIDs (in most cases, games will have 0 in tevind anyways)
// TODO: Remove hasindstage on the next UID version bump
uid_data->stagehash[n].hasindstage = bpmem.tevind[n].bt < bpmem.genMode.numindstages;
uid_data->stagehash[n].tevorders_texcoord = texcoord;
if (uid_data->stagehash[n].hasindstage)
uid_data->stagehash[n].tevind = bpmem.tevind[n].hex;
uid_data->stagehash[n].tevind = bpmem.tevind[n].hex;

TevStageCombiner::ColorCombiner& cc = bpmem.combiners[n].colorC;
TevStageCombiner::AlphaCombiner& ac = bpmem.combiners[n].alphaC;
@@ -361,7 +356,7 @@ void ClearUnusedPixelShaderUidBits(APIType api_type, const ShaderHostConfig& hos
uid_data->bounding_box &= host_config.bounding_box & host_config.backend_bbox;
}

void WritePixelShaderCommonHeader(ShaderCode& out, APIType api_type, u32 num_texgens,
void WritePixelShaderCommonHeader(ShaderCode& out, APIType api_type,
const ShaderHostConfig& host_config, bool bounding_box)
{
// dot product for integer vectors
@@ -546,8 +541,7 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos
uid_data->genMode_numtexgens, uid_data->genMode_numindstages);

// Stuff that is shared between ubershaders and pixelgen.
WritePixelShaderCommonHeader(out, api_type, uid_data->genMode_numtexgens, host_config,
uid_data->bounding_box);
WritePixelShaderCommonHeader(out, api_type, host_config, uid_data->bounding_box);

if (uid_data->forced_early_z && g_ActiveConfig.backend_info.bSupportsEarlyZ)
{
@@ -775,9 +769,11 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos
out.Write("col1 = float4(0.0, 0.0, 0.0, 0.0);\n");
}

// HACK to handle cases where the tex gen is not enabled
if (uid_data->genMode_numtexgens == 0)
{
// TODO: This is a hack to ensure that shaders still compile when setting out of bounds tex
// coord indices to 0. Ideally, it shouldn't exist at all, but the exact behavior hasn't been
// tested.
out.Write("\tint2 fixpoint_uv0 = int2(0, 0);\n\n");
}
else
@@ -796,24 +792,34 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos
{
if ((uid_data->nIndirectStagesUsed & (1U << i)) != 0)
{
const u32 texcoord = uid_data->GetTevindirefCoord(i);
u32 texcoord = uid_data->GetTevindirefCoord(i);
const u32 texmap = uid_data->GetTevindirefMap(i);

if (texcoord < uid_data->genMode_numtexgens)
{
out.SetConstantsUsed(C_INDTEXSCALE + i / 2, C_INDTEXSCALE + i / 2);
out.Write("\ttempcoord = fixpoint_uv{} >> " I_INDTEXSCALE "[{}].{};\n", texcoord, i / 2,
(i & 1) != 0 ? "zw" : "xy");
}
else
{
out.Write("\ttempcoord = int2(0, 0);\n");
}
// Quirk: when the tex coord is not less than the number of tex gens (i.e. the tex coord does
// not exist), then tex coord 0 is used (though sometimes glitchy effects happen on console).
// This affects the Mario portrait in Luigi's Mansion, where the developers forgot to set
// the number of tex gens to 2 (bug 11462).
if (texcoord >= uid_data->genMode_numtexgens)
texcoord = 0;

out.SetConstantsUsed(C_INDTEXSCALE + i / 2, C_INDTEXSCALE + i / 2);
out.Write("\ttempcoord = fixpoint_uv{} >> " I_INDTEXSCALE "[{}].{};\n", texcoord, i / 2,
(i & 1) ? "zw" : "xy");

out.Write("\tint3 iindtex{} = ", i);
SampleTexture(out, "float2(tempcoord)", "abg", texmap, stereo, api_type);
}
}
for (u32 i = uid_data->genMode_numindstages; i < 4; i++)
{
// Referencing a stage above the number of ind stages is undefined behavior,
// and on console produces a noise pattern (details unknown).
// TODO: This behavior is nowhere near that, but it ensures the shader still compiles.
if ((uid_data->nIndirectStagesUsed & (1U << i)) != 0)
{
out.Write("\tint3 iindtex{} = int3(0, 0, 0); // Undefined behavior on console\n", i);
}
}

for (u32 i = 0; i < numStages; i++)
{
@@ -950,17 +956,15 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i
const auto& stage = uid_data->stagehash[n];
out.Write("\n\t// TEV stage {}\n", n);

// HACK to handle cases where the tex gen is not enabled
// Quirk: when the tex coord is not less than the number of tex gens (i.e. the tex coord does not
// exist), then tex coord 0 is used (though sometimes glitchy effects happen on console).
u32 texcoord = stage.tevorders_texcoord;
const bool has_tex_coord = texcoord < uid_data->genMode_numtexgens;
if (!has_tex_coord)
texcoord = 0;

if (stage.hasindstage)
{
TevStageIndirect tevind;
tevind.hex = stage.tevind;

const TevStageIndirect tevind{.hex = stage.tevind};
out.Write("\t// indirect op\n");

// Perform the indirect op on the incoming regular coordinates
@@ -991,7 +995,7 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i
// TODO: Should we reset alphabump to 0 here?
}

if (tevind.mid != 0)
if (tevind.matrix_index != IndMtxIndex::Off)
{
// format
static constexpr std::array<const char*, 4> tev_ind_fmt_mask{
@@ -1038,11 +1042,14 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i
tev_ind_bias_add[u32(tevind.fmt.Value())]);
}

// Multiplied by 2 because each matrix has two rows.
// Note also that the 4th column of the matrix contains the scale factor.
const u32 mtxidx = 2 * (static_cast<u32>(tevind.matrix_index.Value()) - 1);

// multiply by offset matrix and scale - calculations are likely to overflow badly,
// yet it works out since we only care about the lower 23 bits (+1 sign bit) of the result
if (tevind.mid <= 3)
if (tevind.matrix_id == IndMtxId::Indirect)
{
const u32 mtxidx = 2 * (tevind.mid - 1);
out.SetConstantsUsed(C_INDTEXMTX + mtxidx, C_INDTEXMTX + mtxidx);

out.Write("\tint2 indtevtrans{} = int2(idot(" I_INDTEXMTX
@@ -1064,10 +1071,9 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i
out.Write("\telse indtevtrans{} <<= (-" I_INDTEXMTX "[{}].w);\n", n, mtxidx);
}
}
else if (tevind.mid <= 7 && has_tex_coord)
{ // s matrix
ASSERT(tevind.mid >= 5);
const u32 mtxidx = 2 * (tevind.mid - 5);
else if (tevind.matrix_id == IndMtxId::S)
{
ASSERT(has_tex_coord);
out.SetConstantsUsed(C_INDTEXMTX + mtxidx, C_INDTEXMTX + mtxidx);

out.Write("\tint2 indtevtrans{} = int2(fixpoint_uv{} * iindtevcrd{}.xx) >> 8;\n", n,
@@ -1086,10 +1092,9 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i
out.Write("\telse indtevtrans{} <<= (-" I_INDTEXMTX "[{}].w);\n", n, mtxidx);
}
}
else if (tevind.mid <= 11 && has_tex_coord)
{ // t matrix
ASSERT(tevind.mid >= 9);
const u32 mtxidx = 2 * (tevind.mid - 9);
else if (tevind.matrix_id == IndMtxId::T)
{
ASSERT(has_tex_coord);
out.SetConstantsUsed(C_INDTEXMTX + mtxidx, C_INDTEXMTX + mtxidx);

out.Write("\tint2 indtevtrans{} = int2(fixpoint_uv{} * iindtevcrd{}.yy) >> 8;\n", n,
@@ -1112,50 +1117,52 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i
else
{
out.Write("\tint2 indtevtrans{} = int2(0, 0);\n", n);
ASSERT(false); // Unknown value for matrix_id
}
}
else
{
out.Write("\tint2 indtevtrans{} = int2(0, 0);\n", n);
// If matrix_index is Off (0), matrix_id should be Indirect (0)
ASSERT(tevind.matrix_id == IndMtxId::Indirect);
}

// ---------
// Wrapping
// ---------

// TODO: Should the last element be 1 or (1<<7)?
static constexpr std::array<const char*, 7> tev_ind_wrap_start{
"0", "(256<<7)", "(128<<7)", "(64<<7)", "(32<<7)", "(16<<7)", "1",
static constexpr std::array<const char*, 5> tev_ind_wrap_start{
"(256<<7)", "(128<<7)", "(64<<7)", "(32<<7)", "(16<<7)",
};

// wrap S
if (tevind.sw == IndTexWrap::ITW_OFF)
{
out.Write("\twrappedcoord.x = fixpoint_uv{}.x;\n", texcoord);
}
else if (tevind.sw == IndTexWrap::ITW_0)
else if (tevind.sw >= IndTexWrap::ITW_0) // 7 (Invalid) appears to behave the same as 6 (ITW_0)
{
out.Write("\twrappedcoord.x = 0;\n");
}
else
{
out.Write("\twrappedcoord.x = fixpoint_uv{}.x & ({} - 1);\n", texcoord,
tev_ind_wrap_start[u32(tevind.sw.Value())]);
tev_ind_wrap_start[u32(tevind.sw.Value()) - u32(IndTexWrap::ITW_256)]);
}

// wrap T
if (tevind.tw == IndTexWrap::ITW_OFF)
{
out.Write("\twrappedcoord.y = fixpoint_uv{}.y;\n", texcoord);
}
else if (tevind.tw == IndTexWrap::ITW_0)
else if (tevind.tw >= IndTexWrap::ITW_0) // 7 (Invalid) appears to behave the same as 6 (ITW_0)
{
out.Write("\twrappedcoord.y = 0;\n");
}
else
{
out.Write("\twrappedcoord.y = fixpoint_uv{}.y & ({} - 1);\n", texcoord,
tev_ind_wrap_start[u32(tevind.tw.Value())]);
tev_ind_wrap_start[u32(tevind.tw.Value()) - u32(IndTexWrap::ITW_256)]);
}

if (tevind.fb_addprev) // add previous tevcoord
@@ -1191,7 +1198,7 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i
out.Write("\trastemp = {}.{};\n", tev_ras_table[u32(stage.tevorders_colorchan)], rasswap);
}

if (stage.tevorders_enable)
if (stage.tevorders_enable && uid_data->genMode_numtexgens > 0)
{
// Generate swizzle string to represent the texture color channel swapping
const char texswap[5] = {
@@ -1202,17 +1209,15 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i
'\0',
};

if (!stage.hasindstage)
{
// calc tevcord
if (has_tex_coord)
out.Write("\ttevcoord.xy = fixpoint_uv{};\n", texcoord);
else
out.Write("\ttevcoord.xy = int2(0, 0);\n");
}
out.Write("\ttextemp = ");
SampleTexture(out, "float2(tevcoord.xy)", texswap, stage.tevorders_texmap, stereo, api_type);
}
else if (uid_data->genMode_numtexgens == 0)
{
// It seems like the result is always black when no tex coords are enabled, but further testing
// is needed.
out.Write("\ttextemp = int4(0, 0, 0, 0);\n");
}
else
{
out.Write("\ttextemp = int4(255, 255, 255, 255);\n");
@@ -66,9 +66,9 @@ struct pixel_shader_uid_data
u32 tevindref_bi1 : 3;
u32 tevindref_bc1 : 3;
u32 tevindref_bi2 : 3;
u32 tevindref_bc2 : 3;
u32 tevindref_bi3 : 3;
u32 tevindref_bc3 : 3;
u32 tevindref_bi4 : 3;
u32 tevindref_bc4 : 3;

void SetTevindrefValues(int index, u32 texcoord, u32 texmap)
{
@@ -84,55 +84,39 @@ struct pixel_shader_uid_data
}
else if (index == 2)
{
tevindref_bc3 = texcoord;
tevindref_bc2 = texcoord;
tevindref_bi2 = texmap;
}
else if (index == 3)
{
tevindref_bc4 = texcoord;
tevindref_bi4 = texmap;
tevindref_bc3 = texcoord;
tevindref_bi3 = texmap;
}
}

// Returns the texture coordinate index recorded for the given indirect stage.
//
// index: indirect stage number; 0 through 3 select tevindref_bc0..tevindref_bc3.
// Returns 0 for any other index.
//
// Indices 2 and 3 read tevindref_bc2 / tevindref_bc3, the same fields that
// SetTevindrefValues writes for those indices.
u32 GetTevindirefCoord(int index) const
{
  switch (index)
  {
  case 0:
    return tevindref_bc0;
  case 1:
    return tevindref_bc1;
  case 2:
    return tevindref_bc2;
  case 3:
    return tevindref_bc3;
  default:
    // Out-of-range stage: no field to read.
    return 0;
  }
}

// Returns the texture map index recorded for the given indirect stage.
//
// index: indirect stage number; 0 through 3 select tevindref_bi0..tevindref_bi3.
// Returns 0 for any other index.
//
// Indices 2 and 3 read tevindref_bi2 / tevindref_bi3, the same fields that
// SetTevindrefValues writes for those indices.
u32 GetTevindirefMap(int index) const
{
  switch (index)
  {
  case 0:
    return tevindref_bi0;
  case 1:
    return tevindref_bi1;
  case 2:
    return tevindref_bi2;
  case 3:
    return tevindref_bi3;
  default:
    // Out-of-range stage: no field to read.
    return 0;
  }
}

@@ -149,6 +133,7 @@ struct pixel_shader_uid_data
u32 pad1 : 6;

// TODO: Clean up the swapXY mess
// TODO: remove hasindstage, as it no longer does anything useful
u32 hasindstage : 1;
u32 tevind : 21;
u32 tevksel_swap1a : 2;
@@ -174,7 +159,7 @@ using PixelShaderUid = ShaderUid<pixel_shader_uid_data>;

ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& host_config,
const pixel_shader_uid_data* uid_data);
void WritePixelShaderCommonHeader(ShaderCode& out, APIType api_type, u32 num_texgens,
void WritePixelShaderCommonHeader(ShaderCode& out, APIType api_type,
const ShaderHostConfig& host_config, bool bounding_box);
void ClearUnusedPixelShaderUidBits(APIType api_type, const ShaderHostConfig& host_config,
PixelShaderUid* uid);
@@ -148,29 +148,18 @@ void PixelShaderManager::SetConstants()

for (u32 i = 0; i < (bpmem.genMode.numtevstages + 1); ++i)
{
// Note: a tevind of zero just happens to be a passthrough, so no need
// to set an extra bit. Furthermore, wrap and add to previous apply even if there is no
// indirect stage.
constants.pack1[i][2] = bpmem.tevind[i].hex;

u32 stage = bpmem.tevind[i].bt;
if (stage < bpmem.genMode.numindstages)
{
// We set some extra bits so the ubershader can quickly check if these
// features are in use.
if (bpmem.tevind[i].IsActive())
constants.pack1[stage][3] =
bpmem.tevindref.getTexCoord(stage) | bpmem.tevindref.getTexMap(stage) << 8 | 1 << 16;
// Note: a tevind of zero just happens to be a passthrough, so no need
// to set an extra bit.
constants.pack1[i][2] = bpmem.tevind[i].hex; // TODO: This match shadergen, but videosw
// will always wrap.

// The ubershader uses tevind != 0 as a condition whether to calculate texcoords,
// even when texture is disabled, instead of the stage < bpmem.genMode.numindstages.
// We set an unused bit here to indicate that the stage is active, even if it
// is just a pass-through.
constants.pack1[i][2] |= 0x80000000;
}
else
{
constants.pack1[i][2] = 0;
}

// We use an extra bit (1 << 16) to provide a fast way of testing if this feature is in use.
// Note also that this is indexed by indirect stage, not by TEV stage.
if (bpmem.tevind[i].IsActive() && stage < bpmem.genMode.numindstages)
constants.pack1[stage][3] =
bpmem.tevindref.getTexCoord(stage) | bpmem.tevindref.getTexMap(stage) << 8 | 1 << 16;
}

dirty = true;
@@ -336,9 +325,7 @@ void PixelShaderManager::SetIndTexScaleChanged(bool high)

void PixelShaderManager::SetIndMatrixChanged(int matrixidx)
{
int scale = ((u32)bpmem.indmtx[matrixidx].col0.s0 << 0) |
((u32)bpmem.indmtx[matrixidx].col1.s1 << 2) |
((u32)bpmem.indmtx[matrixidx].col2.s2 << 4);
const u8 scale = bpmem.indmtx[matrixidx].GetScale();

// xyz - static matrix
// w - dynamic matrix scale / 128
@@ -64,7 +64,7 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,

out.Write("// Pixel UberShader for {} texgens{}{}\n", numTexgen,
early_depth ? ", early-depth" : "", per_pixel_depth ? ", per-pixel depth" : "");
WritePixelShaderCommonHeader(out, ApiType, numTexgen, host_config, bounding_box);
WritePixelShaderCommonHeader(out, ApiType, host_config, bounding_box);
WriteUberShaderCommonHeader(out, ApiType, host_config);
if (per_pixel_lighting)
WriteLightingFunction(out);
@@ -148,68 +148,68 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
}

// Uniform index -> texture coordinates
// Quirk: when the tex coord is not less than the number of tex gens (i.e. the tex coord does
// not exist), then tex coord 0 is used (though sometimes glitchy effects happen on console).
// This affects the Mario portrait in Luigi's Mansion, where the developers forgot to set
// the number of tex gens to 2 (bug 11462).
if (numTexgen > 0)
{
if (ApiType != APIType::D3D)
{
out.Write("float3 selectTexCoord(uint index) {{\n");
}
else
{
out.Write("float3 selectTexCoord(uint index");
for (u32 i = 0; i < numTexgen; i++)
out.Write(", float3 tex{}", i);
out.Write(") {{\n");
}
out.Write("int2 selectTexCoord(uint index");
for (u32 i = 0; i < numTexgen; i++)
out.Write(", int2 fixpoint_uv{}", i);
out.Write(") {{\n");

if (ApiType == APIType::D3D)
{
out.Write(" switch (index) {{\n");
for (u32 i = 0; i < numTexgen; i++)
{
out.Write(" case {}u:\n"
" return tex{};\n",
" return fixpoint_uv{};\n",
i, i);
}
out.Write(" default:\n"
" return float3(0.0, 0.0, 0.0);\n"
" return fixpoint_uv0;\n"
" }}\n");
}
else
{
out.Write(" if (index >= {}u) {{\n", numTexgen);
out.Write(" return fixpoint_uv0;\n"
" }}\n");
if (numTexgen > 4)
out.Write(" if (index < 4u) {{\n");
if (numTexgen > 2)
out.Write(" if (index < 2u) {{\n");
if (numTexgen > 1)
out.Write(" return (index == 0u) ? tex0 : tex1;\n");
out.Write(" return (index == 0u) ? fixpoint_uv0 : fixpoint_uv1;\n");
else
out.Write(" return (index == 0u) ? tex0 : float3(0.0, 0.0, 0.0);\n");
out.Write(" return fixpoint_uv0;\n");
if (numTexgen > 2)
{
out.Write(" }} else {{\n"); // >= 2
out.Write(" }} else {{\n"); // >= 2 < min(4, numTexgen)
if (numTexgen > 3)
out.Write(" return (index == 2u) ? tex2 : tex3;\n");
out.Write(" return (index == 2u) ? fixpoint_uv2 : fixpoint_uv3;\n");
else
out.Write(" return (index == 2u) ? tex2 : float3(0.0, 0.0, 0.0);\n");
out.Write(" return fixpoint_uv2;\n");
out.Write(" }}\n");
}
if (numTexgen > 4)
{
out.Write(" }} else {{\n"); // >= 4 <= 8
out.Write(" }} else {{\n"); // >= 4 < min(8, numTexgen)
if (numTexgen > 6)
out.Write(" if (index < 6u) {{\n");
if (numTexgen > 5)
out.Write(" return (index == 4u) ? tex4 : tex5;\n");
out.Write(" return (index == 4u) ? fixpoint_uv4 : fixpoint_uv5;\n");
else
out.Write(" return (index == 4u) ? tex4 : float3(0.0, 0.0, 0.0);\n");
out.Write(" return fixpoint_uv4;\n");
if (numTexgen > 6)
{
out.Write(" }} else {{\n"); // >= 6 <= 8
out.Write(" }} else {{\n"); // >= 6 < min(8, numTexgen)
if (numTexgen > 7)
out.Write(" return (index == 6u) ? tex6 : tex7;\n");
out.Write(" return (index == 6u) ? fixpoint_uv6 : fixpoint_uv7;\n");
else
out.Write(" return (index == 6u) ? tex6 : float3(0.0, 0.0, 0.0);\n");
out.Write(" return fixpoint_uv6;\n");
out.Write(" }}\n");
}
out.Write(" }}\n");
@@ -287,15 +287,15 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
// ======================
const auto LookupIndirectTexture = [&out, stereo](std::string_view out_var_name,
std::string_view in_index_name) {
// in_index_name is the indirect stage, not the tev stage
// bpmem_iref is packed differently from RAS1_IREF
out.Write("{{\n"
" uint iref = bpmem_iref({});\n"
" if ( iref != 0u)\n"
" {{\n"
" uint texcoord = bitfieldExtract(iref, 0, 3);\n"
" uint texmap = bitfieldExtract(iref, 8, 3);\n"
" float3 uv = getTexCoord(texcoord);\n"
" int2 fixedPoint_uv = int2((uv.z == 0.0 ? uv.xy : (uv.xy / uv.z)) * " I_TEXDIMS
"[texcoord].zw);\n"
" int2 fixedPoint_uv = getTexCoord(texcoord);\n"
"\n"
" if (({} & 1u) == 0u)\n"
" fixedPoint_uv = fixedPoint_uv >> " I_INDTEXSCALE "[{} >> 1].xy;\n"
@@ -306,6 +306,10 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
"[texmap].xy, {})).abg;\n",
in_index_name, in_index_name, in_index_name, in_index_name, out_var_name,
stereo ? "float(layer)" : "0.0");
// There is always a bit set in bpmem_iref if the data is valid (matrix is not off, and the
// indirect texture stage is enabled). If the matrix is off, the result doesn't matter; if the
// indirect texture stage is disabled, the result is undefined (and produces a glitchy pattern
// on hardware, different from this).
out.Write(" }}\n"
" else\n"
" {{\n"
@@ -666,21 +670,14 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
"\n");
}

// Since the texture coordinate variables aren't global, we need to pass
// them to the select function in D3D.
// Since the fixed-point texture coordinate variables aren't global, we need to pass
// them to the select function. This applies to all backends.
if (numTexgen > 0)
{
if (ApiType != APIType::D3D)
{
out.Write("#define getTexCoord(index) selectTexCoord((index))\n\n");
}
else
{
out.Write("#define getTexCoord(index) selectTexCoord((index)");
for (u32 i = 0; i < numTexgen; i++)
out.Write(", tex{}", i);
out.Write(")\n\n");
}
out.Write("#define getTexCoord(index) selectTexCoord((index)");
for (u32 i = 0; i < numTexgen; i++)
out.Write(", fixpoint_uv{}", i);
out.Write(")\n\n");
}

if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan)
@@ -788,11 +785,18 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
// Disable texturing when there are no texgens (for now)
if (numTexgen != 0)
{
out.Write(" uint tex_coord = {};\n",
for (u32 i = 0; i < numTexgen; i++)
{
out.Write(" int2 fixpoint_uv{} = int2(", i);
out.Write("(tex{}.z == 0.0 ? tex{}.xy : tex{}.xy / tex{}.z)", i, i, i, i);
out.Write(" * " I_TEXDIMS "[{}].zw);\n", i);
// TODO: S24 overflows here?
}

out.Write("\n"
" uint tex_coord = {};\n",
BitfieldExtract<&TwoTevStageOrders::texcoord0>("ss.order"));
out.Write(" float3 uv = getTexCoord(tex_coord);\n"
" int2 fixedPoint_uv = int2((uv.z == 0.0 ? uv.xy : (uv.xy / uv.z)) * " I_TEXDIMS
"[tex_coord].zw);\n"
out.Write(" int2 fixedPoint_uv = getTexCoord(tex_coord);\n"
"\n"
" bool texture_enabled = (ss.order & {}u) != 0u;\n",
1 << TwoTevStageOrders().enable0.StartBit());
@@ -806,7 +810,10 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
out.Write(" uint fmt = {};\n", BitfieldExtract<&TevStageIndirect::fmt>("tevind"));
out.Write(" uint bias = {};\n", BitfieldExtract<&TevStageIndirect::bias>("tevind"));
out.Write(" uint bt = {};\n", BitfieldExtract<&TevStageIndirect::bt>("tevind"));
out.Write(" uint mid = {};\n", BitfieldExtract<&TevStageIndirect::mid>("tevind"));
out.Write(" uint matrix_index = {};\n",
BitfieldExtract<&TevStageIndirect::matrix_index>("tevind"));
out.Write(" uint matrix_id = {};\n",
BitfieldExtract<&TevStageIndirect::matrix_id>("tevind"));
out.Write("\n");
out.Write(" int3 indcoord;\n");
LookupIndirectTexture("indcoord", "bt");
@@ -846,12 +853,12 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
"\n"
" // Matrix multiply\n"
" int2 indtevtrans = int2(0, 0);\n"
" if ((mid & 3u) != 0u)\n"
" if (matrix_index != 0u)\n"
" {{\n"
" uint mtxidx = 2u * ((mid & 3u) - 1u);\n"
" uint mtxidx = 2u * (matrix_index - 1u);\n"
" int shift = " I_INDTEXMTX "[mtxidx].w;\n"
"\n"
" switch (mid >> 2)\n"
" switch (matrix_id)\n"
" {{\n"
" case 0u: // 3x2 S0.10 matrix\n"
" indtevtrans = int2(idot(" I_INDTEXMTX