Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Merge pull request #10096 from phire/consolidate_tex_unit_addressing
Consolidate TexUnit addressing
  • Loading branch information
phire committed Oct 9, 2021
2 parents 1beaa07 + 9fa2662 commit af043c0
Show file tree
Hide file tree
Showing 8 changed files with 229 additions and 99 deletions.
14 changes: 3 additions & 11 deletions Source/Core/VideoBackends/Software/DebugUtil.cpp
Expand Up @@ -52,13 +52,8 @@ void Shutdown()

static void SaveTexture(const std::string& filename, u32 texmap, s32 mip)
{
FourTexUnits& texUnit = bpmem.tex[(texmap >> 2) & 1];
u8 subTexmap = texmap & 3;

TexImage0& ti0 = texUnit.texImage0[subTexmap];

u32 width = ti0.width + 1;
u32 height = ti0.height + 1;
u32 width = bpmem.tex.GetUnit(texmap).texImage0.width + 1;
u32 height = bpmem.tex.GetUnit(texmap).texImage0.height + 1;

auto data = std::make_unique<u8[]>(width * height * 4);

Expand All @@ -80,10 +75,7 @@ void GetTextureRGBA(u8* dst, u32 texmap, s32 mip, u32 width, u32 height)

static s32 GetMaxTextureLod(u32 texmap)
{
FourTexUnits& texUnit = bpmem.tex[(texmap >> 2) & 1];
u8 subTexmap = texmap & 3;

u8 maxLod = texUnit.texMode1[subTexmap].max_lod;
u8 maxLod = bpmem.tex.GetUnit(texmap).texMode1.max_lod;
u8 mip = maxLod >> 4;
u8 fract = maxLod & 0xf;

Expand Down
7 changes: 3 additions & 4 deletions Source/Core/VideoBackends/Software/Rasterizer.cpp
Expand Up @@ -163,13 +163,12 @@ static void InitSlope(Slope* slope, float f1, float f2, float f3, float DX31, fl

static inline void CalculateLOD(s32* lodp, bool* linear, u32 texmap, u32 texcoord)
{
const FourTexUnits& texUnit = bpmem.tex[(texmap >> 2) & 1];
const u8 subTexmap = texmap & 3;
auto texUnit = bpmem.tex.GetUnit(texmap);

// LOD calculation requires data from the texture mode for bias, etc.
// it does not seem to use the actual texture size
const TexMode0& tm0 = texUnit.texMode0[subTexmap];
const TexMode1& tm1 = texUnit.texMode1[subTexmap];
const TexMode0& tm0 = texUnit.texMode0;
const TexMode1& tm1 = texUnit.texMode1;

float sDelta, tDelta;
if (tm0.diag_lod == LODType::Diagonal)
Expand Down
25 changes: 12 additions & 13 deletions Source/Core/VideoBackends/Software/TextureSampler.cpp
Expand Up @@ -74,8 +74,8 @@ void Sample(s32 s, s32 t, s32 lod, bool linear, u8 texmap, u8* sample)
bool mipLinear = false;

#if (ALLOW_MIPMAP)
const FourTexUnits& texUnit = bpmem.tex[(texmap >> 2) & 1];
const TexMode0& tm0 = texUnit.texMode0[texmap & 3];
auto texUnit = bpmem.tex.GetUnit(texmap);
const TexMode0& tm0 = texUnit.texMode0;

const s32 lodFract = lod & 0xf;

Expand Down Expand Up @@ -115,26 +115,25 @@ void Sample(s32 s, s32 t, s32 lod, bool linear, u8 texmap, u8* sample)

void SampleMip(s32 s, s32 t, s32 mip, bool linear, u8 texmap, u8* sample)
{
const FourTexUnits& texUnit = bpmem.tex[(texmap >> 2) & 1];
const u8 subTexmap = texmap & 3;
auto texUnit = bpmem.tex.GetUnit(texmap);

const TexMode0& tm0 = texUnit.texMode0[subTexmap];
const TexImage0& ti0 = texUnit.texImage0[subTexmap];
const TexTLUT& texTlut = texUnit.texTlut[subTexmap];
const TexMode0& tm0 = texUnit.texMode0;
const TexImage0& ti0 = texUnit.texImage0;
const TexTLUT& texTlut = texUnit.texTlut;
const TextureFormat texfmt = ti0.format;
const TLUTFormat tlutfmt = texTlut.tlut_format;

const u8* imageSrc;
const u8* imageSrcOdd = nullptr;
if (texUnit.texImage1[subTexmap].cache_manually_managed)
if (texUnit.texImage1.cache_manually_managed)
{
imageSrc = &texMem[texUnit.texImage1[subTexmap].tmem_even * TMEM_LINE_SIZE];
imageSrc = &texMem[texUnit.texImage1.tmem_even * TMEM_LINE_SIZE];
if (texfmt == TextureFormat::RGBA8)
imageSrcOdd = &texMem[texUnit.texImage2[subTexmap].tmem_odd * TMEM_LINE_SIZE];
imageSrcOdd = &texMem[texUnit.texImage2.tmem_odd * TMEM_LINE_SIZE];
}
else
{
const u32 imageBase = texUnit.texImage3[subTexmap].image_base << 5;
const u32 imageBase = texUnit.texImage3.image_base << 5;
imageSrc = Memory::GetPointer(imageBase);
}

Expand Down Expand Up @@ -198,7 +197,7 @@ void SampleMip(s32 s, s32 t, s32 mip, bool linear, u8 texmap, u8* sample)
WrapCoord(&imageSPlus1, tm0.wrap_s, image_width_minus_1 + 1);
WrapCoord(&imageTPlus1, tm0.wrap_t, image_height_minus_1 + 1);

if (!(texfmt == TextureFormat::RGBA8 && texUnit.texImage1[subTexmap].cache_manually_managed))
if (!(texfmt == TextureFormat::RGBA8 && texUnit.texImage1.cache_manually_managed))
{
TexDecoder_DecodeTexel(sampledTex, imageSrc, imageS, imageT, image_width_minus_1, texfmt,
tlut, tlutfmt);
Expand Down Expand Up @@ -250,7 +249,7 @@ void SampleMip(s32 s, s32 t, s32 mip, bool linear, u8 texmap, u8* sample)
WrapCoord(&imageS, tm0.wrap_s, image_width_minus_1 + 1);
WrapCoord(&imageT, tm0.wrap_t, image_height_minus_1 + 1);

if (!(texfmt == TextureFormat::RGBA8 && texUnit.texImage1[subTexmap].cache_manually_managed))
if (!(texfmt == TextureFormat::RGBA8 && texUnit.texImage1.cache_manually_managed))
TexDecoder_DecodeTexel(sample, imageSrc, imageS, imageT, image_width_minus_1, texfmt, tlut,
tlutfmt);
else
Expand Down
168 changes: 155 additions & 13 deletions Source/Core/VideoCommon/BPMemory.h
Expand Up @@ -951,18 +951,6 @@ struct fmt::formatter<ZTex2>
}
};

// Register bank for a group of four texture units.
// Each register kind is stored as an array of four entries, one per unit in
// the group. Callers in this codebase index BPMemory::tex (two of these banks,
// BP addresses 0x80-0xbf) with (texmap >> 2) & 1 to pick the bank and
// texmap & 3 to pick the entry inside each array.
struct FourTexUnits
{
TexMode0 texMode0[4];
TexMode1 texMode1[4];
TexImage0 texImage0[4];
TexImage1 texImage1[4];
TexImage2 texImage2[4];
TexImage3 texImage3[4];
TexTLUT texTlut[4];
// Fourth-of-eight register slot with no known meaning; kept so the bank
// spans a full 8 x 4 words.
u32 unknown[4];
};

// Geometry/other structs
enum class CullMode : u32
{
Expand Down Expand Up @@ -1979,6 +1967,160 @@ struct BPS_TmemConfig
u32 texinvalidate;
};

union AllTexUnits;

// The addressing of the texture units is a bit non-obvious.
// This struct abstracts the complexity away.
union TexUnitAddress
{
enum class Register : u32
{
SETMODE0 = 0,
SETMODE1 = 1,
SETIMAGE0 = 2,
SETIMAGE1 = 3,
SETIMAGE2 = 4,
SETIMAGE3 = 5,
SETTLUT = 6,
UNKNOWN = 7,
};

BitField<0, 2, u32> UnitIdLow;
BitField<2, 3, Register> Reg;
BitField<5, 1, u32> UnitIdHigh;

BitField<0, 6, u32> FullAddress;
u32 hex;

TexUnitAddress() : hex(0) {}
TexUnitAddress(u32 unit_id, Register reg = Register::SETMODE0) : hex(0)
{
UnitIdLow = unit_id & 3;
UnitIdHigh = unit_id >> 2;
Reg = reg;
}

static TexUnitAddress FromBPAddress(u32 Address)
{
TexUnitAddress Val;
// Clear upper two bits (which should always be 0x80)
Val.FullAddress = Address & 0x3f;
return Val;
}

u32 GetUnitID() const { return UnitIdLow | (UnitIdHigh << 2); }

private:
friend AllTexUnits;

size_t GetOffset() const { return FullAddress; }
size_t GetBPAddress() const { return FullAddress | 0x80; }

static constexpr size_t ComputeOffset(u32 unit_id)
{
// FIXME: Would be nice to construct a TexUnitAddress and get its offset,
// but that doesn't seem to be possible in c++17

// So we manually re-implement the calculation
return (unit_id & 3) | ((unit_id & 4) << 3);
}
};
static_assert(sizeof(TexUnitAddress) == sizeof(u32));

// A view of the registers of a single TexUnit
//
// The registers of one unit are not contiguous: TexUnitAddress puts the low
// two bits of the unit id below the register selector, so successive
// registers of the same unit are four u32 words apart. Each named register
// below is therefore followed by three unnamed 32-bit bit-fields that act as
// padding. The final register has no trailing padding, which is why the size
// asserted after the struct is 7 * 4 + 1 words.
struct TexUnit
{
TexMode0 texMode0;
u32 : 32; // doing u32 : 96 is legal according to the standard, but msvc
u32 : 32; // doesn't like it. So we stack multiple lines of u32 : 32;
u32 : 32;
TexMode1 texMode1;
u32 : 32;
u32 : 32;
u32 : 32;
TexImage0 texImage0;
u32 : 32;
u32 : 32;
u32 : 32;
TexImage1 texImage1;
u32 : 32;
u32 : 32;
u32 : 32;
TexImage2 texImage2;
u32 : 32;
u32 : 32;
u32 : 32;
TexImage3 texImage3;
u32 : 32;
u32 : 32;
u32 : 32;
TexTLUT texTlut;
u32 : 32;
u32 : 32;
u32 : 32;
// Register slot 7 (TexUnitAddress::Register::UNKNOWN).
u32 unknown;
};
// Seven registers with a four-word stride, plus the final register on its own.
static_assert(sizeof(TexUnit) == sizeof(u32) * 4 * 7 + sizeof(u32));

// Storage for the registers of all eight texture units (8 units x 8 register
// slots, one u32 each), plus an accessor that returns a strided TexUnit view
// onto the registers of one unit.
union AllTexUnits
{
// Raw register words, indexed by TexUnitAddress::GetOffset().
std::array<u32, 8 * 8> AllRegisters;

// Returns a read-only view of the registers belonging to unit UnitId.
// The view starts at the unit's SETMODE0 word (the default register of the
// TexUnitAddress constructor) and is a reinterpretation of AllRegisters, not
// a copy; binding the result to a non-reference `auto` copies the TexUnit.
const TexUnit& GetUnit(u32 UnitId) const
{
auto address = TexUnitAddress(UnitId);
const u32* ptr = &AllRegisters[address.GetOffset()];
return *reinterpret_cast<const TexUnit*>(ptr);
}

private:
// For debuggers since GetUnit can be optimised out in release builds
//
// Each anonymous struct below prefixes a TexUnit with exactly
// TexUnitAddress::ComputeOffset(N) words of padding, so texN overlays the
// same words that GetUnit(N) would return.
template <u32 UnitId>
struct TexUnitPadding
{
static_assert(UnitId != 0, "Can't use 0 as sizeof(std::array<u32, 0>) != 0");
std::array<u32, TexUnitAddress::ComputeOffset(UnitId)> pad;
};

// Unit 0 starts at offset 0 and needs no padding.
TexUnit tex0;
struct
{
TexUnitPadding<1> pad1;
TexUnit tex1;
};
struct
{
TexUnitPadding<2> pad2;
TexUnit tex2;
};
struct
{
TexUnitPadding<3> pad3;
TexUnit tex3;
};
// Units 4-7 have the high unit-id bit set, so their padding is 32-35 words.
struct
{
TexUnitPadding<4> pad4;
TexUnit tex4;
};
struct
{
TexUnitPadding<5> pad5;
TexUnit tex5;
};
struct
{
TexUnitPadding<6> pad6;
TexUnit tex6;
};
struct
{
TexUnitPadding<7> pad7;
TexUnit tex7;
};
};
// None of the overlays may extend past the 64-word register array.
static_assert(sizeof(AllTexUnits) == 8 * 8 * sizeof(u32));

// All of BP memory

struct BPCmd
Expand Down Expand Up @@ -2043,7 +2185,7 @@ struct BPMemory
FieldMode fieldmode; // 68
u32 unknown10[7]; // 69-6F
u32 unknown11[16]; // 70-7F
FourTexUnits tex[2]; // 80-bf
AllTexUnits tex; // 80-bf
TevStageCombiner combiners[16]; // 0xC0-0xDF
TevReg tevregs[4]; // 0xE0
FogRangeParams fogRange; // 0xE8
Expand Down
80 changes: 40 additions & 40 deletions Source/Core/VideoCommon/BPStructs.cpp
Expand Up @@ -646,48 +646,48 @@ static void BPWritten(const BPCmd& bp)
GeometryShaderManager::SetTexCoordChanged((bp.address - BPMEM_SU_SSIZE) >> 1);
}
return;
// ------------------------
// BPMEM_TX_SETMODE0 - (Texture lookup and filtering mode) LOD/BIAS Clamp, MaxAnsio, LODBIAS,
// DiagLoad, Min Filter, Mag Filter, Wrap T, S
// BPMEM_TX_SETMODE1 - (LOD Stuff) - Max LOD, Min LOD
// ------------------------
case BPMEM_TX_SETMODE0: // (0x90 for linear)
case BPMEM_TX_SETMODE0_4:
TextureCacheBase::InvalidateAllBindPoints();
return;
}

case BPMEM_TX_SETMODE1:
case BPMEM_TX_SETMODE1_4:
TextureCacheBase::InvalidateAllBindPoints();
return;
// --------------------------------------------
// BPMEM_TX_SETIMAGE0 - Texture width, height, format
// BPMEM_TX_SETIMAGE1 - even LOD address in TMEM - Image Type, Cache Height, Cache Width, TMEM
// Offset
// BPMEM_TX_SETIMAGE2 - odd LOD address in TMEM - Cache Height, Cache Width, TMEM Offset
// BPMEM_TX_SETIMAGE3 - Address of Texture in main memory
// --------------------------------------------
case BPMEM_TX_SETIMAGE0:
case BPMEM_TX_SETIMAGE0_4:
case BPMEM_TX_SETIMAGE1:
case BPMEM_TX_SETIMAGE1_4:
case BPMEM_TX_SETIMAGE2:
case BPMEM_TX_SETIMAGE2_4:
case BPMEM_TX_SETIMAGE3:
case BPMEM_TX_SETIMAGE3_4:
TextureCacheBase::InvalidateAllBindPoints();
return;
// -------------------------------
// Set a TLUT
// BPMEM_TX_SETTLUT - Format, TMEM Offset (offset of TLUT from start of TMEM high bank > > 5)
// -------------------------------
case BPMEM_TX_SETTLUT:
case BPMEM_TX_SETTLUT_4:
TextureCacheBase::InvalidateAllBindPoints();
return;
if ((bp.address & 0xc0) == 0x80)
{
auto tex_address = TexUnitAddress::FromBPAddress(bp.address);

default:
break;
switch (tex_address.Reg)
{
// ------------------------
// BPMEM_TX_SETMODE0 - (Texture lookup and filtering mode) LOD/BIAS Clamp, MaxAniso, LODBIAS,
// DiagLOD, Min Filter, Mag Filter, Wrap T, S
// BPMEM_TX_SETMODE1 - (LOD Stuff) - Max LOD, Min LOD
// ------------------------
case TexUnitAddress::Register::SETMODE0:
case TexUnitAddress::Register::SETMODE1:
TextureCacheBase::InvalidateAllBindPoints();
return;

// --------------------------------------------
// BPMEM_TX_SETIMAGE0 - Texture width, height, format
// BPMEM_TX_SETIMAGE1 - even LOD address in TMEM - Image Type, Cache Height, Cache Width,
// TMEM Offset
// BPMEM_TX_SETIMAGE2 - odd LOD address in TMEM - Cache Height, Cache Width, TMEM Offset
// BPMEM_TX_SETIMAGE3 - Address of Texture in main memory
// --------------------------------------------
case TexUnitAddress::Register::SETIMAGE0:
case TexUnitAddress::Register::SETIMAGE1:
case TexUnitAddress::Register::SETIMAGE2:
case TexUnitAddress::Register::SETIMAGE3:
TextureCacheBase::InvalidateAllBindPoints();
return;

// -------------------------------
// Set a TLUT
// BPMEM_TX_SETTLUT - Format, TMEM Offset (offset of TLUT from start of TMEM high bank >> 5)
// -------------------------------
case TexUnitAddress::Register::SETTLUT:
TextureCacheBase::InvalidateAllBindPoints();
return;
case TexUnitAddress::Register::UNKNOWN:
break; // Not handled
}
}

switch (bp.address & 0xF0)
Expand Down
6 changes: 3 additions & 3 deletions Source/Core/VideoCommon/RenderState.cpp
Expand Up @@ -224,9 +224,9 @@ BlendingState& BlendingState::operator=(const BlendingState& rhs)

void SamplerState::Generate(const BPMemory& bp, u32 index)
{
const FourTexUnits& tex = bpmem.tex[index / 4];
const TexMode0& tm0 = tex.texMode0[index % 4];
const TexMode1& tm1 = tex.texMode1[index % 4];
auto tex = bp.tex.GetUnit(index);
const TexMode0& tm0 = tex.texMode0;
const TexMode1& tm1 = tex.texMode1;

// GX can configure the mip filter to none. However, D3D and Vulkan can't express this in their
// sampler states. Therefore, we set the min/max LOD to zero if this option is used.
Expand Down

0 comments on commit af043c0

Please sign in to comment.