Skip to content

Commit

Permalink
Merge pull request #11948 from unknownbrackets/vfpu
Browse files Browse the repository at this point in the history
Make vfad/vavg/vtfm ops more accurate
  • Loading branch information
hrydgard committed Apr 4, 2019
2 parents 4d580c3 + ec7cffa commit b5db387
Show file tree
Hide file tree
Showing 2 changed files with 82 additions and 67 deletions.
8 changes: 6 additions & 2 deletions Core/MIPS/IR/IRCompVFPU.cpp
Expand Up @@ -1327,10 +1327,14 @@ namespace MIPSComp {

void IRFrontend::Comp_Vtfm(MIPSOpcode op) {
CONDITIONAL_DISABLE(VFPU_MTX);
if (!js.HasNoPrefix()) {
DISABLE;
}

// Vertex transform, vector by matrix (no prefixes)
// Vertex transform, vector by matrix (weird prefixes)
// d[N] = s[N*m .. N*m + n-1] dot t[0 .. n-1]
// Homogeneous means t[n-1] is treated as 1.
// Note: this might be implemented as a series of vdots with special prefixes.

VectorSize sz = GetVecSize(op);
MatrixSize msz = GetMtxSize(op);
Expand All @@ -1346,7 +1350,7 @@ namespace MIPSComp {
}
// Otherwise, n should already be ins + 1.
else if (n != ins + 1) {
INVALIDOP;
DISABLE;
}

u8 sregs[16], dregs[4], tregs[4];
Expand Down
141 changes: 76 additions & 65 deletions Core/MIPS/MIPSIntVFPU.cpp
Expand Up @@ -1390,46 +1390,60 @@ namespace MIPSInt
EatPrefixes();
}

void Int_Vfad(MIPSOpcode op)
{
float s[4];
void Int_Vfad(MIPSOpcode op) {
float s[4]{}, t[4]{};
float d;
int vd = _VD;
int vs = _VS;
VectorSize sz = GetVecSize(op);
ReadVector(s, sz, vs);
ApplySwizzleS(s, sz);
float sum = 0.0f;
int n = GetNumVectorElements(sz);
for (int i = 0; i < n; i++)
{
sum += s[i];
ApplySwizzleS(s, V_Quad);

// T prefix generates constants, but abs can change the constant.
u32 tprefixRemove = VFPU_ANY_SWIZZLE();
u32 tprefixAdd = VFPU_MAKE_CONSTANTS(VFPUConst::ONE, VFPUConst::ONE, VFPUConst::ONE, VFPUConst::ONE);
ApplyPrefixST(t, VFPURewritePrefix(VFPU_CTRL_TPREFIX, tprefixRemove, tprefixAdd), V_Quad);

d = 0.0f;
for (int i = 0; i < 4; i++) {
d += s[i] * t[i];
}
d = sum;
ApplyPrefixD(&d,V_Single);
V(vd) = d;
ApplyPrefixD(&d, V_Single);
WriteVector(&d, V_Single, vd);
PC += 4;
EatPrefixes();
}

void Int_Vavg(MIPSOpcode op)
{
float s[4];
void Int_Vavg(MIPSOpcode op) {
float s[4]{}, t[4]{};
float d;
int vd = _VD;
int vs = _VS;
VectorSize sz = GetVecSize(op);
ReadVector(s, sz, vs);
ApplySwizzleS(s, sz);
float sum = 0.0f;
int n = GetNumVectorElements(sz);
for (int i = 0; i < n; i++)
{
sum += s[i];
ApplySwizzleS(s, V_Quad);

// T prefix generates constants, but supports negate.
u32 tprefixRemove = VFPU_ANY_SWIZZLE() | VFPU_ABS(1, 1, 1, 1);
u32 tprefixAdd;
if (sz == V_Single)
tprefixAdd = VFPU_MAKE_CONSTANTS(VFPUConst::ZERO, VFPUConst::ZERO, VFPUConst::ZERO, VFPUConst::ZERO);
else if (sz == V_Pair)
tprefixAdd = VFPU_MAKE_CONSTANTS(VFPUConst::HALF, VFPUConst::HALF, VFPUConst::HALF, VFPUConst::HALF);
else if (sz == V_Triple)
tprefixAdd = VFPU_MAKE_CONSTANTS(VFPUConst::THIRD, VFPUConst::THIRD, VFPUConst::THIRD, VFPUConst::THIRD);
else if (sz == V_Quad)
tprefixAdd = VFPU_MAKE_CONSTANTS(VFPUConst::FOURTH, VFPUConst::FOURTH, VFPUConst::FOURTH, VFPUConst::FOURTH);
else
tprefixAdd = 0;
ApplyPrefixST(t, VFPURewritePrefix(VFPU_CTRL_TPREFIX, tprefixRemove, tprefixAdd), V_Quad);

d = 0.0f;
for (int i = 0; i < 4; i++) {
d += s[i] * t[i];
}
d = sum / n;
ApplyPrefixD(&d, V_Single);
V(vd) = d;
WriteVector(&d, V_Single, vd);
PC += 4;
EatPrefixes();
}
Expand Down Expand Up @@ -1543,61 +1557,58 @@ namespace MIPSInt
EatPrefixes();
}

void Int_Vtfm(MIPSOpcode op)
{
void Int_Vtfm(MIPSOpcode op) {
float s[16]{}, t[4]{}, d[4];
int vd = _VD;
int vs = _VS;
int vt = _VT;
int ins = (op >> 23) & 7;

VectorSize sz = GetVecSize(op);
MatrixSize msz = GetMtxSize(op);
int n = GetNumVectorElements(sz);

bool homogenous = false;
if (n == ins)
{
n++;
sz = (VectorSize)((int)(sz) + 1);
msz = (MatrixSize)((int)(msz) + 1);
homogenous = true;
}
VectorSize sz = (VectorSize)(ins + 1);
MatrixSize msz = (MatrixSize)(ins + 1);
int n = GetNumVectorElements(GetVecSize(op));

float s[16];
int tn = std::min(n, ins + 1);
ReadMatrix(s, msz, vs);
float t[4];
ReadVector(t, sz, vt);
float d[4];

if (homogenous)
{
for (int i = 0; i < n; i++)
{
d[i] = 0.0f;
for (int k = 0; k < n; k++)
{
d[i] += (k == n-1) ? s[i*4+k] : (s[i*4+k] * t[k]);
}
for (int i = 0; i < ins; i++) {
d[i] = s[i * 4] * t[0];
for (int k = 1; k < tn; k++) {
d[i] += s[i * 4 + k] * t[k];
}
}
else if (n == ins + 1)
{
for (int i = 0; i < n; i++)
{
d[i] = s[i*4] * t[0];
for (int k = 1; k < n; k++)
{
d[i] += s[i*4+k] * t[k];
}
if (ins >= n) {
d[i] += s[i * 4 + ins];
}
}
else
{
Reporting::ReportMessage("Trying to interpret instruction that can't be interpreted (BADVTFM)");
_dbg_assert_msg_(CPU,0,"Trying to interpret instruction that can't be interpreted (BADVTFM)");
for (int i = 0; i < n; i++)
d[i] = 0.0;

// S and T prefixes apply for the final row only.
// The T prefix is used to apply zero/one constants, but abs still changes it.
ApplySwizzleS(&s[ins * 4], V_Quad);
VFPUConst constX = VFPUConst::NONE;
VFPUConst constY = n < 2 ? VFPUConst::ZERO : VFPUConst::NONE;
VFPUConst constZ = n < 3 ? VFPUConst::ZERO : VFPUConst::NONE;
VFPUConst constW = n < 4 ? VFPUConst::ZERO : VFPUConst::NONE;
if (ins >= n) {
constY = ins == 1 ? VFPUConst::ONE : VFPUConst::ZERO;
constZ = ins == 2 ? VFPUConst::ONE : VFPUConst::ZERO;
constW = ins == 3 ? VFPUConst::ONE : VFPUConst::ZERO;
}
u32 tprefixRemove = VFPU_SWIZZLE(0, n < 2 ? 3 : 0, n < 3 ? 3 : 0, n < 4 ? 3 : 0);
u32 tprefixAdd = VFPU_MAKE_CONSTANTS(constX, constY, constZ, constW);
ApplyPrefixST(t, VFPURewritePrefix(VFPU_CTRL_TPREFIX, tprefixRemove, tprefixAdd), V_Quad);

// Really this is the operation all rows probably use (with constant wiring.)
d[ins] = s[ins * 4] * t[0];
for (int k = 1; k < 4; k++) {
d[ins] += s[ins * 4 + k] * t[k];
}

// D prefix applies to the last element only.
u32 lastmask = (currentMIPS->vfpuCtrl[VFPU_CTRL_DPREFIX] & (1 << 8)) << ins;
u32 lastsat = (currentMIPS->vfpuCtrl[VFPU_CTRL_DPREFIX] & 3) << (ins + ins);
currentMIPS->vfpuCtrl[VFPU_CTRL_DPREFIX] = lastmask | lastsat;
ApplyPrefixD(d, sz);
WriteVector(d, sz, vd);
PC += 4;
EatPrefixes();
Expand Down

0 comments on commit b5db387

Please sign in to comment.