Permalink
Browse files

Some code cleanup. More work towards removing RIP addressing

  • Loading branch information...
hrydgard committed Jul 5, 2017
1 parent 80b82ec commit 78538ff61e9c24be930f9dc7fccce493e09b32e3
Showing with 42 additions and 71 deletions.
  1. +3 −0 Core/MIPS/MIPS.h
  2. +37 −71 Core/MIPS/x86/CompVFPU.cpp
  3. +2 −0 Core/MIPS/x86/RegCacheFPU.cpp
View
@@ -153,6 +153,9 @@ enum class CPUCore;
// can reach both GPR and FPR regs.
#define MIPSSTATE_VAR(x) MDisp(X64JitConstants::CTXREG, (int)(offsetof(MIPSState, x) - offsetof(MIPSState, f[0])))
// To get RIP-relative addressing (requires tight memory control so generated code isn't too far from the binary, and a reachable variable called mips):
// #define MIPSSTATE_VAR(x) M(&mips->x)
#endif
class MIPSState
View
@@ -150,8 +150,7 @@ void Jit::GetVectorRegsPrefixD(u8 *regs, VectorSize sz, int vectorReg) {
return;
int n = GetNumVectorElements(sz);
for (int i = 0; i < n; i++)
{
for (int i = 0; i < n; i++) {
// Hopefully this is rare, we'll just write it into a reg we drop.
if (js.VfpuWriteMask(i))
regs[i] = fpr.GetTempV();
@@ -163,14 +162,12 @@ void Jit::ApplyPrefixD(const u8 *vregs, VectorSize sz) {
if (!js.prefixD) return;
int n = GetNumVectorElements(sz);
for (int i = 0; i < n; i++)
{
for (int i = 0; i < n; i++) {
if (js.VfpuWriteMask(i))
continue;
int sat = (js.prefixD >> (i * 2)) & 3;
if (sat == 1)
{
if (sat == 1) {
fpr.MapRegV(vregs[i], MAP_DIRTY);
// Zero out XMM0 if it was <= +0.0f (but skip NAN.)
@@ -181,9 +178,7 @@ void Jit::ApplyPrefixD(const u8 *vregs, VectorSize sz) {
// Retain a NAN in XMM0 (must be second operand.)
MOVSS(fpr.VX(vregs[i]), M(&one));
MINSS(fpr.VX(vregs[i]), R(XMM0));
}
else if (sat == 3)
{
} else if (sat == 3) {
fpr.MapRegV(vregs[i], MAP_DIRTY);
// Check for < -1.0f, but careful of NANs.
@@ -205,15 +200,12 @@ void Jit::ApplyPrefixD(const u8 *vregs, VectorSize sz) {
// Vector regs can overlap in all sorts of swizzled ways.
// This does allow a single overlap in sregs[i].
bool IsOverlapSafeAllowS(int dreg, int di, int sn, u8 sregs[], int tn = 0, u8 tregs[] = NULL)
{
for (int i = 0; i < sn; ++i)
{
bool IsOverlapSafeAllowS(int dreg, int di, int sn, u8 sregs[], int tn = 0, u8 tregs[] = NULL) {
for (int i = 0; i < sn; ++i) {
if (sregs[i] == dreg && i != di)
return false;
}
for (int i = 0; i < tn; ++i)
{
for (int i = 0; i < tn; ++i) {
if (tregs[i] == dreg)
return false;
}
@@ -222,8 +214,7 @@ bool IsOverlapSafeAllowS(int dreg, int di, int sn, u8 sregs[], int tn = 0, u8 tr
return true;
}
bool IsOverlapSafe(int dreg, int di, int sn, u8 sregs[], int tn = 0, u8 tregs[] = NULL)
{
bool IsOverlapSafe(int dreg, int di, int sn, u8 sregs[], int tn = 0, u8 tregs[] = NULL) {
return IsOverlapSafeAllowS(dreg, di, sn, sregs, tn, tregs) && sregs[di] != dreg;
}
@@ -236,8 +227,7 @@ void Jit::Comp_SV(MIPSOpcode op) {
int vt = ((op >> 16) & 0x1f) | ((op & 3) << 5);
MIPSGPReg rs = _RS;
switch (op >> 26)
{
switch (op >> 26) {
case 50: //lv.s // VI(vt) = Memory::Read_U32(addr);
{
gpr.Lock(rs);
@@ -291,16 +281,14 @@ void Jit::Comp_SV(MIPSOpcode op) {
}
}
void Jit::Comp_SVQ(MIPSOpcode op)
{
void Jit::Comp_SVQ(MIPSOpcode op) {
CONDITIONAL_DISABLE;
int imm = (signed short)(op&0xFFFC);
int vt = (((op >> 16) & 0x1f)) | ((op&1) << 5);
MIPSGPReg rs = _RS;
switch (op >> 26)
{
switch (op >> 26) {
case 53: //lvl.q/lvr.q
{
if (!g_Config.bFastMemory) {
@@ -421,16 +409,13 @@ void Jit::Comp_SVQ(MIPSOpcode op)
JitSafeMem safe(this, rs, imm);
safe.SetFar();
OpArg src;
if (safe.PrepareRead(src, 16))
{
if (safe.PrepareRead(src, 16)) {
// Just copy 4 words the easiest way while not wasting registers.
for (int i = 0; i < 4; i++)
MOVSS(fpr.VX(vregs[i]), safe.NextFastAddress(i * 4));
}
if (safe.PrepareSlowRead(safeMemFuncs.readU32))
{
for (int i = 0; i < 4; i++)
{
if (safe.PrepareSlowRead(safeMemFuncs.readU32)) {
for (int i = 0; i < 4; i++) {
safe.NextSlowRead(safeMemFuncs.readU32, i * 4);
MOVD_xmm(fpr.VX(vregs[i]), R(EAX));
}
@@ -485,15 +470,12 @@ void Jit::Comp_SVQ(MIPSOpcode op)
JitSafeMem safe(this, rs, imm);
safe.SetFar();
OpArg dest;
if (safe.PrepareWrite(dest, 16))
{
if (safe.PrepareWrite(dest, 16)) {
for (int i = 0; i < 4; i++)
MOVSS(safe.NextFastAddress(i * 4), fpr.VX(vregs[i]));
}
if (safe.PrepareSlowWrite())
{
for (int i = 0; i < 4; i++)
{
if (safe.PrepareSlowWrite()) {
for (int i = 0; i < 4; i++) {
MOVSS(M(&ssLoadStoreTemp), fpr.VX(vregs[i]));
safe.DoSlowWrite(safeMemFuncs.writeU32, M(&ssLoadStoreTemp), i * 4);
}
@@ -578,8 +560,7 @@ void Jit::Comp_VIdt(MIPSOpcode op) {
XORPS(XMM0, R(XMM0));
MOVSS(XMM1, M(&one));
fpr.MapRegsV(dregs, sz, MAP_NOINIT | MAP_DIRTY);
switch (sz)
{
switch (sz) {
case V_Pair:
MOVSS(fpr.VX(dregs[0]), R((vd&1)==0 ? XMM1 : XMM0));
MOVSS(fpr.VX(dregs[1]), R((vd&1)==1 ? XMM1 : XMM0));
@@ -749,17 +730,15 @@ void Jit::Comp_VHdp(MIPSOpcode op) {
fpr.SimpleRegsV(dregs, V_Single, MAP_DIRTY | MAP_NOINIT);
X64Reg tempxreg = XMM0;
if (IsOverlapSafe(dregs[0], 0, n, sregs, n, tregs))
{
if (IsOverlapSafe(dregs[0], 0, n, sregs, n, tregs)) {
fpr.MapRegsV(dregs, V_Single, MAP_DIRTY | MAP_NOINIT);
tempxreg = fpr.VX(dregs[0]);
}
// Need to start with +0.0f so it doesn't result in -0.0f.
MOVSS(tempxreg, fpr.V(sregs[0]));
MULSS(tempxreg, fpr.V(tregs[0]));
for (int i = 1; i < n; i++)
{
for (int i = 1; i < n; i++) {
// sum += (i == n-1) ? t[i] : s[i]*t[i];
if (i == n - 1) {
ADDSS(tempxreg, fpr.V(tregs[i]));
@@ -2265,7 +2244,8 @@ void Jit::Comp_VV2Op(MIPSOpcode op) {
// Zero out XMM0 if it was <= +0.0f (but skip NAN.)
MOVSS(R(XMM0), tempxregs[i]);
CMPLESS(XMM0, M(&zero));
XORPS(XMM1, R(XMM1));
CMPLESS(XMM0, R(XMM1));
ANDNPS(XMM0, R(tempxregs[i]));
// Retain a NAN in XMM0 (must be second operand.)
@@ -2519,7 +2499,7 @@ void Jit::Comp_VMatrixInit(MIPSOpcode op) {
switch ((op >> 16) & 0xF) {
case 3: // vmidt
MOVSS(XMM0, M(&zero));
XORPS(XMM0, R(XMM0));
MOVSS(XMM1, M(&one));
for (int a = 0; a < n; a++) {
for (int b = 0; b < n; b++) {
@@ -2528,7 +2508,7 @@ void Jit::Comp_VMatrixInit(MIPSOpcode op) {
}
break;
case 6: // vmzero
MOVSS(XMM0, M(&zero));
XORPS(XMM0, R(XMM0));
for (int a = 0; a < n; a++) {
for (int b = 0; b < n; b++) {
MOVSS(fpr.V(dregs[a * 4 + b]), XMM0);
@@ -2614,21 +2594,17 @@ void Jit::Comp_Vmmov(MIPSOpcode op) {
// Potentially detect overlap or the safe direction to move in, or just DISABLE?
// This is far from optimal; it blows the regcache every time.
u8 tempregs[16];
for (int a = 0; a < n; a++)
{
for (int b = 0; b < n; b++)
{
for (int a = 0; a < n; a++) {
for (int b = 0; b < n; b++) {
u8 temp = (u8) fpr.GetTempV();
fpr.MapRegV(temp, MAP_NOINIT | MAP_DIRTY);
MOVSS(fpr.VX(temp), fpr.V(sregs[a * 4 + b]));
fpr.StoreFromRegisterV(temp);
tempregs[a * 4 + b] = temp;
}
}
for (int a = 0; a < n; a++)
{
for (int b = 0; b < n; b++)
{
for (int a = 0; a < n; a++) {
for (int b = 0; b < n; b++) {
u8 temp = tempregs[a * 4 + b];
fpr.MapRegV(temp, 0);
MOVSS(fpr.V(dregs[a * 4 + b]), fpr.VX(temp));
@@ -2674,30 +2650,24 @@ void Jit::Comp_VScl(MIPSOpcode op) {
MOVSS(XMM0, fpr.V(scale));
X64Reg tempxregs[4];
for (int i = 0; i < n; ++i)
{
if (dregs[i] != scale || !IsOverlapSafeAllowS(dregs[i], i, n, sregs))
{
for (int i = 0; i < n; ++i) {
if (dregs[i] != scale || !IsOverlapSafeAllowS(dregs[i], i, n, sregs)) {
int reg = fpr.GetTempV();
fpr.MapRegV(reg, MAP_NOINIT | MAP_DIRTY);
fpr.SpillLockV(reg);
tempxregs[i] = fpr.VX(reg);
}
else
{
} else {
fpr.MapRegV(dregs[i], dregs[i] == sregs[i] ? MAP_DIRTY : MAP_NOINIT);
fpr.SpillLockV(dregs[i]);
tempxregs[i] = fpr.VX(dregs[i]);
}
}
for (int i = 0; i < n; ++i)
{
for (int i = 0; i < n; ++i) {
if (!fpr.V(sregs[i]).IsSimpleReg(tempxregs[i]))
MOVSS(tempxregs[i], fpr.V(sregs[i]));
MULSS(tempxregs[i], R(XMM0));
}
for (int i = 0; i < n; ++i)
{
for (int i = 0; i < n; ++i) {
if (!fpr.V(dregs[i]).IsSimpleReg(tempxregs[i]))
MOVSS(fpr.V(dregs[i]), tempxregs[i]);
}
@@ -2925,10 +2895,8 @@ void Jit::Comp_Vmscl(MIPSOpcode op) {
// TODO: test overlap, optimize.
u8 tempregs[16];
for (int a = 0; a < n; a++)
{
for (int b = 0; b < n; b++)
{
for (int a = 0; a < n; a++) {
for (int b = 0; b < n; b++) {
u8 temp = (u8) fpr.GetTempV();
fpr.MapRegV(temp, MAP_NOINIT | MAP_DIRTY);
MOVSS(fpr.VX(temp), fpr.V(sregs[a * 4 + b]));
@@ -2937,10 +2905,8 @@ void Jit::Comp_Vmscl(MIPSOpcode op) {
tempregs[a * 4 + b] = temp;
}
}
for (int a = 0; a < n; a++)
{
for (int b = 0; b < n; b++)
{
for (int a = 0; a < n; a++) {
for (int b = 0; b < n; b++) {
u8 temp = tempregs[a * 4 + b];
fpr.MapRegV(temp, 0);
MOVSS(fpr.V(dregs[a * 4 + b]), fpr.VX(temp));
@@ -897,6 +897,8 @@ OpArg FPURegCache::GetDefaultLocation(int reg) const {
return MDisp(CTXREG, reg * 4);
} else if (reg < 32 + 128) {
return M(&mips->v[voffset[reg - 32]]);
// This should work, but doesn't seem to. Maybe it is used from somewhere where CTXREG is not yet set properly.
// return MDisp(CTXREG, offsetof(MIPSState, v[0]) - offsetof(MIPSState, f[0]) + voffset[reg - 32] * sizeof(float));
} else {
return M(&tempValues[reg - 32 - 128]);
}

0 comments on commit 78538ff

Please sign in to comment.