Fix a PIC compliance bug in the VFPU. Comment other cases properly (for easy searching).
hrydgard committed Aug 29, 2017
1 parent 207f903 commit 8d0498303ac438cd276e3ad9485ca4cccbc7b61a
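
For context: M(&global) encodes an absolute address, and on x86-64 that encoding is only usable when the target lies within roughly ±2 GB of the generated code, so it can be expressed as a RIP-relative disp32. When the data may be farther away, the address has to be materialized in a register first. A minimal sketch of the pattern this commit applies, built from the emitter helpers visible in the diff below (RipAccessible, M, ImmPtr, MatR):

    if (RipAccessible(table)) {
        PAND(XMM0, M(&table[0]));                  // near enough: RIP-relative disp32
    } else {
        MOV(PTRBITS, R(TEMPREG), ImmPtr(&table));  // load the full 64-bit address
        PAND(XMM0, MatR(TEMPREG));                 // then address through the register
    }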
@@ -173,7 +173,6 @@ struct OpArg
void WriteRex(XEmitter *emit, int opBits, int bits, int customOp = -1) const;
void WriteVex(XEmitter* emit, X64Reg regOp1, X64Reg regOp2, int L, int pp, int mmmmm, int W = 0) const;
void WriteRest(XEmitter *emit, int extraBytes=0, X64Reg operandReg=INVALID_REG, bool warn_64bit_offset = true) const;
-void WriteFloatModRM(XEmitter *emit, FloatOp op);
void WriteSingleByteOp(XEmitter *emit, u8 op, X64Reg operandReg, int bits);
// This one is public - must be written to
u64 offset; // use RIP-relative as much as possible - 64-bit immediates are not available.
@@ -716,8 +716,6 @@ void ArmJit::CompNEON_Mftv(MIPSOpcode op) {
gpr.MapReg(rt);
STR(gpr.R(rt), CTXREG, offsetof(MIPSState, vfpuCtrl) + 4 * (imm - 128));
}
-//gpr.BindToRegister(rt, true, false);
-//MOV(32, M(&currentMIPS->vfpuCtrl[imm - 128]), gpr.R(rt));
// TODO: Optimization if rt is Imm?
// Set these BEFORE disable!
@@ -1548,7 +1548,7 @@ void Jit::Comp_Vi2f(MIPSOpcode op) {
if (*mult != 1.0f) {
if (RipAccessible(mult)) {
-MOVSS(XMM1, M(mult));
+MOVSS(XMM1, M(mult)); // rip accessible
} else {
MOV(PTRBITS, R(TEMPREG), ImmPtr(mult));
MOVSS(XMM1, MatR(TEMPREG));
@@ -1656,14 +1656,14 @@ void Jit::Comp_Vh2f(MIPSOpcode op) {
// OK, 16 bits in each word.
// Let's go. Deep magic here.
MOVAPS(XMM1, R(XMM0));
-ANDPS(XMM0, M(&mask_nosign[0])); // xmm0 = expmant
+ANDPS(XMM0, M(&mask_nosign[0])); // xmm0 = expmant. not rip accessible but bailing above
XORPS(XMM1, R(XMM0)); // xmm1 = justsign = expmant ^ xmm0
MOVAPS(tempR, R(XMM0));
-PCMPGTD(tempR, M(&was_infnan[0])); // xmm2 = b_wasinfnan
+PCMPGTD(tempR, M(&was_infnan[0])); // xmm2 = b_wasinfnan. not rip accessible but bailing above
PSLLD(XMM0, 13);
MULPS(XMM0, M(magic)); /// xmm0 = scaled
PSLLD(XMM1, 16); // xmm1 = sign
-ANDPS(tempR, M(&exp_infnan[0]));
+ANDPS(tempR, M(&exp_infnan[0])); // not rip accessible but bailing above
ORPS(XMM1, R(tempR));
ORPS(XMM0, R(XMM1));
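
The "deep magic" above is the well-known bit-trick half-to-float conversion. A scalar C++ sketch of the same steps, with the constant values inferred from the vector names (treat the exact constants as assumptions):

    #include <cstdint>
    #include <cstring>

    static float HalfToFloat(uint16_t h) {
        uint32_t expmant  = h & 0x7fff;               // ANDPS(mask_nosign): drop the sign
        uint32_t justsign = h ^ expmant;              // XORPS: the sign bit alone
        uint32_t infnan   = (expmant > 0x7bff) ? 0x7f800000u : 0;  // PCMPGTD(was_infnan) + ANDPS(exp_infnan)
        uint32_t bits = expmant << 13;                // PSLLD 13: exp+mantissa into fp32 position
        float f;
        std::memcpy(&f, &bits, sizeof(f));
        f *= 0x1p112f;                                // MULPS(magic): rebias exponent by 2^(127-15)
        std::memcpy(&bits, &f, sizeof(bits));
        bits |= infnan | (justsign << 16);            // ORPS: restore inf/nan exponent and sign
        std::memcpy(&f, &bits, sizeof(f));
        return f;
    }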
@@ -1747,17 +1747,17 @@ void Jit::Comp_Vx2i(MIPSOpcode op) {
// vuc2i is a bit special. It spreads out the bits like this:
// s[0] = 0xDDCCBBAA -> d[0] = (0xAAAAAAAA >> 1), d[1] = (0xBBBBBBBB >> 1), etc.
MOVSS(XMM0, fpr.V(sregs[0]));
-if (cpu_info.bSSSE3) {
+if (cpu_info.bSSSE3 && RipAccessible(vuc2i_shuffle)) {
// Not really different speed. Generates a bit less code.
-PSHUFB(XMM0, M(&vuc2i_shuffle[0]));
+PSHUFB(XMM0, M(&vuc2i_shuffle[0])); // rip accessible
} else {
// First, we change 0xDDCCBBAA to 0xDDDDCCCCBBBBAAAA.
PUNPCKLBW(XMM0, R(XMM0));
// Now, interleave each 16 bits so they're all 32 bits wide.
PUNPCKLWD(XMM0, R(XMM0));
}
} else {
-if (cpu_info.bSSSE3) {
+if (cpu_info.bSSSE3 && RipAccessible(vc2i_shuffle)) {
MOVSS(XMM0, fpr.V(sregs[0]));
PSHUFB(XMM0, M(&vc2i_shuffle[0]));
} else {
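
A scalar sketch of the spread the vuc2i comment above describes (Vuc2i is a hypothetical helper name; the final shift happens after the lines shown in the hunk):

    #include <cstdint>

    // Replicate each unsigned byte across a full 32-bit lane, then shift
    // right once: 0xAA -> 0xAAAAAAAA >> 1 = 0x55555555, as in the comment.
    static void Vuc2i(uint32_t s, uint32_t d[4]) {
        for (int i = 0; i < 4; i++) {
            uint32_t b = (s >> (8 * i)) & 0xFF;
            d[i] = (b * 0x01010101u) >> 1;
        }
    }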
@@ -3269,7 +3269,7 @@ void Jit::Comp_Vi2x(MIPSOpcode op) {
// At this point, everything is aligned in the high bits of our lanes.
if (cpu_info.bSSSE3) {
if (RipAccessible(vi2xc_shuffle)) {
-PSHUFB(dst0, bits == 8 ? M(vi2xc_shuffle) : M(vi2xs_shuffle));
+PSHUFB(dst0, bits == 8 ? M(vi2xc_shuffle) : M(vi2xs_shuffle)); // rip accessible
} else {
MOV(PTRBITS, R(TEMPREG), bits == 8 ? ImmPtr(vi2xc_shuffle) : ImmPtr(vi2xs_shuffle));
PSHUFB(dst0, MatR(TEMPREG));
@@ -91,7 +91,7 @@ bool JitSafeMem::PrepareWrite(OpArg &dest, int size)
#endif
#if PPSSPP_ARCH(32BIT)
-dest = M(Memory::base + addr);
+dest = M(Memory::base + addr); // 32-bit only
#else
dest = MDisp(MEMBASEREG, addr);
#endif
@@ -120,7 +120,7 @@ bool JitSafeMem::PrepareRead(OpArg &src, int size)
#endif
#if PPSSPP_ARCH(32BIT)
-src = M(Memory::base + addr);
+src = M(Memory::base + addr); // 32-bit only
#else
src = MDisp(MEMBASEREG, addr);
#endif
@@ -144,7 +144,7 @@ OpArg JitSafeMem::NextFastAddress(int suboffset)
#endif
#if PPSSPP_ARCH(32BIT)
-return M(Memory::base + addr);
+return M(Memory::base + addr); // 32-bit only
#else
return MDisp(MEMBASEREG, addr);
#endif
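
All three JitSafeMem hunks above hinge on the same split, which is why the new comment says "32-bit only": on x86-32 the absolute address Memory::base + addr fits in a 32-bit displacement, so M() works; on x86-64 it generally does not, so the emulated memory base is kept in MEMBASEREG and addressed with an offset. As a hypothetical helper (PSPMemAddr is illustrative, not a name from the codebase):

    static OpArg PSPMemAddr(uint32_t addr) {
    #if PPSSPP_ARCH(32BIT)
        return M(Memory::base + addr);   // absolute disp32, 32-bit only
    #else
        return MDisp(MEMBASEREG, addr);  // base register + 32-bit offset
    #endif
    }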
@@ -932,7 +932,7 @@ void VertexDecoderJitCache::Jit_Color4444() {
// Spread to RGBA -> R00GB00A.
PUNPCKLBW(fpScratchReg, R(fpScratchReg));
if (RipAccessible(&color4444mask[0])) {
-PAND(fpScratchReg, M(&color4444mask[0]));
+PAND(fpScratchReg, M(&color4444mask[0])); // rip accessible
} else {
MOV(PTRBITS, R(tempReg1), ImmPtr(&color4444mask));
PAND(fpScratchReg, MatR(tempReg1));
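
Both this hunk and the Jit_Decode4444 one below expand 4-bit channels to 8 bits. A scalar sketch of one pixel, assuming the PSP's R-in-low-nibble 4444 layout (the channel order is an assumption):

    #include <cstdint>

    // Replicating a nibble (x * 0x11) maps 0xF to 0xFF exactly; the SSE
    // version does this for four pixels at once via PUNPCKLBW, the
    // color4444mask PAND, and the shift/OR steps that follow.
    static uint32_t Decode4444(uint16_t c) {
        uint32_t r = (c >>  0) & 0xF, g = (c >>  4) & 0xF;
        uint32_t b = (c >>  8) & 0xF, a = (c >> 12) & 0xF;
        return (r * 0x11u) | ((g * 0x11u) << 8) | ((b * 0x11u) << 16) | ((a * 0x11u) << 24);
    }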
@@ -210,7 +210,11 @@ LinearFunc SamplerJitCache::CompileLinear(const SamplerID &id) {
MOVD_xmm(fpScratchReg5, MDisp(RSP, 24));
CVTDQ2PS(fpScratchReg5, R(fpScratchReg5));
SHUFPS(fpScratchReg5, R(fpScratchReg5), _MM_SHUFFLE(0, 0, 0, 0));
-MULPS(fpScratchReg5, M(by256));
+if (RipAccessible(by256)) {
+MULPS(fpScratchReg5, M(by256)); // rip accessible
+} else {
+Crash(); // TODO
+}
MOVAPS(XMM0, M(ones));
SUBPS(XMM0, R(fpScratchReg5));
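
For reference, what this snippet computes per lane, assuming by256 holds 1.0f/256.0f and ones holds 1.0f (the names are from the code above; the values are inferred):

    // Scalar sketch: turn the 8-bit subtexel fraction into lerp weights.
    static void LinearWeights(int frac, float &f, float &invf) {
        f    = (float)frac * (1.0f / 256.0f);  // CVTDQ2PS + MULPS(M(by256))
        invf = 1.0f - f;                       // MOVAPS(XMM0, M(ones)); SUBPS
    }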
@@ -551,7 +555,11 @@ static const u32 MEMORY_ALIGNED16(color4444mask[4]) = { 0xf00ff00f, 0xf00ff00f,
bool SamplerJitCache::Jit_Decode4444() {
MOVD_xmm(fpScratchReg1, R(resultReg));
PUNPCKLBW(fpScratchReg1, R(fpScratchReg1));
-PAND(fpScratchReg1, M(color4444mask));
+if (RipAccessible(color4444mask)) {
+PAND(fpScratchReg1, M(color4444mask)); // rip accessible
+} else {
+Crash();
+}
MOVSS(fpScratchReg2, R(fpScratchReg1));
MOVSS(fpScratchReg3, R(fpScratchReg1));
PSRLW(fpScratchReg2, 4);
