Skip to content

Commit

Permalink
Merge pull request #25 from Tilka/ppc_fp
Browse files Browse the repository at this point in the history
Fix non-IEEE mode
  • Loading branch information
delroth committed Feb 23, 2014
2 parents f9ed70b + ee21cbe commit 311caef
Show file tree
Hide file tree
Showing 10 changed files with 357 additions and 57 deletions.
44 changes: 40 additions & 4 deletions Source/Core/Common/x64Emitter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ void OpArg::WriteRest(XEmitter *emit, int extraBytes, X64Reg _operandReg,
{
// Oh, RIP addressing.
_offsetOrBaseReg = 5;
emit->WriteModRM(0, _operandReg&7, 5);
emit->WriteModRM(0, _operandReg, _offsetOrBaseReg);
//TODO : add some checks
#ifdef _M_X64
u64 ripAddr = (u64)emit->GetCodePtr() + 4 + extraBytes;
Expand Down Expand Up @@ -327,7 +327,6 @@ void OpArg::WriteRest(XEmitter *emit, int extraBytes, X64Reg _operandReg,
}
}


// W = operand extended width (1 if 64-bit)
// R = register# upper bit
// X = index register# upper bit
Expand Down Expand Up @@ -1390,6 +1389,10 @@ void XEmitter::PSRLQ(X64Reg reg, int shift) {
Write8(shift);
}

// Variable-count overload of PSRLQ: the shift count is supplied through `arg`
// instead of an immediate byte (the `int shift` overload appends the count
// with Write8). 0xD3 is the opcode byte handed to WriteSSEOp.
void XEmitter::PSRLQ(X64Reg reg, OpArg arg)
{
	const u8 opcode = 0xd3;
	WriteSSEOp(64, opcode, true, reg, arg);
}

void XEmitter::PSLLW(X64Reg reg, int shift) {
WriteSSEOp(64, 0x71, true, (X64Reg)6, R(reg));
Write8(shift);
Expand Down Expand Up @@ -1437,7 +1440,19 @@ void XEmitter::PSHUFB(X64Reg dest, OpArg arg) {
Write8(0x0f);
Write8(0x38);
Write8(0x00);
arg.WriteRest(this, 0);
arg.WriteRest(this);
}

// Emits PTEST dest, arg (SSE4.1; encoded as 66 [REX] 0F 38 17 /r).
//
// dest: register placed in the reg field of the ModR/M byte.
// arg:  second operand, written by OpArg::WriteRest.
//
// If cpu_info reports no SSE4.1 support a PanicAlert is raised, but the
// instruction bytes are still emitted afterwards.
void XEmitter::PTEST(X64Reg dest, OpArg arg)
{
	if (!cpu_info.bSSE4_1)
	{
		PanicAlert("Trying to use PTEST on a system that doesn't support it. Nobody hears your screams.");
	}

	arg.operandReg = dest;

	Write8(0x66);
	// The REX prefix has to sit between the mandatory 0x66 prefix and the
	// 0x0F escape byte. Without it the upper bit of the register numbers is
	// dropped, so XMM8-XMM15 (or an extended base/index register) would be
	// silently encoded as their low-numbered counterparts.
	arg.WriteRex(this, 0, 0);
	Write8(0x0f);
	Write8(0x38);
	Write8(0x17);
	arg.WriteRest(this);
}

// Emits PAND dest, arg through WriteSSEOp (opcode byte 0xDB, packed form).
void XEmitter::PAND(X64Reg dest, OpArg arg)
{
	WriteSSEOp(64, 0xDB, true, dest, arg);
}
Expand All @@ -1458,7 +1473,7 @@ void XEmitter::PADDUSW(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xDD, true, dest
// PSUBB / PSUBW / PSUBD: emitted through WriteSSEOp in packed form with
// opcode bytes 0xF8, 0xF9 and 0xFA respectively.
void XEmitter::PSUBB(X64Reg dest, OpArg arg)
{
	WriteSSEOp(64, 0xF8, true, dest, arg);
}

void XEmitter::PSUBW(X64Reg dest, OpArg arg)
{
	WriteSSEOp(64, 0xF9, true, dest, arg);
}

void XEmitter::PSUBD(X64Reg dest, OpArg arg)
{
	WriteSSEOp(64, 0xFA, true, dest, arg);
}
// Emits PSUBQ dest, arg through WriteSSEOp in packed form.
// The opcode byte is 0xFB, continuing the 0xF8/0xF9/0xFA sequence of
// PSUBB/PSUBW/PSUBD. 0xDB must not be used here: that is the byte PAND emits.
void XEmitter::PSUBQ(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xFB, true, dest, arg);}

// PSUBSB / PSUBSW: emitted through WriteSSEOp in packed form with opcode
// bytes 0xE8 and 0xE9 respectively.
void XEmitter::PSUBSB(X64Reg dest, OpArg arg)
{
	WriteSSEOp(64, 0xE8, true, dest, arg);
}

void XEmitter::PSUBSW(X64Reg dest, OpArg arg)
{
	WriteSSEOp(64, 0xE9, true, dest, arg);
}
Expand Down Expand Up @@ -1497,6 +1512,8 @@ void XEmitter::VSUBSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(64,
// Three-operand AVX emitters. Each forwards to WriteAVXOp with size 64 and
// packed == false, differing only in the sse* opcode constant.
void XEmitter::VMULSD(X64Reg regOp1, X64Reg regOp2, OpArg arg)
{
	WriteAVXOp(64, sseMUL, false, regOp1, regOp2, arg);
}

void XEmitter::VDIVSD(X64Reg regOp1, X64Reg regOp2, OpArg arg)
{
	WriteAVXOp(64, sseDIV, false, regOp1, regOp2, arg);
}

void XEmitter::VSQRTSD(X64Reg regOp1, X64Reg regOp2, OpArg arg)
{
	WriteAVXOp(64, sseSQRT, false, regOp1, regOp2, arg);
}

// NOTE(review): VPAND and VPANDN pass packed == false, exactly like the
// scalar VxxxSD emitters above, and reuse the sseAND / sseANDN constants,
// whereas the non-VEX PAND emitter passes opcode 0xDB with packed == true.
// Confirm that WriteAVXOp produces the intended packed-integer encoding.
void XEmitter::VPAND(X64Reg regOp1, X64Reg regOp2, OpArg arg)
{
	WriteAVXOp(64, sseAND, false, regOp1, regOp2, arg);
}

void XEmitter::VPANDN(X64Reg regOp1, X64Reg regOp2, OpArg arg)
{
	WriteAVXOp(64, sseANDN, false, regOp1, regOp2, arg);
}

// Prefixes

Expand All @@ -1509,6 +1526,25 @@ void XEmitter::FWAIT()
Write8(0x9B);
}

// Emits the opcode byte and the ModR/M-encoded operand of an x87 load/store.
//
// bits: operand width, 32 or 64. Any other value trips the assert; if
//       execution continues past it, the 32-bit opcode byte is emitted.
// op:   value placed in the reg field of the ModR/M byte.
// arg:  operand written via OpArg::WriteRest.
//
// TODO: make this more generic
void XEmitter::WriteFloatLoadStore(int bits, FloatOp op, OpArg arg)
{
	int format = 0;
	if (bits == 64)
	{
		format = 2;
	}
	else if (bits != 32)
	{
		_assert_msg_(DYNA_REC, 0, "WriteFloatLoadStore: bits is not 32 or 64");
	}

	// 0xD9 for 32-bit operands, 0xDD (0xD9 | 4) for 64-bit operands.
	Write8(0xd9 | (format << 1));

	// x87 instructions use the reg field of the ModR/M byte as opcode:
	arg.WriteRest(this, 0, (X64Reg) op);
}

// x87 load/store emitters. Each forwards to WriteFloatLoadStore with the
// matching FloatOp selector; `bits` is the operand width (32 or 64).
void XEmitter::FLD(int bits, OpArg src)
{
	WriteFloatLoadStore(bits, floatLD, src);
}

void XEmitter::FST(int bits, OpArg dest)
{
	WriteFloatLoadStore(bits, floatST, dest);
}

void XEmitter::FSTP(int bits, OpArg dest)
{
	WriteFloatLoadStore(bits, floatSTP, dest);
}

// Emits the fixed two-byte sequence DF E0.
void XEmitter::FNSTSW_AX()
{
	Write8(0xDF);
	Write8(0xE0);
}

// Emits the two bytes 0F 31, which is the encoding of the RDTSC instruction
// (the method name transposes two letters of the mnemonic).
void XEmitter::RTDSC()
{
	Write8(0x0F);
	Write8(0x31);
}

// helper routines for setting pointers
Expand Down
34 changes: 34 additions & 0 deletions Source/Core/Common/x64Emitter.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,12 @@ enum NormalOp {
nrmXCHG,
};

// Selector values for the x87 load/store emitters. XEmitter::WriteFloatLoadStore
// casts the value to X64Reg so that it lands in the reg field of the ModR/M
// byte, which x87 instructions use as part of the opcode.
enum FloatOp {
	floatLD  = 0x0, // passed by FLD
	floatST  = 0x2, // passed by FST
	floatSTP = 0x3, // passed by FSTP
};

class XEmitter;

// RIP addressing does not benefit from micro op fusion on Core arch
Expand All @@ -118,6 +124,7 @@ struct OpArg
void WriteRex(XEmitter *emit, int opBits, int bits, int customOp = -1) const;
void WriteVex(XEmitter* emit, int size, int packed, Gen::X64Reg regOp1, X64Reg regOp2) const;
void WriteRest(XEmitter *emit, int extraBytes=0, X64Reg operandReg=(X64Reg)0xFF, bool warn_64bit_offset = true) const;
void WriteFloatModRM(XEmitter *emit, FloatOp op);
void WriteSingleByteOp(XEmitter *emit, u8 op, X64Reg operandReg, int bits);
// This one is public - must be written to
u64 offset; // use RIP-relative as much as possible - 64-bit immediates are not available.
Expand Down Expand Up @@ -247,6 +254,7 @@ class XEmitter
void WriteSSEOp(int size, u8 sseOp, bool packed, X64Reg regOp, OpArg arg, int extrabytes = 0);
void WriteAVXOp(int size, u8 sseOp, bool packed, X64Reg regOp, OpArg arg, int extrabytes = 0);
void WriteAVXOp(int size, u8 sseOp, bool packed, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0);
void WriteFloatLoadStore(int bits, FloatOp op, OpArg arg);
void WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg &a1, const OpArg &a2);

protected:
Expand Down Expand Up @@ -427,6 +435,28 @@ class XEmitter
void REP();
void REPNE();

// x87
// Bit masks for the individual fields of the 16-bit x87 status word,
// written as shifts so the bit position of each flag is explicit.
enum x87StatusWordBits {
	x87_InvalidOperation    = 1 << 0,
	x87_DenormalizedOperand = 1 << 1,
	x87_DivisionByZero      = 1 << 2,
	x87_Overflow            = 1 << 3,
	x87_Underflow           = 1 << 4,
	x87_Precision           = 1 << 5,
	x87_StackFault          = 1 << 6,
	x87_ErrorSummary        = 1 << 7,
	x87_C0                  = 1 << 8,
	x87_C1                  = 1 << 9,
	x87_C2                  = 1 << 10,
	x87_TopOfStack          = 7 << 11, // three-bit field covering bits 11-13
	x87_C3                  = 1 << 14,
	x87_FPUBusy             = 1 << 15,
};

void FLD(int bits, OpArg src);
void FST(int bits, OpArg dest);
void FSTP(int bits, OpArg dest);
void FNSTSW_AX();
void FWAIT();

// SSE/SSE2: Floating point arithmetic
Expand Down Expand Up @@ -553,6 +583,7 @@ class XEmitter
void PUNPCKLWD(X64Reg dest, const OpArg &arg);
void PUNPCKLDQ(X64Reg dest, const OpArg &arg);

void PTEST(X64Reg dest, OpArg arg);
void PAND(X64Reg dest, OpArg arg);
void PANDN(X64Reg dest, OpArg arg);
void PXOR(X64Reg dest, OpArg arg);
Expand Down Expand Up @@ -608,6 +639,7 @@ class XEmitter
void PSRLW(X64Reg reg, int shift);
void PSRLD(X64Reg reg, int shift);
void PSRLQ(X64Reg reg, int shift);
void PSRLQ(X64Reg reg, OpArg arg);

void PSLLW(X64Reg reg, int shift);
void PSLLD(X64Reg reg, int shift);
Expand All @@ -622,6 +654,8 @@ class XEmitter
void VMULSD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VDIVSD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VSQRTSD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VPAND(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VPANDN(X64Reg regOp1, X64Reg regOp2, OpArg arg);

void RTDSC();

Expand Down
9 changes: 4 additions & 5 deletions Source/Core/Common/x64FPURoundMode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,11 @@ static const unsigned short FPU_ROUND_MASK = 3 << 10;
#endif

// MXCSR bit masks. Each constant is defined exactly once and marked static so
// it stays local to this translation unit.

// OR-mask for disabling FPU exceptions (bits 7-12 in the MXCSR register)
static const u32 EXCEPTION_MASK = 0x1F80;
// Denormals-Are-Zero (non-IEEE mode: denormal inputs are set to +/- 0)
static const u32 DAZ = 0x40;
// Flush-To-Zero (non-IEEE mode: denormal outputs are set to +/- 0)
static const u32 FTZ = 0x8000;

namespace FPURoundMode
{
Expand Down Expand Up @@ -100,8 +100,7 @@ namespace FPURoundMode
FTZ, // flush-to-zero only
FTZ | DAZ, // flush-to-zero and denormals-are-zero (may not be supported)
};
// FIXME: proper (?) non-IEEE mode emulation causes issues in lots of games
if (nonIEEEMode && false)
if (nonIEEEMode)
{
csr |= denormalLUT[cpu_info.bFlushToZero];
}
Expand Down
35 changes: 35 additions & 0 deletions Source/Core/Core/PowerPC/Interpreter/Interpreter_FPUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -231,3 +231,38 @@ inline u32 ConvertToSingleFTZ(u64 x)
return (x >> 32) & 0x80000000;
}
}

// Widens the raw bit pattern of a single-precision value (_x) into the raw
// bit pattern of a double, using integer operations only.
//
// This is a little-endian re-implementation of the algorithm described in
// the PowerPC Programming Environments Manual for loading single
// precision floating point numbers.
// See page 566 of http://www.freescale.com/files/product/doc/MPCFPE32B.pdf
inline u64 ConvertToDouble(u32 _x)
{
	const u64 bits = _x;
	const u64 biased_exp = (bits >> 23) & 0xff;
	u64 mantissa = bits & 0x007fffff;

	if (biased_exp == 0 && mantissa != 0)
	{
		// Subnormal number: shift the mantissa left until its leading one
		// reaches bit 23 (the implicit-one position), lowering the double
		// exponent by one for every shift.
		u64 double_exp = 1023 - 126;
		do
		{
			mantissa <<= 1;
			double_exp -= 1;
		} while ((mantissa & 0x00800000) == 0);

		return ((bits & 0x80000000) << 32) | (double_exp << 52) | ((mantissa & 0x007fffff) << 29);
	}

	// Every other input keeps its sign and top exponent bit in bits 63..62
	// and its low 30 bits in bits 58..29. Only the value replicated into
	// bits 61..59 differs:
	//   normal number        -> inverse of the top exponent bit
	//   QNaN, SNaN, Inf, Zero -> copy of the top exponent bit
	const u64 top_exp_bit = biased_exp >> 7;
	const bool is_normal = biased_exp > 0 && biased_exp < 255;
	const u64 fill = is_normal ? (top_exp_bit ^ 1) : top_exp_bit;
	const u64 widened_exp = (fill << 61) | (fill << 60) | (fill << 59);

	return ((bits & 0xc0000000) << 32) | widened_exp | ((bits & 0x3fffffff) << 29);
}
27 changes: 12 additions & 15 deletions Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStore.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,9 +92,9 @@ void Interpreter::lfs(UGeckoInstruction _inst)
u32 uTemp = Memory::Read_U32(Helper_Get_EA(_inst));
if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI))
{
double value = *(float*)&uTemp;
rPS0(_inst.FD) = value;
rPS1(_inst.FD) = value;
u64 value = ConvertToDouble(uTemp);
riPS0(_inst.FD) = value;
riPS1(_inst.FD) = value;
}
}

Expand All @@ -104,9 +104,9 @@ void Interpreter::lfsu(UGeckoInstruction _inst)
u32 uTemp = Memory::Read_U32(uAddress);
if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI))
{
double value = *(float*)&uTemp;
rPS0(_inst.FD) = value;
rPS1(_inst.FD) = value;
u64 value = ConvertToDouble(uTemp);
riPS0(_inst.FD) = value;
riPS1(_inst.FD) = value;
m_GPR[_inst.RA] = uAddress;
}

Expand All @@ -118,9 +118,9 @@ void Interpreter::lfsux(UGeckoInstruction _inst)
u32 uTemp = Memory::Read_U32(uAddress);
if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI))
{
double value = *(float*)&uTemp;
rPS0(_inst.FD) = value;
rPS1(_inst.FD) = value;
u64 value = ConvertToDouble(uTemp);
riPS0(_inst.FD) = value;
riPS1(_inst.FD) = value;
m_GPR[_inst.RA] = uAddress;
}
}
Expand All @@ -130,9 +130,9 @@ void Interpreter::lfsx(UGeckoInstruction _inst)
u32 uTemp = Memory::Read_U32(Helper_Get_EA_X(_inst));
if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI))
{
double value = *(float*)&uTemp;
rPS0(_inst.FD) = value;
rPS1(_inst.FD) = value;
u64 value = ConvertToDouble(uTemp);
riPS0(_inst.FD) = value;
riPS1(_inst.FD) = value;
}
}

Expand Down Expand Up @@ -281,9 +281,6 @@ void Interpreter::stfdu(UGeckoInstruction _inst)

void Interpreter::stfs(UGeckoInstruction _inst)
{
//double value = rPS0(_inst.FS);
//float fTemp = (float)value;
//Memory::Write_U32(*(u32*)&fTemp, Helper_Get_EA(_inst));
Memory::Write_U32(ConvertToSingle(riPS0(_inst.FS)), Helper_Get_EA(_inst));
}

Expand Down
2 changes: 1 addition & 1 deletion Source/Core/Core/PowerPC/Jit64/JitRegCache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -374,7 +374,7 @@ void RegCache::Flush(FlushMode mode)
{
if (locks[i])
{
PanicAlert("Someone forgot to unlock PPC reg %i.", i);
PanicAlert("Someone forgot to unlock PPC reg %i (X64 reg %i).", i, RX(i));
}
if (regs[i].away)
{
Expand Down
Loading

0 comments on commit 311caef

Please sign in to comment.