Skip to content

Commit

Permalink
Merge pull request #1100 from FioraAeterna/psq_insts
Browse files Browse the repository at this point in the history
JIT: implement remaining psq_l/st instruction variants
  • Loading branch information
Sonicadvance1 committed Sep 19, 2014
2 parents 522d7eb + 29fc151 commit 7cc586d
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 34 deletions.
3 changes: 2 additions & 1 deletion Source/Core/Core/PowerPC/Gekko.h
Expand Up @@ -267,7 +267,8 @@ union UGeckoInstruction
// paired single quantized load/store
struct
{
u32 : 7;
u32 : 1;
u32 SUBOP6 : 6;
// Graphics quantization register to use
u32 Ix : 3;
// 0: paired single, 1: scalar
Expand Down
4 changes: 2 additions & 2 deletions Source/Core/Core/PowerPC/Jit64/Jit.h
Expand Up @@ -220,8 +220,8 @@ class Jit64 : public Jitx86Base
void lfXXX(UGeckoInstruction inst);
void stfXXX(UGeckoInstruction inst);
void stfiwx(UGeckoInstruction inst);
void psq_l(UGeckoInstruction inst);
void psq_st(UGeckoInstruction inst);
void psq_lXX(UGeckoInstruction inst);
void psq_stXX(UGeckoInstruction inst);

void fmaddXX(UGeckoInstruction inst);
void fsign(UGeckoInstruction inst);
Expand Down
16 changes: 8 additions & 8 deletions Source/Core/Core/PowerPC/Jit64/Jit64_Tables.cpp
Expand Up @@ -92,10 +92,10 @@ static GekkoOPTemplate primarytable[] =
{54, &Jit64::stfXXX}, //"stfd", OPTYPE_STOREFP, FL_IN_A}},
{55, &Jit64::stfXXX}, //"stfdu", OPTYPE_STOREFP, FL_OUT_A | FL_IN_A}},

{56, &Jit64::psq_l}, //"psq_l", OPTYPE_PS, FL_IN_A}},
{57, &Jit64::psq_l}, //"psq_lu", OPTYPE_PS, FL_OUT_A | FL_IN_A}},
{60, &Jit64::psq_st}, //"psq_st", OPTYPE_PS, FL_IN_A}},
{61, &Jit64::psq_st}, //"psq_stu", OPTYPE_PS, FL_OUT_A | FL_IN_A}},
{56, &Jit64::psq_lXX}, //"psq_l", OPTYPE_PS, FL_IN_A}},
{57, &Jit64::psq_lXX}, //"psq_lu", OPTYPE_PS, FL_OUT_A | FL_IN_A}},
{60, &Jit64::psq_stXX}, //"psq_st", OPTYPE_PS, FL_IN_A}},
{61, &Jit64::psq_stXX}, //"psq_stu", OPTYPE_PS, FL_OUT_A | FL_IN_A}},

//missing: 0, 5, 6, 9, 22, 30, 62, 58
{0, &Jit64::FallBackToInterpreter}, //"unknown_instruction", OPTYPE_UNKNOWN, 0}},
Expand Down Expand Up @@ -150,10 +150,10 @@ static GekkoOPTemplate table4_2[] =

// Dispatch entries for the indexed paired-single quantized load/store forms
// (psq_lx, psq_stx, psq_lux, psq_stux). Each entry pairs a numeric key with the
// Jit64 member function that handles it; the trailing comment names the
// instruction the entry stands for.
// NOTE(review): presumably the keys are sub-opcodes under primary opcode 4 — confirm.
// NOTE(review): every key (6, 7, 38, 39) is listed twice below, once with
// FallBackToInterpreter and once with psq_lXX/psq_stXX. This looks like the
// before/after lines of a diff shown together — confirm that only one set of
// entries exists in the real file.
static GekkoOPTemplate table4_3[] =
{
{6, &Jit64::FallBackToInterpreter}, //"psq_lx", OPTYPE_PS, 0}},
{7, &Jit64::FallBackToInterpreter}, //"psq_stx", OPTYPE_PS, 0}},
{38, &Jit64::FallBackToInterpreter}, //"psq_lux", OPTYPE_PS, 0}},
{39, &Jit64::FallBackToInterpreter}, //"psq_stux", OPTYPE_PS, 0}},
{6, &Jit64::psq_lXX}, //"psq_lx", OPTYPE_PS, 0}},
{7, &Jit64::psq_stXX}, //"psq_stx", OPTYPE_PS, 0}},
{38, &Jit64::psq_lXX}, //"psq_lux", OPTYPE_PS, 0}},
{39, &Jit64::psq_stXX}, //"psq_stux", OPTYPE_PS, 0}},
};

static GekkoOPTemplate table19[] =
Expand Down
71 changes: 48 additions & 23 deletions Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp
Expand Up @@ -16,33 +16,41 @@ using namespace Gen;

// The big problem is likely instructions that set the quantizers in the same block.
// We will have to break the block after the quantizers are written to.
void Jit64::psq_st(UGeckoInstruction inst)
void Jit64::psq_stXX(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITLoadStorePairedOff);
FALLBACK_IF(!inst.RA);

s32 offset = inst.SIMM_12;
bool update = inst.OPCD == 61 && offset;
bool indexed = inst.OPCD == 4;
bool update = (inst.OPCD == 61 && offset) || (inst.OPCD == 4 && inst.SUBOP6 & 32);
int a = inst.RA;
int s = inst.RS;
int b = indexed ? inst.RB : a;
int s = inst.FS;

gpr.Lock(a, b);
gpr.FlushLockX(RSCRATCH_EXTRA);
if (update)
gpr.BindToRegister(a, true, true);
fpr.BindToRegister(s, true, false);
if (offset && gpr.R(a).IsSimpleReg())
if (gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg() && (indexed || offset))
{
LEA(32, RSCRATCH_EXTRA, MDisp(gpr.RX(a), offset));
if (indexed)
LEA(32, RSCRATCH_EXTRA, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0));
else
LEA(32, RSCRATCH_EXTRA, MDisp(gpr.RX(a), offset));
}
else
{
MOV(32, R(RSCRATCH_EXTRA), gpr.R(a));
if (offset)
if (indexed)
ADD(32, R(RSCRATCH_EXTRA), gpr.R(b));
else if (offset)
ADD(32, R(RSCRATCH_EXTRA), Imm32((u32)offset));
}
// In memcheck mode, don't update the address until the exception check
if (update && offset && !js.memcheck)
if (update && !js.memcheck)
MOV(32, gpr.R(a), R(RSCRATCH_EXTRA));
// Some games (e.g. Dirt 2) incorrectly set the unused bits which breaks the lookup table code.
// Hence, we need to mask out the unused bits. The layout of the GQR register is
Expand All @@ -67,56 +75,73 @@ void Jit64::psq_st(UGeckoInstruction inst)
CALLptr(MScaled(RSCRATCH, SCALE_8, (u32)(u64)asm_routines.pairedStoreQuantized));
}

if (update && offset && js.memcheck)
if (update && js.memcheck)
{
MEMCHECK_START(false)
ADD(32, gpr.R(a), Imm32((u32)offset));
if (indexed)
ADD(32, gpr.R(a), gpr.R(b));
else
ADD(32, gpr.R(a), Imm32((u32)offset));
MEMCHECK_END
}
gpr.UnlockAll();
gpr.UnlockAllX();
}

void Jit64::psq_l(UGeckoInstruction inst)
void Jit64::psq_lXX(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITLoadStorePairedOff);
FALLBACK_IF(!inst.RA);

s32 offset = inst.SIMM_12;
bool update = inst.OPCD == 57 && offset;
bool indexed = inst.OPCD == 4;
bool update = (inst.OPCD == 57 && offset) || (inst.OPCD == 4 && inst.SUBOP6 & 32);
int a = inst.RA;
int s = inst.RS;
int b = indexed ? inst.RB : a;
int s = inst.FS;

gpr.Lock(a, b);
gpr.FlushLockX(RSCRATCH_EXTRA);
gpr.BindToRegister(a, true, update && offset);
gpr.BindToRegister(a, true, update);
fpr.BindToRegister(s, false, true);
if (offset && gpr.R(a).IsSimpleReg())
if (gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg() && (indexed || offset))
{
LEA(32, RSCRATCH_EXTRA, MDisp(gpr.RX(a), offset));
if (indexed)
LEA(32, RSCRATCH_EXTRA, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0));
else
LEA(32, RSCRATCH_EXTRA, MDisp(gpr.RX(a), offset));
}
else
{
MOV(32, R(RSCRATCH_EXTRA), gpr.R(a));
if (offset)
if (indexed)
ADD(32, R(RSCRATCH_EXTRA), gpr.R(b));
else if (offset)
ADD(32, R(RSCRATCH_EXTRA), Imm32((u32)offset));
}
// In memcheck mode, don't update the address until the exception check
if (update && offset && !js.memcheck)
if (update && !js.memcheck)
MOV(32, gpr.R(a), R(RSCRATCH_EXTRA));
MOV(32, R(RSCRATCH2), Imm32(0x3F07));
AND(32, R(RSCRATCH2), M(((char *)&GQR(inst.I)) + 2));

// Get the high part of the GQR register
OpArg gqr = PPCSTATE(spr[SPR_GQR0 + inst.I]);
gqr.offset += 2;

AND(32, R(RSCRATCH2), gqr);
MOVZX(32, 8, RSCRATCH, R(RSCRATCH2));
if (inst.W)
OR(32, R(RSCRATCH), Imm8(8));

CALLptr(MScaled(RSCRATCH, SCALE_8, (u32)(u64)asm_routines.pairedLoadQuantized));
CALLptr(MScaled(RSCRATCH, SCALE_8, (u32)(u64)(&asm_routines.pairedLoadQuantized[inst.W * 8])));

MEMCHECK_START(false)
CVTPS2PD(fpr.RX(s), R(XMM0));
if (update && offset && js.memcheck)
if (update && js.memcheck)
{
ADD(32, gpr.R(a), Imm32((u32)offset));
if (indexed)
ADD(32, gpr.R(a), gpr.R(b));
else
ADD(32, gpr.R(a), Imm32((u32)offset));
}
MEMCHECK_END

Expand Down

0 comments on commit 7cc586d

Please sign in to comment.