diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter.cpp index 5f1215f06a1c..627e56e32dbf 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter.cpp +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter.cpp @@ -84,7 +84,7 @@ void Trace(UGeckoInstruction& instCode) char ppcInst[256]; DisassembleGekko(instCode.hex, PC, ppcInst, 256); - DEBUG_LOG(POWERPC, "INTER PC: %08x SRR0: %08x SRR1: %08x CRfast: %02x%02x%02x%02x%02x%02x%02x%02x FPSCR: %08x MSR: %08x LR: %08x %s %s %08x %s", PC, SRR0, SRR1, PowerPC::ppcState.cr_fast[0], PowerPC::ppcState.cr_fast[1], PowerPC::ppcState.cr_fast[2], PowerPC::ppcState.cr_fast[3], PowerPC::ppcState.cr_fast[4], PowerPC::ppcState.cr_fast[5], PowerPC::ppcState.cr_fast[6], PowerPC::ppcState.cr_fast[7], PowerPC::ppcState.fpscr, PowerPC::ppcState.msr, PowerPC::ppcState.spr[8], regs.c_str(), fregs.c_str(), instCode.hex, ppcInst); + DEBUG_LOG(POWERPC, "INTER PC: %08x SRR0: %08x SRR1: %08x CRval: %016lx FPSCR: %08x MSR: %08x LR: %08x %s %08x %s", PC, SRR0, SRR1, PowerPC::ppcState.cr_val[0], PowerPC::ppcState.fpscr, PowerPC::ppcState.msr, PowerPC::ppcState.spr[8], regs.c_str(), instCode.hex, ppcInst); } int Interpreter::SingleStepInner(void) diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_Integer.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_Integer.cpp index c166f107f24c..0fa0f8f4a82a 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_Integer.cpp +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_Integer.cpp @@ -4,32 +4,22 @@ #include "Core/PowerPC/Interpreter/Interpreter.h" -void Interpreter::Helper_UpdateCR0(u32 _uValue) -{ - u32 new_cr0; - int sValue = (int)_uValue; - if (sValue > 0) - new_cr0 = 0x4; - else if (sValue < 0) - new_cr0 = 0x8; - else - new_cr0 = 0x2; - new_cr0 |= GetXER_SO(); - SetCRField(0, new_cr0); +void Interpreter::Helper_UpdateCR0(u32 value) +{ + s64 sign_extended = (s64)(s32)value; + u64 cr_val = 
(u64)sign_extended; + cr_val = (cr_val & ~(1ull << 61)) | ((u64)GetXER_SO() << 61); + + PowerPC::ppcState.cr_val[0] = cr_val; } -void Interpreter::Helper_UpdateCRx(int _x, u32 _uValue) +void Interpreter::Helper_UpdateCRx(int idx, u32 value) { - u32 new_crX; - int sValue = (int)_uValue; - if (sValue > 0) - new_crX = 0x4; - else if (sValue < 0) - new_crX = 0x8; - else - new_crX = 0x2; - new_crX |= GetXER_SO(); - SetCRField(_x, new_crX); + s64 sign_extended = (s64)(s32)value; + u64 cr_val = (u64)sign_extended; + cr_val = (cr_val & ~(1ull << 61)) | ((u64)GetXER_SO() << 61); + + PowerPC::ppcState.cr_val[idx] = cr_val; } u32 Interpreter::Helper_Carry(u32 _uValue1, u32 _uValue2) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/PowerPC/Jit64/Jit.cpp index 757c15c89d29..21d9f10d1459 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit.cpp @@ -382,10 +382,8 @@ void Jit64::Trace() } #endif - DEBUG_LOG(DYNA_REC, "JIT64 PC: %08x SRR0: %08x SRR1: %08x CRfast: %02x%02x%02x%02x%02x%02x%02x%02x FPSCR: %08x MSR: %08x LR: %08x %s %s", - PC, SRR0, SRR1, PowerPC::ppcState.cr_fast[0], PowerPC::ppcState.cr_fast[1], PowerPC::ppcState.cr_fast[2], PowerPC::ppcState.cr_fast[3], - PowerPC::ppcState.cr_fast[4], PowerPC::ppcState.cr_fast[5], PowerPC::ppcState.cr_fast[6], PowerPC::ppcState.cr_fast[7], PowerPC::ppcState.fpscr, - PowerPC::ppcState.msr, PowerPC::ppcState.spr[8], regs.c_str(), fregs.c_str()); + DEBUG_LOG(DYNA_REC, "JIT64 PC: %08x SRR0: %08x SRR1: %08x FPSCR: %08x MSR: %08x LR: %08x %s %s", + PC, SRR0, SRR1, PowerPC::ppcState.fpscr, PowerPC::ppcState.msr, PowerPC::ppcState.spr[8], regs.c_str(), fregs.c_str()); } void STACKALIGN Jit64::Jit(u32 em_address) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.h b/Source/Core/Core/PowerPC/Jit64/Jit.h index d273d32aaf30..f0890dba707d 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.h +++ b/Source/Core/Core/PowerPC/Jit64/Jit.h @@ -106,6 +106,16 @@ class Jit64 : public Jitx86Base void 
GenerateRC(); void ComputeRC(const Gen::OpArg & arg); + // Reads a given bit of a given CR register part. Clobbers ABI_PARAM1, + // don't forget to xlock it before. + void GetCRFieldBit(int field, int bit, Gen::X64Reg out); + // Clobbers ABI_PARAM1 and ABI_PARAM2, xlock them before. + void SetCRFieldBit(int field, int bit, Gen::X64Reg in); + + // Generates a branch that will check if a given bit of a CR register part + // is set or not. + FixupBranch JumpIfCRFieldBit(int field, int bit, bool jump_if_set = true); + void tri_op(int d, int a, int b, bool reversible, void (XEmitter::*op)(Gen::X64Reg, Gen::OpArg)); typedef u32 (*Operation)(u32 a, u32 b); void regimmop(int d, int a, bool binary, u32 value, Operation doop, void (XEmitter::*op)(int, const Gen::OpArg&, const Gen::OpArg&), bool Rc = false, bool carry = false); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp index 4b4e9ab0e6d5..135e0c4f3c4a 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp @@ -117,11 +117,8 @@ void Jit64::bcx(UGeckoInstruction inst) FixupBranch pConditionDontBranch; if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0) // Test a CR bit { - TEST(8, M(&PowerPC::ppcState.cr_fast[inst.BI >> 2]), Imm8(8 >> (inst.BI & 3))); - if (inst.BO & BO_BRANCH_IF_TRUE) // Conditional branch - pConditionDontBranch = J_CC(CC_Z, true); - else - pConditionDontBranch = J_CC(CC_NZ, true); + pConditionDontBranch = JumpIfCRFieldBit(inst.BI >> 2, 3 - (inst.BI & 3), + !(inst.BO_2 & BO_BRANCH_IF_TRUE)); } if (inst.LK) @@ -179,14 +176,8 @@ void Jit64::bcctrx(UGeckoInstruction inst) // BO_2 == 001zy -> b if false // BO_2 == 011zy -> b if true - // Ripped from bclrx - TEST(8, M(&PowerPC::ppcState.cr_fast[inst.BI >> 2]), Imm8(8 >> (inst.BI & 3))); - Gen::CCFlags branch; - if (inst.BO_2 & BO_BRANCH_IF_TRUE) - branch = CC_Z; - else - branch = CC_NZ; - FixupBranch b = J_CC(branch, true); + FixupBranch b = 
JumpIfCRFieldBit(inst.BI >> 2, 3 - (inst.BI & 3), + !(inst.BO_2 & BO_BRANCH_IF_TRUE)); MOV(32, R(EAX), M(&CTR)); AND(32, R(EAX), Imm32(0xFFFFFFFC)); //MOV(32, M(&PC), R(EAX)); => Already done in WriteExitDestInEAX() @@ -222,11 +213,8 @@ void Jit64::bclrx(UGeckoInstruction inst) FixupBranch pConditionDontBranch; if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0) // Test a CR bit { - TEST(8, M(&PowerPC::ppcState.cr_fast[inst.BI >> 2]), Imm8(8 >> (inst.BI & 3))); - if (inst.BO & BO_BRANCH_IF_TRUE) // Conditional branch - pConditionDontBranch = J_CC(CC_Z, true); - else - pConditionDontBranch = J_CC(CC_NZ, true); + pConditionDontBranch = JumpIfCRFieldBit(inst.BI >> 2, 3 - (inst.BI & 3), + !(inst.BO_2 & BO_BRANCH_IF_TRUE)); } // This below line can be used to prove that blr "eats flags" in practice. diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp index 0552a2c751d4..9ba4588ad4e3 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp @@ -236,26 +236,33 @@ void Jit64::fcmpx(UGeckoInstruction inst) pGreater = J_CC(CC_B); } - // Equal - MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x2)); + // Read the documentation about cr_val in PowerPC.h to understand these + // magic values. + + // Equal: !GT (bit 63 set), !LT (bit 62 not set), !SO (bit 61 not set), EQ + // (bits 31-0 not set). + MOV(64, R(RAX), Imm64(0x8000000000000000)); continue1 = J(); - // NAN + // NAN: !GT (bit 63 set), !LT (bit 62 not set), SO (bit 61 set), !EQ (bit 0 + // set). SetJumpTarget(pNaN); - MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x1)); + MOV(64, R(RAX), Imm64(0xA000000000000001)); if (a != b) { continue2 = J(); - // Greater Than + // Greater Than: GT (bit 63 not set), !LT (bit 62 not set), !SO (bit 61 + // not set), !EQ (bit 0 set). 
SetJumpTarget(pGreater); - MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x4)); + MOV(64, R(RAX), Imm64(0x0000000000000001)); continue3 = J(); - // Less Than + // Less Than: !GT (bit 63 set), LT (bit 62 set), !SO (bit 61 not set), + // !EQ (bit 0 set). SetJumpTarget(pLesser); - MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x8)); + MOV(64, R(RAX), Imm64(0xC000000000000001)); } SetJumpTarget(continue1); @@ -265,5 +272,6 @@ void Jit64::fcmpx(UGeckoInstruction inst) SetJumpTarget(continue3); } + MOV(64, M(&PowerPC::ppcState.cr_val[crf]), R(RAX)); fpr.UnlockAll(); } diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp index 389a6368f31a..c3d54217386d 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp @@ -116,57 +116,19 @@ void Jit64::GenerateCarry() SetJumpTarget(pContinue); } -// Assumes that Sign and Zero flags were set by the last operation. Preserves all flags and registers. -void Jit64::GenerateRC() -{ - FixupBranch pZero = J_CC(CC_Z); - FixupBranch pNegative = J_CC(CC_S); - MOV(8, M(&PowerPC::ppcState.cr_fast[0]), Imm8(0x4)); // Result > 0 - FixupBranch continue1 = J(); - - SetJumpTarget(pNegative); - MOV(8, M(&PowerPC::ppcState.cr_fast[0]), Imm8(0x8)); // Result < 0 - FixupBranch continue2 = J(); - - SetJumpTarget(pZero); - MOV(8, M(&PowerPC::ppcState.cr_fast[0]), Imm8(0x2)); // Result == 0 - - SetJumpTarget(continue1); - SetJumpTarget(continue2); -} - void Jit64::ComputeRC(const Gen::OpArg & arg) { if (arg.IsImm()) { - s32 value = (s32)arg.offset; - if (value < 0) - MOV(8, M(&PowerPC::ppcState.cr_fast[0]), Imm8(0x8)); - else if (value > 0) - MOV(8, M(&PowerPC::ppcState.cr_fast[0]), Imm8(0x4)); - else - MOV(8, M(&PowerPC::ppcState.cr_fast[0]), Imm8(0x2)); + // TODO(delroth): Moving a 32 bit immediate to the lower part of a 64 + // bit reg will sign extend. 
+ MOV(64, R(RAX), Imm64((s64)(s32)arg.offset)); + MOV(64, M(&PowerPC::ppcState.cr_val[0]), R(RAX)); } else { - if (arg.IsSimpleReg()) - TEST(32, arg, arg); - else - CMP(32, arg, Imm8(0)); - FixupBranch pLesser = J_CC(CC_L); - FixupBranch pGreater = J_CC(CC_G); - MOV(8, M(&PowerPC::ppcState.cr_fast[0]), Imm8(0x2)); // _x86Reg == 0 - FixupBranch continue1 = J(); - - SetJumpTarget(pGreater); - MOV(8, M(&PowerPC::ppcState.cr_fast[0]), Imm8(0x4)); // _x86Reg > 0 - FixupBranch continue2 = J(); - - SetJumpTarget(pLesser); - MOV(8, M(&PowerPC::ppcState.cr_fast[0]), Imm8(0x8)); // _x86Reg < 0 - - SetJumpTarget(continue1); - SetJumpTarget(continue2); + MOVSX(64, 32, RAX, arg); + MOV(64, M(&PowerPC::ppcState.cr_val[0]), R(RAX)); } } @@ -192,26 +154,20 @@ void Jit64::regimmop(int d, int a, bool binary, u32 value, Operation doop, void { gpr.KillImmediate(d, true, true); (this->*op)(32, gpr.R(d), Imm32(value)); //m_GPR[d] = m_GPR[_inst.RA] + _inst.SIMM_16; - if (Rc) - { - // All of the possible passed operators affect Sign/Zero flags - GenerateRC(); - } if (carry) GenerateCarry(); + if (Rc) + ComputeRC(gpr.R(d)); } else { gpr.BindToRegister(d, false); MOV(32, gpr.R(d), gpr.R(a)); (this->*op)(32, gpr.R(d), Imm32(value)); //m_GPR[d] = m_GPR[_inst.RA] + _inst.SIMM_16; - if (Rc) - { - // All of the possible passed operators affect Sign/Zero flags - GenerateRC(); - } if (carry) GenerateCarry(); + if (Rc) + ComputeRC(gpr.R(d)); } } else if (doop == Add) @@ -219,9 +175,7 @@ void Jit64::regimmop(int d, int a, bool binary, u32 value, Operation doop, void // a == 0, which for these instructions imply value = 0 gpr.SetImmediate32(d, value); if (Rc) - { ComputeRC(gpr.R(d)); - } } else { @@ -378,7 +332,8 @@ void Jit64::cmpXX(UGeckoInstruction inst) else compareResult = 0x8; } - MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(compareResult)); + MOV(64, R(RAX), Imm64(PPCCRToInternal(compareResult))); + MOV(64, M(&PowerPC::ppcState.cr_val[crf]), R(RAX)); gpr.UnlockAll(); if (merge_branch) @@ 
-436,71 +391,58 @@ void Jit64::cmpXX(UGeckoInstruction inst) } else { - Gen::CCFlags less_than, greater_than; if (signedCompare) { - less_than = CC_L; - greater_than = CC_G; + if (gpr.R(a).IsImm()) + MOV(64, R(RAX), gpr.R(a)); + else + MOVSX(64, 32, RAX, gpr.R(a)); + if (!comparand.IsImm()) + { + MOVSX(64, 32, ABI_PARAM1, comparand); + comparand = R(ABI_PARAM1); + } } else { - less_than = CC_B; - greater_than = CC_A; - } + if (gpr.R(a).IsImm()) + MOV(32, R(RAX), gpr.R(a)); + else + MOVZX(64, 32, RAX, gpr.R(a)); - if (gpr.R(a).IsImm() || (!gpr.R(a).IsSimpleReg() && !comparand.IsImm() && !comparand.IsSimpleReg())) - { - // Syntax for CMP is invalid with such arguments. We must load RA in a register. - gpr.BindToRegister(a, true, false); + if (comparand.IsImm()) + MOV(32, R(ABI_PARAM1), comparand); + else + MOVZX(64, 32, ABI_PARAM1, comparand); + comparand = R(ABI_PARAM1); } - CMP(32, gpr.R(a), comparand); - gpr.UnlockAll(); - - if (!merge_branch) - { - // Keep the normal code separate for clarity. + SUB(64, R(RAX), comparand); + MOV(64, M(&PowerPC::ppcState.cr_val[crf]), R(RAX)); - FixupBranch pLesser = J_CC(less_than); - FixupBranch pGreater = J_CC(greater_than); - MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x2)); // _x86Reg == 0 - FixupBranch continue1 = J(); - SetJumpTarget(pGreater); - MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x4)); // _x86Reg > 0 - FixupBranch continue2 = J(); - SetJumpTarget(pLesser); - MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x8)); // _x86Reg < 0 - SetJumpTarget(continue1); - SetJumpTarget(continue2); - // TODO: If we ever care about SO, borrow a trick from - // http://maws.mameworld.info/maws/mamesrc/src/emu/cpu/powerpc/drc_ops.c : bt, adc - } - else + if (merge_branch) { js.downcountAmount++; int test_bit = 8 >> (js.next_inst.BI & 3); - bool condition = (js.next_inst.BO & BO_BRANCH_IF_TRUE) ? 
false : true; + bool condition = js.next_inst.BO & BO_BRANCH_IF_TRUE; // Test swapping (in the future, will be used to inline across branches the right way) // if (rand() & 1) // std::swap(destination1, destination2), condition = !condition; + gpr.UnlockAll(); gpr.Flush(); fpr.Flush(); - FixupBranch pLesser = J_CC(less_than); - FixupBranch pGreater = J_CC(greater_than); - MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x2)); // == 0 - FixupBranch continue1 = J(); - - SetJumpTarget(pGreater); - MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x4)); // > 0 - FixupBranch continue2 = J(); - - SetJumpTarget(pLesser); - MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x8)); // < 0 - FixupBranch continue3; - if (!!(8 & test_bit) == condition) continue3 = J(); - if (!!(4 & test_bit) != condition) SetJumpTarget(continue2); - if (!!(2 & test_bit) != condition) SetJumpTarget(continue1); + FixupBranch pDontBranch; + if (test_bit & 8) + pDontBranch = J_CC(condition ? CC_GE : CC_L); // Test < 0, so jump over if >= 0. + else if (test_bit & 4) + pDontBranch = J_CC(condition ? CC_LE : CC_G); // Test > 0, so jump over if <= 0. + else if (test_bit & 2) + pDontBranch = J_CC(condition ? CC_NE : CC_E); // Test = 0, so jump over if != 0. + else // SO bit, do not branch (we don't emulate SO for cmp). + pDontBranch = J(); + + // Code that handles successful PPC branching. 
if (js.next_inst.OPCD == 16) // bcx { if (js.next_inst.LK) @@ -534,9 +476,7 @@ void Jit64::cmpXX(UGeckoInstruction inst) PanicAlert("WTF invalid branch"); } - if (!!(8 & test_bit) == condition) SetJumpTarget(continue3); - if (!!(4 & test_bit) == condition) SetJumpTarget(continue2); - if (!!(2 & test_bit) == condition) SetJumpTarget(continue1); + SetJumpTarget(pDontBranch); if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE)) { @@ -619,9 +559,7 @@ void Jit64::boolX(UGeckoInstruction inst) PanicAlert("WTF!"); } if (inst.Rc) - { ComputeRC(gpr.R(a)); - } } else if ((a == s) || (a == b)) { @@ -632,19 +570,11 @@ void Jit64::boolX(UGeckoInstruction inst) if (inst.SUBOP10 == 28) /* andx */ { AND(32, gpr.R(a), operand); - if (inst.Rc) - { - GenerateRC(); - } } else if (inst.SUBOP10 == 476) /* nandx */ { AND(32, gpr.R(a), operand); NOT(32, gpr.R(a)); - if (inst.Rc) - { - ComputeRC(gpr.R(a)); - } } else if (inst.SUBOP10 == 60) /* andcx */ { @@ -659,27 +589,15 @@ void Jit64::boolX(UGeckoInstruction inst) NOT(32, R(EAX)); AND(32, gpr.R(a), R(EAX)); } - if (inst.Rc) - { - GenerateRC(); - } } else if (inst.SUBOP10 == 444) /* orx */ { OR(32, gpr.R(a), operand); - if (inst.Rc) - { - GenerateRC(); - } } else if (inst.SUBOP10 == 124) /* norx */ { OR(32, gpr.R(a), operand); NOT(32, gpr.R(a)); - if (inst.Rc) - { - ComputeRC(gpr.R(a)); - } } else if (inst.SUBOP10 == 412) /* orcx */ { @@ -694,32 +612,22 @@ void Jit64::boolX(UGeckoInstruction inst) NOT(32, R(EAX)); OR(32, gpr.R(a), R(EAX)); } - if (inst.Rc) - { - GenerateRC(); - } } else if (inst.SUBOP10 == 316) /* xorx */ { XOR(32, gpr.R(a), operand); - if (inst.Rc) - { - GenerateRC(); - } } else if (inst.SUBOP10 == 284) /* eqvx */ { NOT(32, gpr.R(a)); XOR(32, gpr.R(a), operand); - if (inst.Rc) - { - GenerateRC(); - } } else { PanicAlert("WTF"); } + if (inst.Rc) + ComputeRC(gpr.R(a)); gpr.UnlockAll(); } else @@ -731,83 +639,53 @@ void Jit64::boolX(UGeckoInstruction inst) { MOV(32, gpr.R(a), gpr.R(s)); AND(32, 
gpr.R(a), gpr.R(b)); - if (inst.Rc) - { - GenerateRC(); - } } else if (inst.SUBOP10 == 476) /* nandx */ { MOV(32, gpr.R(a), gpr.R(s)); AND(32, gpr.R(a), gpr.R(b)); NOT(32, gpr.R(a)); - if (inst.Rc) - { - ComputeRC(gpr.R(a)); - } } else if (inst.SUBOP10 == 60) /* andcx */ { MOV(32, gpr.R(a), gpr.R(b)); NOT(32, gpr.R(a)); AND(32, gpr.R(a), gpr.R(s)); - if (inst.Rc) - { - GenerateRC(); - } } else if (inst.SUBOP10 == 444) /* orx */ { MOV(32, gpr.R(a), gpr.R(s)); OR(32, gpr.R(a), gpr.R(b)); - if (inst.Rc) - { - GenerateRC(); - } } else if (inst.SUBOP10 == 124) /* norx */ { MOV(32, gpr.R(a), gpr.R(s)); OR(32, gpr.R(a), gpr.R(b)); NOT(32, gpr.R(a)); - if (inst.Rc) - { - ComputeRC(gpr.R(a)); - } } else if (inst.SUBOP10 == 412) /* orcx */ { MOV(32, gpr.R(a), gpr.R(b)); NOT(32, gpr.R(a)); OR(32, gpr.R(a), gpr.R(s)); - if (inst.Rc) - { - GenerateRC(); - } } else if (inst.SUBOP10 == 316) /* xorx */ { MOV(32, gpr.R(a), gpr.R(s)); XOR(32, gpr.R(a), gpr.R(b)); - if (inst.Rc) - { - GenerateRC(); - } } else if (inst.SUBOP10 == 284) /* eqvx */ { MOV(32, gpr.R(a), gpr.R(s)); NOT(32, gpr.R(a)); XOR(32, gpr.R(a), gpr.R(b)); - if (inst.Rc) - { - GenerateRC(); - } } else { PanicAlert("WTF!"); } + if (inst.Rc) + ComputeRC(gpr.R(a)); gpr.UnlockAll(); } } @@ -943,9 +821,8 @@ void Jit64::subfcx(UGeckoInstruction inst) MOV(32, gpr.R(d), gpr.R(b)); SUB(32, gpr.R(d), gpr.R(a)); } - if (inst.Rc) { - GenerateRC(); - } + if (inst.Rc) + ComputeRC(gpr.R(d)); FinalizeCarryOverflow(inst.OE, true); gpr.UnlockAll(); @@ -980,10 +857,9 @@ void Jit64::subfex(UGeckoInstruction inst) NOT(32, gpr.R(d)); ADC(32, gpr.R(d), gpr.R(b)); } - if (inst.Rc) { - GenerateRC(); - } FinalizeCarryGenerateOverflowEAX(inst.OE, invertedCarry); + if (inst.Rc) + ComputeRC(gpr.R(d)); gpr.UnlockAll(); } @@ -1004,11 +880,9 @@ void Jit64::subfmex(UGeckoInstruction inst) } NOT(32, gpr.R(d)); ADC(32, gpr.R(d), Imm32(0xFFFFFFFF)); - if (inst.Rc) - { - GenerateRC(); - } FinalizeCarryGenerateOverflowEAX(inst.OE); + if (inst.Rc) + 
ComputeRC(gpr.R(d)); gpr.UnlockAll(); } @@ -1029,11 +903,9 @@ void Jit64::subfzex(UGeckoInstruction inst) } NOT(32, gpr.R(d)); ADC(32, gpr.R(d), Imm8(0)); - if (inst.Rc) - { - GenerateRC(); - } FinalizeCarryGenerateOverflowEAX(inst.OE); + if (inst.Rc) + ComputeRC(gpr.R(d)); gpr.UnlockAll(); } @@ -1076,14 +948,10 @@ void Jit64::subfx(UGeckoInstruction inst) MOV(32, gpr.R(d), gpr.R(b)); SUB(32, gpr.R(d), gpr.R(a)); } - if (inst.Rc) - { - GenerateRC(); - } if (inst.OE) - { GenerateOverflow(); - } + if (inst.Rc) + ComputeRC(gpr.R(d)); gpr.UnlockAll(); } } @@ -1505,14 +1373,10 @@ void Jit64::addx(UGeckoInstruction inst) gpr.Lock(a, b, d); gpr.BindToRegister(d, true); ADD(32, gpr.R(d), gpr.R(operand)); - if (inst.Rc) - { - GenerateRC(); - } if (inst.OE) - { GenerateOverflow(); - } + if (inst.Rc) + ComputeRC(gpr.R(d)); gpr.UnlockAll(); } else @@ -1521,14 +1385,10 @@ void Jit64::addx(UGeckoInstruction inst) gpr.BindToRegister(d, false); MOV(32, gpr.R(d), gpr.R(a)); ADD(32, gpr.R(d), gpr.R(b)); - if (inst.Rc) - { - GenerateRC(); - } if (inst.OE) - { GenerateOverflow(); - } + if (inst.Rc) + ComputeRC(gpr.R(d)); gpr.UnlockAll(); } } @@ -1547,11 +1407,9 @@ void Jit64::addex(UGeckoInstruction inst) GetCarryEAXAndClear(); ADC(32, gpr.R(d), gpr.R((d == a) ? 
b : a)); - if (inst.Rc) - { - GenerateRC(); - } FinalizeCarryGenerateOverflowEAX(inst.OE); + if (inst.Rc) + ComputeRC(gpr.R(d)); gpr.UnlockAll(); } else @@ -1562,11 +1420,9 @@ void Jit64::addex(UGeckoInstruction inst) GetCarryEAXAndClear(); MOV(32, gpr.R(d), gpr.R(a)); ADC(32, gpr.R(d), gpr.R(b)); - if (inst.Rc) - { - GenerateRC(); - } FinalizeCarryGenerateOverflowEAX(inst.OE); + if (inst.Rc) + ComputeRC(gpr.R(d)); gpr.UnlockAll(); } } @@ -1584,11 +1440,9 @@ void Jit64::addcx(UGeckoInstruction inst) gpr.BindToRegister(d, true); JitClearCAOV(inst.OE); ADD(32, gpr.R(d), gpr.R(operand)); - if (inst.Rc) - { - GenerateRC(); - } FinalizeCarryOverflow(inst.OE); + if (inst.Rc) + ComputeRC(gpr.R(d)); gpr.UnlockAll(); } else @@ -1598,11 +1452,9 @@ void Jit64::addcx(UGeckoInstruction inst) JitClearCAOV(inst.OE); MOV(32, gpr.R(d), gpr.R(a)); ADD(32, gpr.R(d), gpr.R(b)); - if (inst.Rc) - { - GenerateRC(); - } FinalizeCarryOverflow(inst.OE); + if (inst.Rc) + ComputeRC(gpr.R(d)); gpr.UnlockAll(); } } @@ -1621,11 +1473,9 @@ void Jit64::addmex(UGeckoInstruction inst) GetCarryEAXAndClear(); ADC(32, gpr.R(d), Imm32(0xFFFFFFFF)); - if (inst.Rc) - { - GenerateRC(); - } FinalizeCarryGenerateOverflowEAX(inst.OE); + if (inst.Rc) + ComputeRC(gpr.R(d)); gpr.UnlockAll(); } else @@ -1636,11 +1486,9 @@ void Jit64::addmex(UGeckoInstruction inst) GetCarryEAXAndClear(); MOV(32, gpr.R(d), gpr.R(a)); ADC(32, gpr.R(d), Imm32(0xFFFFFFFF)); - if (inst.Rc) - { - GenerateRC(); - } FinalizeCarryGenerateOverflowEAX(inst.OE); + if (inst.Rc) + ComputeRC(gpr.R(d)); gpr.UnlockAll(); } } @@ -1659,11 +1507,9 @@ void Jit64::addzex(UGeckoInstruction inst) GetCarryEAXAndClear(); ADC(32, gpr.R(d), Imm8(0)); - if (inst.Rc) - { - GenerateRC(); - } FinalizeCarryGenerateOverflowEAX(inst.OE); + if (inst.Rc) + ComputeRC(gpr.R(d)); gpr.UnlockAll(); } else @@ -1674,11 +1520,9 @@ void Jit64::addzex(UGeckoInstruction inst) GetCarryEAXAndClear(); MOV(32, gpr.R(d), gpr.R(a)); ADC(32, gpr.R(d), Imm8(0)); - if (inst.Rc) - { - 
GenerateRC(); - } FinalizeCarryGenerateOverflowEAX(inst.OE); + if (inst.Rc) + ComputeRC(gpr.R(d)); gpr.UnlockAll(); } } @@ -1714,17 +1558,13 @@ void Jit64::rlwinmx(UGeckoInstruction inst) { SHL(32, gpr.R(a), Imm8(inst.SH)); if (inst.Rc) - { - GenerateRC(); - } + ComputeRC(gpr.R(a)); } else if (inst.SH && inst.ME == 31 && inst.MB == 32 - inst.SH) { SHR(32, gpr.R(a), Imm8(inst.MB)); if (inst.Rc) - { - GenerateRC(); - } + ComputeRC(gpr.R(a)); } else { @@ -1736,9 +1576,7 @@ void Jit64::rlwinmx(UGeckoInstruction inst) { AND(32, gpr.R(a), Imm32(Helper_Mask(inst.MB, inst.ME))); if (inst.Rc) - { - GenerateRC(); - } + ComputeRC(gpr.R(a)); } else if (inst.Rc) { @@ -1818,9 +1656,7 @@ void Jit64::rlwimix(UGeckoInstruction inst) XOR(32, gpr.R(a), R(EAX)); } if (inst.Rc) - { - GenerateRC(); - } + ComputeRC(gpr.R(a)); } else { @@ -1828,9 +1664,7 @@ void Jit64::rlwimix(UGeckoInstruction inst) AND(32, gpr.R(a), Imm32(~mask)); XOR(32, gpr.R(a), gpr.R(s)); if (inst.Rc) - { - GenerateRC(); - } + ComputeRC(gpr.R(a)); } gpr.UnlockAll(); } @@ -1864,9 +1698,7 @@ void Jit64::rlwnmx(UGeckoInstruction inst) ROL(32, gpr.R(a), R(ECX)); AND(32, gpr.R(a), Imm32(mask)); if (inst.Rc) - { - GenerateRC(); - } + ComputeRC(gpr.R(a)); gpr.UnlockAll(); gpr.UnlockAllX(); } @@ -1898,14 +1730,10 @@ void Jit64::negx(UGeckoInstruction inst) if (a != d) MOV(32, gpr.R(d), gpr.R(a)); NEG(32, gpr.R(d)); - if (inst.Rc) - { - GenerateRC(); - } if (inst.OE) - { GenerateOverflow(); - } + if (inst.Rc) + ComputeRC(gpr.R(d)); gpr.UnlockAll(); } } @@ -1994,7 +1822,7 @@ void Jit64::slwx(UGeckoInstruction inst) if (inst.Rc) { AND(32, gpr.R(a), gpr.R(a)); - GenerateRC(); + ComputeRC(gpr.R(a)); } else { @@ -2104,9 +1932,7 @@ void Jit64::srawix(UGeckoInstruction inst) } SAR(32, gpr.R(a), Imm8(amount)); if (inst.Rc) - { - GenerateRC(); - } + ComputeRC(gpr.R(a)); SHL(32, R(EAX), Imm8(32-amount)); TEST(32, R(EAX), gpr.R(a)); FixupBranch nocarry = J_CC(CC_Z); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp 
b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp index 977e298dd276..f31465f599a7 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp @@ -10,6 +10,130 @@ #include "Core/PowerPC/Jit64/Jit.h" #include "Core/PowerPC/Jit64/JitRegCache.h" +void Jit64::GetCRFieldBit(int field, int bit, Gen::X64Reg out) +{ + switch (bit) + { + case 0: // SO, check bit 61 set + MOV(64, R(ABI_PARAM1), Imm64(1ull << 61)); + TEST(64, M(&PowerPC::ppcState.cr_val[field]), R(ABI_PARAM1)); + SETcc(CC_NZ, R(out)); + break; + + case 1: // EQ, check bits 31-0 == 0 + CMP(32, M(&PowerPC::ppcState.cr_val[field]), Imm32(0)); + SETcc(CC_Z, R(out)); + break; + + case 2: // GT, check val > 0 + MOV(64, R(ABI_PARAM1), M(&PowerPC::ppcState.cr_val[field])); + TEST(64, R(ABI_PARAM1), R(ABI_PARAM1)); + SETcc(CC_G, R(out)); + break; + + case 3: // LT, check bit 62 set + MOV(64, R(ABI_PARAM1), Imm64(1ull << 62)); + TEST(64, M(&PowerPC::ppcState.cr_val[field]), R(ABI_PARAM1)); + SETcc(CC_NZ, R(out)); + break; + + default: + _assert_msg_(DYNA_REC, false, "Invalid CR bit"); + } +} + +void Jit64::SetCRFieldBit(int field, int bit, Gen::X64Reg in) +{ + MOV(64, R(ABI_PARAM2), M(&PowerPC::ppcState.cr_val[field])); + TEST(8, R(in), Imm8(1)); + FixupBranch input_is_set = J_CC(CC_NZ, false); + + // New value is 0. 
+ switch (bit) + { + case 0: // !SO, unset bit 61 + MOV(64, R(ABI_PARAM1), Imm64(~(1ull << 61))); + AND(64, R(ABI_PARAM2), R(ABI_PARAM1)); + break; + + case 1: // !EQ, set bit 0 to 1 + OR(8, R(ABI_PARAM2), Imm8(1)); + break; + + case 2: // !GT, set bit 63 + MOV(64, R(ABI_PARAM1), Imm64(1ull << 63)); + OR(64, R(ABI_PARAM2), R(ABI_PARAM1)); + break; + + case 3: // !LT, unset bit 62 + MOV(64, R(ABI_PARAM1), Imm64(~(1ull << 62))); + AND(64, R(ABI_PARAM2), R(ABI_PARAM1)); + break; + } + + FixupBranch end = J(); + SetJumpTarget(input_is_set); + + switch (bit) + { + case 0: // SO, set bit 61 + MOV(64, R(ABI_PARAM1), Imm64(1ull << 61)); + OR(64, R(ABI_PARAM2), R(ABI_PARAM1)); + break; + + case 1: // EQ, set bits 31-0 to 0 + MOV(64, R(ABI_PARAM1), Imm64(0xFFFFFFFF00000000)); + AND(64, R(ABI_PARAM2), R(ABI_PARAM1)); + break; + + case 2: // GT, unset bit 63 + MOV(64, R(ABI_PARAM1), Imm64(~(1ull << 63))); + AND(64, R(ABI_PARAM2), R(ABI_PARAM1)); + break; + + case 3: // LT, set bit 62 + MOV(64, R(ABI_PARAM1), Imm64(1ull << 62)); + OR(64, R(ABI_PARAM2), R(ABI_PARAM1)); + break; + } + + SetJumpTarget(end); + MOV(64, R(ABI_PARAM1), Imm64(1ull << 32)); + OR(64, R(ABI_PARAM2), R(ABI_PARAM1)); + MOV(64, M(&PowerPC::ppcState.cr_val[field]), R(ABI_PARAM2)); +} + +FixupBranch Jit64::JumpIfCRFieldBit(int field, int bit, bool jump_if_set) +{ + switch (bit) + { + case 0: // SO, check bit 61 set + MOV(64, R(RAX), Imm64(1ull << 61)); + TEST(64, M(&PowerPC::ppcState.cr_val[field]), R(RAX)); + return J_CC(jump_if_set ? CC_NZ : CC_Z, true); + + case 1: // EQ, check bits 31-0 == 0 + CMP(32, M(&PowerPC::ppcState.cr_val[field]), Imm32(0)); + return J_CC(jump_if_set ? CC_Z : CC_NZ, true); + + case 2: // GT, check val > 0 + MOV(64, R(RAX), M(&PowerPC::ppcState.cr_val[field])); + TEST(64, R(RAX), R(RAX)); + return J_CC(jump_if_set ? 
CC_G : CC_LE, true); + + case 3: // LT, check bit 62 set + MOV(64, R(RAX), Imm64(1ull << 62)); + TEST(64, M(&PowerPC::ppcState.cr_val[field]), R(RAX)); + return J_CC(jump_if_set ? CC_NZ : CC_Z, true); + + default: + _assert_msg_(DYNA_REC, false, "Invalid CR bit"); + } + + // Should never happen. + return FixupBranch(); +} + void Jit64::mtspr(UGeckoInstruction inst) { INSTRUCTION_START @@ -154,16 +278,47 @@ void Jit64::mfcr(UGeckoInstruction inst) int d = inst.RD; gpr.Lock(d); gpr.KillImmediate(d, false, true); - MOV(8, R(EAX), M(&PowerPC::ppcState.cr_fast[0])); + XOR(32, R(EAX), R(EAX)); - for (int i = 1; i < 8; i++) + gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2); + X64Reg cr_val = ABI_PARAM1; + X64Reg tmp = ABI_PARAM2; + for (int i = 0; i < 8; i++) { - SHL(32, R(EAX), Imm8(4)); - OR(8, R(EAX), M(&PowerPC::ppcState.cr_fast[i])); + if (i != 0) + SHL(32, R(EAX), Imm8(4)); + + MOV(64, R(cr_val), M(&PowerPC::ppcState.cr_val[i])); + + // SO: Bit 61 set. + MOV(64, R(tmp), R(cr_val)); + SHR(64, R(tmp), Imm8(61)); + AND(8, R(tmp), Imm8(1)); + OR(8, R(EAX), R(tmp)); + + // EQ: Bits 31-0 == 0. + XOR(8, R(tmp), R(tmp)); + TEST(32, R(cr_val), R(cr_val)); + SETcc(CC_Z, R(tmp)); + SHL(8, R(tmp), Imm8(1)); + OR(8, R(EAX), R(tmp)); + + // GT: Value > 0. + TEST(64, R(cr_val), R(cr_val)); + SETcc(CC_G, R(tmp)); + SHL(8, R(tmp), Imm8(2)); + OR(8, R(EAX), R(tmp)); + + // LT: Bit 62 set. 
+ MOV(64, R(tmp), R(cr_val)); + SHR(64, R(tmp), Imm8(62 - 3)); + AND(8, R(tmp), Imm8(0x8)); + OR(8, R(EAX), R(tmp)); } MOV(32, gpr.R(d), R(EAX)); gpr.UnlockAll(); + gpr.UnlockAllX(); } void Jit64::mtcrf(UGeckoInstruction inst) @@ -182,7 +337,8 @@ void Jit64::mtcrf(UGeckoInstruction inst) if ((crm & (0x80 >> i)) != 0) { u8 newcr = (gpr.R(inst.RS).offset >> (28 - (i * 4))) & 0xF; - MOV(8, M(&PowerPC::ppcState.cr_fast[i]), Imm8(newcr)); + MOV(64, R(RAX), Imm64(PPCCRToInternal(newcr))); + MOV(64, M(&PowerPC::ppcState.cr_val[i]), R(RAX)); } } } @@ -190,17 +346,50 @@ void Jit64::mtcrf(UGeckoInstruction inst) { gpr.Lock(inst.RS); gpr.BindToRegister(inst.RS, true, false); + gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2); for (int i = 0; i < 8; i++) { if ((crm & (0x80 >> i)) != 0) { - MOV(32, R(EAX), gpr.R(inst.RS)); - SHR(32, R(EAX), Imm8(28 - (i * 4))); - AND(32, R(EAX), Imm32(0xF)); - MOV(8, M(&PowerPC::ppcState.cr_fast[i]), R(EAX)); + MOVZX(64, 32, EAX, gpr.R(inst.RS)); + SHR(64, R(EAX), Imm8(28 - (i * 4))); + AND(64, R(EAX), Imm32(0xF)); + + X64Reg cr_val = ABI_PARAM1; + X64Reg tmp = ABI_PARAM2; + + MOV(64, R(cr_val), Imm64(1ull << 32)); + + // SO + MOV(64, R(tmp), R(EAX)); + SHL(64, R(tmp), Imm8(63)); + SHR(64, R(tmp), Imm8(63 - 61)); + OR(64, R(cr_val), R(tmp)); + + // EQ + MOV(64, R(tmp), R(EAX)); + NOT(64, R(tmp)); + AND(64, R(tmp), Imm8(0x2)); + OR(64, R(cr_val), R(tmp)); + + // GT + MOV(64, R(tmp), R(EAX)); + NOT(64, R(tmp)); + AND(64, R(tmp), Imm8(0x4)); + SHL(64, R(tmp), Imm8(63 - 2)); + OR(64, R(cr_val), R(tmp)); + + // LT + MOV(64, R(tmp), R(EAX)); + AND(64, R(tmp), Imm8(0x8)); + SHL(64, R(tmp), Imm8(62 - 3)); + OR(64, R(cr_val), R(tmp)); + + MOV(64, M(&PowerPC::ppcState.cr_val[i]), R(cr_val)); } } gpr.UnlockAll(); + gpr.UnlockAllX(); } } } @@ -213,8 +402,8 @@ void Jit64::mcrf(UGeckoInstruction inst) // USES_CR if (inst.CRFS != inst.CRFD) { - MOV(8, R(EAX), M(&PowerPC::ppcState.cr_fast[inst.CRFS])); - MOV(8, M(&PowerPC::ppcState.cr_fast[inst.CRFD]), R(EAX)); + 
MOV(64, R(EAX), M(&PowerPC::ppcState.cr_val[inst.CRFS])); + MOV(64, M(&PowerPC::ppcState.cr_val[inst.CRFD]), R(EAX)); } } @@ -226,9 +415,41 @@ void Jit64::mcrxr(UGeckoInstruction inst) // USES_CR // Copy XER[0-3] into CR[inst.CRFD] - MOV(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_XER])); - SHR(32, R(EAX), Imm8(28)); - MOV(8, M(&PowerPC::ppcState.cr_fast[inst.CRFD]), R(EAX)); + MOVZX(64, 32, EAX, M(&PowerPC::ppcState.spr[SPR_XER])); + SHR(64, R(EAX), Imm8(28)); + + gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2); + X64Reg cr_val = ABI_PARAM1; + X64Reg tmp = ABI_PARAM2; + + MOV(64, R(cr_val), Imm64(1ull << 32)); + + // SO + MOV(64, R(tmp), R(EAX)); + SHL(64, R(tmp), Imm8(63)); + SHR(64, R(tmp), Imm8(63 - 61)); + OR(64, R(cr_val), R(tmp)); + + // EQ + MOV(64, R(tmp), R(EAX)); + AND(64, R(tmp), Imm8(0x2)); + OR(64, R(cr_val), R(tmp)); + + // GT + MOV(64, R(tmp), R(EAX)); + NOT(64, R(tmp)); + AND(64, R(tmp), Imm8(0x4)); + SHL(64, R(tmp), Imm8(63 - 2)); + OR(64, R(cr_val), R(tmp)); + + // LT + MOV(64, R(tmp), R(EAX)); + AND(64, R(tmp), Imm8(0x8)); + SHL(64, R(tmp), Imm8(62 - 3)); + OR(64, R(cr_val), R(tmp)); + + MOV(64, M(&PowerPC::ppcState.cr_val[inst.CRFD]), R(cr_val)); + gpr.UnlockAllX(); // Clear XER[0-3] AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(0x0FFFFFFF)); @@ -240,70 +461,59 @@ void Jit64::crXXX(UGeckoInstruction inst) JITDISABLE(bJITSystemRegistersOff); _dbg_assert_msg_(DYNA_REC, inst.OPCD == 19, "Invalid crXXX"); + // TODO(delroth): Potential optimizations could be applied here. For + // instance, if the two CR bits being loaded are the same, two loads are + // not required. 
+ // USES_CR - // Get bit CRBA in EAX aligned with bit CRBD - int shiftA = (inst.CRBD & 3) - (inst.CRBA & 3); - MOV(8, R(EAX), M(&PowerPC::ppcState.cr_fast[inst.CRBA >> 2])); - if (shiftA < 0) - SHL(8, R(EAX), Imm8(-shiftA)); - else if (shiftA > 0) - SHR(8, R(EAX), Imm8(shiftA)); - - // Get bit CRBB in ECX aligned with bit CRBD - gpr.FlushLockX(ECX); - int shiftB = (inst.CRBD & 3) - (inst.CRBB & 3); - MOV(8, R(ECX), M(&PowerPC::ppcState.cr_fast[inst.CRBB >> 2])); - if (shiftB < 0) - SHL(8, R(ECX), Imm8(-shiftB)); - else if (shiftB > 0) - SHR(8, R(ECX), Imm8(shiftB)); + gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2); + GetCRFieldBit(inst.CRBA >> 2, 3 - (inst.CRBA & 3), ABI_PARAM2); + GetCRFieldBit(inst.CRBB >> 2, 3 - (inst.CRBB & 3), EAX); // Compute combined bit switch (inst.SUBOP10) { case 33: // crnor - OR(8, R(EAX), R(ECX)); + OR(8, R(EAX), R(ABI_PARAM2)); NOT(8, R(EAX)); break; case 129: // crandc - NOT(8, R(ECX)); - AND(8, R(EAX), R(ECX)); + NOT(8, R(EAX)); // complement CRBB, which was loaded into EAX above + AND(8, R(EAX), R(ABI_PARAM2)); break; case 193: // crxor - XOR(8, R(EAX), R(ECX)); + XOR(8, R(EAX), R(ABI_PARAM2)); break; case 225: // crnand - AND(8, R(EAX), R(ECX)); + AND(8, R(EAX), R(ABI_PARAM2)); NOT(8, R(EAX)); break; case 257: // crand - AND(8, R(EAX), R(ECX)); + AND(8, R(EAX), R(ABI_PARAM2)); break; case 289: // creqv - XOR(8, R(EAX), R(ECX)); + XOR(8, R(EAX), R(ABI_PARAM2)); NOT(8, R(EAX)); break; case 417: // crorc - NOT(8, R(ECX)); - OR(8, R(EAX), R(ECX)); + NOT(8, R(EAX)); // complement CRBB, which was loaded into EAX above + OR(8, R(EAX), R(ABI_PARAM2)); break; case 449: // cror - OR(8, R(EAX), R(ECX)); + OR(8, R(EAX), R(ABI_PARAM2)); break; } // Store result bit in CRBD - AND(8, R(EAX), Imm8(0x8 >> (inst.CRBD & 3))); - AND(8, M(&PowerPC::ppcState.cr_fast[inst.CRBD >> 2]), Imm8(~(0x8 >> (inst.CRBD & 3)))); - OR(8, M(&PowerPC::ppcState.cr_fast[inst.CRBD >> 2]), R(EAX)); + SetCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3), EAX); gpr.UnlockAllX(); } diff --git a/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp
b/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp index b0070957616b..bbc0751074bb 100644 --- a/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp +++ b/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp @@ -763,7 +763,8 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { if (!thisUsed) break; X64Reg reg = regFindFreeReg(RI); unsigned ppcreg = *I >> 8; - Jit->MOVZX(32, 8, reg, M(&PowerPC::ppcState.cr_fast[ppcreg])); + // TODO(delroth): unbreak + //Jit->MOVZX(32, 8, reg, M(&PowerPC::ppcState.cr_fast[ppcreg])); RI.regs[reg] = I; break; } @@ -816,7 +817,8 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { Jit->MOV(32, R(ECX), regLocForInst(RI, getOp1(I))); unsigned ppcreg = *I >> 16; // CAUTION: uses 8-bit reg! - Jit->MOV(8, M(&PowerPC::ppcState.cr_fast[ppcreg]), R(ECX)); + // TODO(delroth): Unbreak. + //Jit->MOV(8, M(&PowerPC::ppcState.cr_fast[ppcreg]), R(ECX)); regNormalRegClear(RI, I); break; } diff --git a/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp b/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp index f0f1404b19b7..55576cf4c178 100644 --- a/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp +++ b/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp @@ -482,9 +482,10 @@ void JitIL::Trace() } #endif - DEBUG_LOG(DYNA_REC, "JITIL PC: %08x SRR0: %08x SRR1: %08x CRfast: %02x%02x%02x%02x%02x%02x%02x%02x FPSCR: %08x MSR: %08x LR: %08x %s %s", - PC, SRR0, SRR1, PowerPC::ppcState.cr_fast[0], PowerPC::ppcState.cr_fast[1], PowerPC::ppcState.cr_fast[2], PowerPC::ppcState.cr_fast[3], - PowerPC::ppcState.cr_fast[4], PowerPC::ppcState.cr_fast[5], PowerPC::ppcState.cr_fast[6], PowerPC::ppcState.cr_fast[7], PowerPC::ppcState.fpscr, + // cr_val entries are 64 bits wide: cast them to unsigned long long so they match the %llx conversions on every ABI. + DEBUG_LOG(DYNA_REC, "JITIL PC: %08x SRR0: %08x SRR1: %08x CRval: %016llx%016llx%016llx%016llx%016llx%016llx%016llx%016llx FPSCR: %08x MSR: %08x LR: %08x %s %s", + PC, SRR0, SRR1, (unsigned long long)PowerPC::ppcState.cr_val[0], (unsigned long long)PowerPC::ppcState.cr_val[1], (unsigned long long)PowerPC::ppcState.cr_val[2], (unsigned long long)PowerPC::ppcState.cr_val[3], + (unsigned long long)PowerPC::ppcState.cr_val[4],
(unsigned long long)PowerPC::ppcState.cr_val[5], (unsigned long long)PowerPC::ppcState.cr_val[6], (unsigned long long)PowerPC::ppcState.cr_val[7], PowerPC::ppcState.fpscr, PowerPC::ppcState.msr, PowerPC::ppcState.spr[8], regs.c_str(), fregs.c_str()); } diff --git a/Source/Core/Core/PowerPC/PowerPC.cpp b/Source/Core/Core/PowerPC/PowerPC.cpp index db4e755de459..22be7df9187b 100644 --- a/Source/Core/Core/PowerPC/PowerPC.cpp +++ b/Source/Core/Core/PowerPC/PowerPC.cpp @@ -41,10 +41,10 @@ PPCDebugInterface debug_interface; u32 CompactCR() { - u32 new_cr = ppcState.cr_fast[0] << 28; - for (int i = 1; i < 8; i++) + u32 new_cr = 0; + for (int i = 0; i < 8; i++) { - new_cr |= ppcState.cr_fast[i] << (28 - i * 4); + new_cr |= GetCRField(i) << (28 - i * 4); } return new_cr; } @@ -53,7 +53,7 @@ void ExpandCR(u32 cr) { for (int i = 0; i < 8; i++) { - ppcState.cr_fast[i] = (cr >> (28 - i * 4)) & 0xF; + SetCRField(i, (cr >> (28 - i * 4)) & 0xF); } } @@ -99,7 +99,8 @@ void ResetRegisters() ppcState.pc = 0; ppcState.npc = 0; ppcState.Exceptions = 0; - ((u64*)(&ppcState.cr_fast[0]))[0] = 0; + for (auto& v : ppcState.cr_val) + v = 0x8000000000000001; TL = 0; TU = 0; diff --git a/Source/Core/Core/PowerPC/PowerPC.h b/Source/Core/Core/PowerPC/PowerPC.h index 8952c7f34611..471edeac72c7 100644 --- a/Source/Core/Core/PowerPC/PowerPC.h +++ b/Source/Core/Core/PowerPC/PowerPC.h @@ -38,7 +38,20 @@ struct GC_ALIGNED64(PowerPCState) u32 pc; // program counter u32 npc; - u8 cr_fast[8]; // Possibly reorder to 0, 2, 4, 8, 1, 3, 5, 7 so that we can make Compact and Expand super fast? + // Optimized CR implementation. Instead of storing CR in its PowerPC format + // (4 bit value, SO/EQ/LT/GT), we store instead a 64 bit value for each of + // the 8 CR register parts. This 64 bit value follows this format: + // - SO iff. bit 61 is set + // - EQ iff. lower 32 bits == 0 + // - GT iff. (s64)cr_val > 0 + // - LT iff.
bit 62 is set + // + // This has the interesting property that sign-extending the result of an + // operation from 32 to 64 bits results in a 64 bit value that works as a + // CR value. Checking each part of CR is also fast, as it is equivalent to + // testing one bit or the low 32 bit part of a register. And CR can still + // be manipulated bit by bit fairly easily. + u64 cr_val[8]; u32 msr; // machine specific register u32 fpscr; // floating point flags/status bits @@ -144,27 +157,54 @@ void UpdatePerformanceMonitor(u32 cycles, u32 num_load_stores, u32 num_fp_inst); } // namespace -// Fast CR system - store them in single bytes instead of nibbles to not have to -// mask/shift them out. +// Convert a 4 bit PowerPC CR field (LT = 8, GT = 4, EQ = 2, SO = 1) to the internal 64 bit representation. +inline u64 PPCCRToInternal(u8 value) +{ + u64 cr_val = 0x100000000; // only bit 32 set: positive with zero low 32 bits, i.e. GT and EQ until cleared below + // SO: copied to bit 61 + cr_val |= (u64)!!(value & 1) << 61; + // EQ: bit 0 is set when EQ is clear, so the low 32 bits stay zero only if EQ is set + cr_val |= (u64)!(value & 2); + // GT: bit 63 (the sign bit) is set when GT is clear + cr_val |= (u64)!(value & 4) << 63; + // LT: copied to bit 62 + cr_val |= (u64)!!(value & 8) << 62; + + return cr_val; +} -// These are intended to stay fast, probably become faster, and are not likely to slow down much if at all. +// Warning: these CR operations are fairly slow since they need to convert from +// PowerPC format (4 bit) to our internal 64 bit format. See the definition of +// ppcState.cr_val for more explanations.
inline void SetCRField(int cr_field, int value) { - PowerPC::ppcState.cr_fast[cr_field] = value; + PowerPC::ppcState.cr_val[cr_field] = PPCCRToInternal(value); } inline u32 GetCRField(int cr_field) { - return PowerPC::ppcState.cr_fast[cr_field]; + u64 cr_val = PowerPC::ppcState.cr_val[cr_field]; + u32 ppc_cr = 0; + + // SO (result bit 0): bit 61 of the internal value + ppc_cr |= !!(cr_val & (1ull << 61)); + // EQ (result bit 1): the low 32 bits are all zero + ppc_cr |= ((cr_val & 0xFFFFFFFF) == 0) << 1; + // GT (result bit 2): the value is positive as a signed 64 bit integer + ppc_cr |= ((s64)cr_val > 0) << 2; + // LT (result bit 3): bit 62 of the internal value + ppc_cr |= !!(cr_val & (1ull << 62)) << 3; + + return ppc_cr; } inline u32 GetCRBit(int bit) { - return (PowerPC::ppcState.cr_fast[bit >> 2] >> (3 - (bit & 3))) & 1; + return (GetCRField(bit >> 2) >> (3 - (bit & 3))) & 1; } inline void SetCRBit(int bit, int value) { if (value & 1) - PowerPC::ppcState.cr_fast[bit >> 2] |= 0x8 >> (bit & 3); + SetCRField(bit >> 2, GetCRField(bit >> 2) | (0x8 >> (bit & 3))); else - PowerPC::ppcState.cr_fast[bit >> 2] &= ~(0x8 >> (bit & 3)); + SetCRField(bit >> 2, GetCRField(bit >> 2) & ~(0x8 >> (bit & 3))); } // SetCR and GetCR are fairly slow. Should be avoided if possible.