diff --git a/Core/MIPS/ARM64/Arm64IRCompLoadStore.cpp b/Core/MIPS/ARM64/Arm64IRCompLoadStore.cpp index 42a966d4371a..d0fde9f6f2fc 100644 --- a/Core/MIPS/ARM64/Arm64IRCompLoadStore.cpp +++ b/Core/MIPS/ARM64/Arm64IRCompLoadStore.cpp @@ -80,7 +80,12 @@ Arm64JitBackend::LoadStoreArg Arm64JitBackend::PrepareSrc1Address(IRInst inst) { // If it's about to be clobbered, don't waste time pointerifying. Use displacement. bool clobbersSrc1 = !readsFromSrc1 && regs_.IsGPRClobbered(inst.src1); - int32_t imm = (int32_t)inst.constant; + int64_t imm = (int32_t)inst.constant; + // It can't be this negative, must be a constant address with the top bit set. + if ((imm & 0xC0000000) == 0x80000000) { + imm = (uint64_t)(uint32_t)inst.constant; + } + LoadStoreArg addrArg; if (inst.src1 == MIPS_REG_ZERO) { // The constant gets applied later. @@ -100,7 +105,7 @@ Arm64JitBackend::LoadStoreArg Arm64JitBackend::PrepareSrc1Address(IRInst inst) { // Since we can't modify src1, let's just use a temp reg while copying. if (!addrArg.useRegisterOffset) { - ADDI2R(SCRATCH1, regs_.MapGPR(inst.src1), (s64)imm, SCRATCH2); + ADDI2R(SCRATCH1, regs_.MapGPR(inst.src1), imm, SCRATCH2); #ifdef MASKED_PSP_MEMORY ANDI2R(SCRATCH1, SCRATCH1, Memory::MEMVIEW32_MASK, SCRATCH2); #endif @@ -114,7 +119,7 @@ Arm64JitBackend::LoadStoreArg Arm64JitBackend::PrepareSrc1Address(IRInst inst) { // The offset gets set later. addrArg.base = regs_.MapGPRAsPointer(inst.src1); } else { - ADDI2R(SCRATCH1, regs_.MapGPR(inst.src1), (s64)imm, SCRATCH2); + ADDI2R(SCRATCH1, regs_.MapGPR(inst.src1), imm, SCRATCH2); #ifdef MASKED_PSP_MEMORY ANDI2R(SCRATCH1, SCRATCH1, Memory::MEMVIEW32_MASK, SCRATCH2); #endif @@ -137,15 +142,15 @@ Arm64JitBackend::LoadStoreArg Arm64JitBackend::PrepareSrc1Address(IRInst inst) { int scale = IROpToByteWidth(inst.op); if (imm > 0 && (imm & (scale - 1)) == 0 && imm <= 0xFFF * scale) { // Okay great, use the LDR/STR form. - addrArg.immOffset = imm; + addrArg.immOffset = (int)imm; addrArg.useUnscaled = false; } else if (imm >= -256 && imm < 256) { // An unscaled offset (LDUR/STUR) should work fine for this range. - addrArg.immOffset = imm; + addrArg.immOffset = (int)imm; addrArg.useUnscaled = true; } else { // No luck, we'll need to load into a register. - MOVI2R(SCRATCH1, (s64)imm); + MOVI2R(SCRATCH1, imm); addrArg.regOffset = SCRATCH1; addrArg.useRegisterOffset = true; addrArg.signExtendRegOffset = true; diff --git a/Core/MIPS/RiscV/RiscVCompLoadStore.cpp b/Core/MIPS/RiscV/RiscVCompLoadStore.cpp index 80b149ca02f3..9db1ebb657c6 100644 --- a/Core/MIPS/RiscV/RiscVCompLoadStore.cpp +++ b/Core/MIPS/RiscV/RiscVCompLoadStore.cpp @@ -59,8 +59,19 @@ int32_t RiscVJitBackend::AdjustForAddressOffset(RiscVGen::RiscVReg *reg, int32_t if (constant > 0) constant &= Memory::MEMVIEW32_MASK; #endif - LI(SCRATCH2, constant); - ADD(SCRATCH1, *reg, SCRATCH2); + // It can't be this negative, must be a constant with top bit set. + if ((constant & 0xC0000000) == 0x80000000) { + if (cpu_info.RiscV_Zba) { + LI(SCRATCH2, constant); + ADD_UW(SCRATCH1, SCRATCH2, *reg); + } else { + LI(SCRATCH2, (uint32_t)constant); + ADD(SCRATCH1, *reg, SCRATCH2); + } + } else { + LI(SCRATCH2, constant); + ADD(SCRATCH1, *reg, SCRATCH2); + } *reg = SCRATCH1; return 0; } diff --git a/Core/MIPS/x86/X64IRAsm.cpp b/Core/MIPS/x86/X64IRAsm.cpp index fc763bd07c7d..5267c1022ac5 100644 --- a/Core/MIPS/x86/X64IRAsm.cpp +++ b/Core/MIPS/x86/X64IRAsm.cpp @@ -58,7 +58,7 @@ void X64JitBackend::GenerateFixedCode(MIPSState *mipsState) { int jitbaseCtxDisp = 0; // We pre-bake the MIPS_EMUHACK_OPCODE subtraction into our jitbase value. intptr_t jitbase = (intptr_t)GetBasePtr() - MIPS_EMUHACK_OPCODE; - if ((jitbase < -0x80000000LL || jitbase > 0x7FFFFFFFLL) && !Accessible((const u8 *)&mipsState->f[0], GetBasePtr())) { + if ((jitbase < -0x80000000LL || jitbase > 0x7FFFFFFFLL) && !Accessible((const u8 *)&mipsState->f[0], (const u8 *)jitbase)) { jo.reserveR15ForAsm = true; jitbaseInR15 = true; } else { diff --git a/Core/MIPS/x86/X64IRCompLoadStore.cpp b/Core/MIPS/x86/X64IRCompLoadStore.cpp index d033832bf3f0..9b3eea1341d4 100644 --- a/Core/MIPS/x86/X64IRCompLoadStore.cpp +++ b/Core/MIPS/x86/X64IRCompLoadStore.cpp @@ -45,35 +45,41 @@ Gen::OpArg X64JitBackend::PrepareSrc1Address(IRInst inst) { // If it's about to be clobbered, don't waste time pointerifying. Use displacement. bool clobbersSrc1 = !readsFromSrc1 && regs_.IsGPRClobbered(inst.src1); + int32_t disp = (int32_t)inst.constant; + // It can't be this negative, must be a constant address with the top bit set. + if ((disp & 0xC0000000) == 0x80000000) { + disp = inst.constant & 0x7FFFFFFF; + } + #ifdef MASKED_PSP_MEMORY - if (inst.constant > 0) - inst.constant &= Memory::MEMVIEW32_MASK; + if (disp > 0) + disp &= Memory::MEMVIEW32_MASK; #endif OpArg addrArg; if (inst.src1 == MIPS_REG_ZERO) { #ifdef MASKED_PSP_MEMORY - inst.constant &= Memory::MEMVIEW32_MASK; + disp &= Memory::MEMVIEW32_MASK; #endif #if PPSSPP_ARCH(AMD64) - addrArg = MDisp(MEMBASEREG, inst.constant & 0x7FFFFFFF); + addrArg = MDisp(MEMBASEREG, disp & 0x7FFFFFFF); #else - addrArg = M(Memory::base + inst.constant); + addrArg = M(Memory::base + disp); #endif } else if ((jo.cachePointers || src1IsPointer) && !readsFromSrc1 && (!clobbersSrc1 || src1IsPointer)) { X64Reg src1 = regs_.MapGPRAsPointer(inst.src1); - addrArg = MDisp(src1, (int)inst.constant); + addrArg = MDisp(src1, disp); } else { regs_.MapGPR(inst.src1); #ifdef MASKED_PSP_MEMORY - LEA(PTRBITS, SCRATCH1, MDisp(regs_.RX(inst.src1), (int)inst.constant)); + LEA(PTRBITS, SCRATCH1, MDisp(regs_.RX(inst.src1), disp)); AND(PTRBITS, R(SCRATCH1), Imm32(Memory::MEMVIEW32_MASK)); addrArg = MDisp(SCRATCH1, (intptr_t)Memory::base); #else #if PPSSPP_ARCH(AMD64) - addrArg = MComplex(MEMBASEREG, regs_.RX(inst.src1), SCALE_1, (int)inst.constant); + addrArg = MComplex(MEMBASEREG, regs_.RX(inst.src1), SCALE_1, disp); #else - addrArg = MDisp(regs_.RX(inst.src1), Memory::base + inst.constant); + addrArg = MDisp(regs_.RX(inst.src1), Memory::base + disp); #endif #endif } diff --git a/unittest/JitHarness.cpp b/unittest/JitHarness.cpp index 4ad53fc65c1a..088416e2bcd8 100644 --- a/unittest/JitHarness.cpp +++ b/unittest/JitHarness.cpp @@ -34,6 +34,7 @@ #include "Core/MemMap.h" #include "Core/Core.h" #include "Core/CoreTiming.h" +#include "Core/Config.h" #include "Core/HLE/HLE.h" // Temporary hacks around annoying linking errors. Copied from Headless. @@ -55,9 +56,15 @@ HLEFunction UnitTestFakeSyscalls[] = { {0x1234BEEF, &UnitTestTerminator, "UnitTestTerminator"}, }; -double ExecCPUTest() { +double ExecCPUTest(bool clearCache = true) { int blockTicks = 1000000; int total = 0; + + if (MIPSComp::jit) { + currentMIPS->pc = PSP_GetUserMemoryBase(); + MIPSComp::JitAt(); + } + double st = time_now_d(); do { for (int j = 0; j < 1000; ++j) { @@ -73,6 +80,17 @@ double ExecCPUTest() { while (time_now_d() - st < 0.5); double elapsed = time_now_d() - st; + if (MIPSComp::jit) { + JitBlockCacheDebugInterface *cache = MIPSComp::jit->GetBlockCacheDebugInterface(); + if (cache) { + JitBlockDebugInfo block = cache->GetBlockDebugInfo(0); + WARN_LOG(JIT, "Executed %d target instrs, %d IR, for %d orig", (int)block.targetDisasm.size(), (int)block.irDisasm.size(), (int)block.origDisasm.size()); + } + + if (clearCache) + MIPSComp::jit->ClearCache(); + } + return total / elapsed; } @@ -108,6 +126,7 @@ static void DestroyJitHarness() { bool TestJit() { SetupJitHarness(); + g_Config.bFastMemory = true; currentMIPS->pc = PSP_GetUserMemoryBase(); u32 *p = (u32 *)Memory::GetPointer(currentMIPS->pc); @@ -158,6 +177,7 @@ bool TestJit() { *p++ = MIPS_MAKE_SYSCALL("UnitTestFakeSyscalls", "UnitTestTerminator"); *p++ = MIPS_MAKE_BREAK(1); + *p++ = MIPS_MAKE_JR_RA(); // Dogfood. addr = currentMIPS->pc; @@ -170,11 +190,15 @@ bool TestJit() { printf("\n"); - double jit_speed = 0.0, interp_speed = 0.0; + double jit_speed = 0.0, jit_ir_speed = 0.0, ir_speed = 0.0, interp_speed = 0.0; if (compileSuccess) { interp_speed = ExecCPUTest(); + mipsr4k.UpdateCore(CPUCore::IR_INTERPRETER); + ir_speed = ExecCPUTest(); mipsr4k.UpdateCore(CPUCore::JIT); jit_speed = ExecCPUTest(); + mipsr4k.UpdateCore(CPUCore::JIT_IR); + jit_ir_speed = ExecCPUTest(false); // Disassemble JitBlockCacheDebugInterface *cache = MIPSComp::jit->GetBlockCacheDebugInterface(); @@ -182,14 +206,14 @@ bool TestJit() { JitBlockDebugInfo block = cache->GetBlockDebugInfo(0); // Should only be one block. std::vector &lines = block.targetDisasm; // Cut off at 25 due to the repetition above. Might need tweaking for large instructions. - const int cutoff = 25; + const int cutoff = 50; for (int i = 0; i < std::min((int)lines.size(), cutoff); i++) { printf("%s\n", lines[i].c_str()); } if (lines.size() > cutoff) printf("...\n"); } - printf("Jit was %fx faster than interp.\n\n", jit_speed / interp_speed); + printf("Jit was %fx faster than interp, IR was %fx faster, JIT IR %fx.\n\n", jit_speed / interp_speed, ir_speed / interp_speed, jit_ir_speed / interp_speed); } printf("\n"); diff --git a/unittest/UnitTest.cpp b/unittest/UnitTest.cpp index ac7b5ba76339..92ed78760e4b 100644 --- a/unittest/UnitTest.cpp +++ b/unittest/UnitTest.cpp @@ -58,6 +58,7 @@ #include "Common/Render/DrawBuffer.h" #include "Common/System/NativeApp.h" #include "Common/System/System.h" +#include "Common/Thread/ThreadUtil.h" #include "Common/ArmEmitter.h" #include "Common/BitScan.h" @@ -1038,6 +1039,8 @@ TestItem availableTests[] = { }; int main(int argc, const char *argv[]) { + SetCurrentThreadName("UnitTest"); + cpu_info.bNEON = true; cpu_info.bVFP = true; cpu_info.bVFPv3 = true;