From 1d152a1486783556c95b88de3124fe83ab096d8b Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 3 Sep 2023 12:26:21 -0700 Subject: [PATCH 1/3] x86jit: Bake emuhack mask into jitbase. --- Core/MIPS/RiscV/RiscVRegCache.cpp | 4 ++++ Core/MIPS/x86/X64IRAsm.cpp | 18 +++++++++--------- Core/MIPS/x86/X64IRRegCache.h | 1 + 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/Core/MIPS/RiscV/RiscVRegCache.cpp b/Core/MIPS/RiscV/RiscVRegCache.cpp index 3e98406380a2..720cc40f8128 100644 --- a/Core/MIPS/RiscV/RiscVRegCache.cpp +++ b/Core/MIPS/RiscV/RiscVRegCache.cpp @@ -206,6 +206,10 @@ RiscVGen::RiscVReg RiscVRegCache::Normalize32(IRReg mipsReg, RiscVGen::RiscVReg emit_->SEXT_W(destReg, (RiscVReg)mr[mipsReg].nReg); } break; + + default: + _assert_msg_(false, "Should not normalize32 floats"); + break; } return destReg == INVALID_REG ? reg : destReg; diff --git a/Core/MIPS/x86/X64IRAsm.cpp b/Core/MIPS/x86/X64IRAsm.cpp index 770590529871..2a5fb91152c1 100644 --- a/Core/MIPS/x86/X64IRAsm.cpp +++ b/Core/MIPS/x86/X64IRAsm.cpp @@ -56,15 +56,16 @@ void X64JitBackend::GenerateFixedCode(MIPSState *mipsState) { #if PPSSPP_ARCH(AMD64) bool jitbaseInR15 = false; int jitbaseCtxDisp = 0; - uintptr_t jitbase = (uintptr_t)GetBasePtr(); - if (jitbase > 0x7FFFFFFFULL && !Accessible((const u8 *)&mipsState->f[0], GetBasePtr())) { + // We pre-bake the MIPS_EMUHACK_OPCODE subtraction into our jitbase value. 
+	intptr_t jitbase = (intptr_t)GetBasePtr() - MIPS_EMUHACK_OPCODE;
+	if ((jitbase < -0x80000000LL || jitbase > 0x7FFFFFFFLL) && !Accessible((const u8 *)&mipsState->f[0], (const u8 *)jitbase)) {
 		jo.reserveR15ForAsm = true;
 		jitbaseInR15 = true;
 	} else {
 		jo.downcountInRegister = true;
 		jo.reserveR15ForAsm = true;
-		if (jitbase > 0x7FFFFFFFULL) {
-			jitbaseCtxDisp = (int)(GetBasePtr() - (const u8 *)&mipsState->f[0]);
+		if (jitbase < -0x80000000LL || jitbase > 0x7FFFFFFFLL) {
+			jitbaseCtxDisp = (int)(jitbase - (intptr_t)&mipsState->f[0]);
 		}
 	}
 #endif
@@ -139,7 +140,7 @@ void X64JitBackend::GenerateFixedCode(MIPSState *mipsState) {
 	// Two x64-specific statically allocated registers.
 	MOV(64, R(MEMBASEREG), ImmPtr(Memory::base));
 	if (jitbaseInR15)
-		MOV(64, R(JITBASEREG), ImmPtr(GetBasePtr()));
+		MOV(64, R(JITBASEREG), ImmPtr((const void *)jitbase));
 #endif
 	// From the start of the FP reg, a single byte offset can reach all GPR + all FPR (but not VFPR.)
 	MOV(PTRBITS, R(CTXREG), ImmPtr(&mipsState->f[0]));
@@ -228,10 +229,9 @@ void X64JitBackend::GenerateFixedCode(MIPSState *mipsState) {
 		CMP(32, R(EDX), Imm8(MIPS_EMUHACK_OPCODE >> 24));
 	}
 	FixupBranch needsCompile = J_CC(CC_NE);
-	// Mask by 0x00FFFFFF and extract the block jit offset.
-	AND(32, R(SCRATCH1), Imm32(MIPS_EMUHACK_VALUE_MASK));
+	// We don't mask here - subtracting MIPS_EMUHACK_OPCODE is baked into the jit base we add.
 #if PPSSPP_ARCH(X86)
-	LEA(32, SCRATCH1, MDisp(SCRATCH1, (u32)GetBasePtr()));
+	LEA(32, SCRATCH1, MDisp(SCRATCH1, (u32)GetBasePtr() - MIPS_EMUHACK_OPCODE));
 #elif PPSSPP_ARCH(AMD64)
 	if (jitbaseInR15) {
 		ADD(64, R(SCRATCH1), R(JITBASEREG));
@@ -239,7 +239,7 @@ void X64JitBackend::GenerateFixedCode(MIPSState *mipsState) {
 		LEA(64, SCRATCH1, MComplex(CTXREG, SCRATCH1, SCALE_1, jitbaseCtxDisp));
 	} else {
 		// See above, reserveR15ForAsm is used when above 0x7FFFFFFF.
- LEA(64, SCRATCH1, MDisp(SCRATCH1, (u32)jitbase)); + LEA(64, SCRATCH1, MDisp(SCRATCH1, (s32)jitbase)); } #endif JMPptr(R(SCRATCH1)); diff --git a/Core/MIPS/x86/X64IRRegCache.h b/Core/MIPS/x86/X64IRRegCache.h index 953c92e759ff..4ff214f3984e 100644 --- a/Core/MIPS/x86/X64IRRegCache.h +++ b/Core/MIPS/x86/X64IRRegCache.h @@ -30,6 +30,7 @@ namespace X64IRJitConstants { #if PPSSPP_ARCH(AMD64) const Gen::X64Reg MEMBASEREG = Gen::RBX; const Gen::X64Reg CTXREG = Gen::R14; +// Note: this is actually offset from the base. const Gen::X64Reg JITBASEREG = Gen::R15; const Gen::X64Reg DOWNCOUNTREG = Gen::R15; #else From 0452b8b57a941635b90bbf281d58a328e8546081 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 3 Sep 2023 13:29:05 -0700 Subject: [PATCH 2/3] riscv: Account for emuhack in JITBASEREG. --- Core/MIPS/RiscV/RiscVAsm.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Core/MIPS/RiscV/RiscVAsm.cpp b/Core/MIPS/RiscV/RiscVAsm.cpp index 2bbd420f4f47..135e0604e8a3 100644 --- a/Core/MIPS/RiscV/RiscVAsm.cpp +++ b/Core/MIPS/RiscV/RiscVAsm.cpp @@ -121,7 +121,7 @@ void RiscVJitBackend::GenerateFixedCode(MIPSState *mipsState) { // Fixed registers, these are always kept when in Jit context. LI(MEMBASEREG, Memory::base, SCRATCH1); LI(CTXREG, mipsState, SCRATCH1); - LI(JITBASEREG, GetBasePtr(), SCRATCH1); + LI(JITBASEREG, GetBasePtr() - MIPS_EMUHACK_OPCODE, SCRATCH1); LoadStaticRegisters(); MovFromPC(SCRATCH1); @@ -173,9 +173,7 @@ void RiscVJitBackend::GenerateFixedCode(MIPSState *mipsState) { // We're in other words comparing to the top 8 bits of MIPS_EMUHACK_OPCODE by subtracting. ADDI(SCRATCH2, SCRATCH2, -(MIPS_EMUHACK_OPCODE >> 24)); FixupBranch needsCompile = BNE(SCRATCH2, R_ZERO); - // Use a wall to mask by 0x00FFFFFF and extract the block jit offset. - SLLI(SCRATCH1, SCRATCH1, XLEN - 24); - SRLI(SCRATCH1, SCRATCH1, XLEN - 24); + // No need to mask, JITBASEREG has already accounted for the upper bits. 
ADD(SCRATCH1, JITBASEREG, SCRATCH1); JR(SCRATCH1); SetJumpTarget(needsCompile); From 9439a433233e88779210f4796e3f5cb06bada41d Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 3 Sep 2023 13:29:57 -0700 Subject: [PATCH 3/3] riscv: Correct an overlap case, fix assert. --- Core/MIPS/IR/IRRegCache.cpp | 5 +++-- Core/MIPS/RiscV/RiscVCompVec.cpp | 24 +++++++++++++++++++----- 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/Core/MIPS/IR/IRRegCache.cpp b/Core/MIPS/IR/IRRegCache.cpp index f2554bc630a5..698929a21ad8 100644 --- a/Core/MIPS/IR/IRRegCache.cpp +++ b/Core/MIPS/IR/IRRegCache.cpp @@ -784,10 +784,11 @@ void IRNativeRegCacheBase::ApplyMapping(const Mapping *mapping, int count) { return; } + bool mapSIMD = config_.mapFPUSIMD || mapping[i].type == 'G'; MIPSMap flags = mapping[i].flags; for (int j = 0; j < count; ++j) { if (mapping[j].type == mapping[i].type && mapping[j].reg == mapping[i].reg && i != j) { - _assert_msg_(mapping[j].lanes == mapping[i].lanes, "Lane aliasing not supported yet"); + _assert_msg_(!mapSIMD || mapping[j].lanes == mapping[i].lanes, "Lane aliasing not supported yet"); if (!isNoinit(mapping[j].flags) && isNoinit(flags)) { flags = (flags & MIPSMap::BACKEND_MASK) | MIPSMap::DIRTY; @@ -795,7 +796,7 @@ void IRNativeRegCacheBase::ApplyMapping(const Mapping *mapping, int count) { } } - if (config_.mapFPUSIMD || mapping[i].type == 'G') { + if (mapSIMD) { MapNativeReg(type, mapping[i].reg, mapping[i].lanes, flags); return; } diff --git a/Core/MIPS/RiscV/RiscVCompVec.cpp b/Core/MIPS/RiscV/RiscVCompVec.cpp index 8db60a76a8f0..3d91312a8cca 100644 --- a/Core/MIPS/RiscV/RiscVCompVec.cpp +++ b/Core/MIPS/RiscV/RiscVCompVec.cpp @@ -35,6 +35,10 @@ namespace MIPSComp { using namespace RiscVGen; using namespace RiscVJitConstants; +static bool Overlap(IRReg r1, int l1, IRReg r2, int l2) { + return r1 < r2 + l2 && r1 + l1 > r2; +} + void RiscVJitBackend::CompIR_VecAssign(IRInst inst) { CONDITIONAL_DISABLE; @@ -215,10 +219,21 @@ void 
RiscVJitBackend::CompIR_VecArith(IRInst inst) { break; case IROp::Vec4Scale: - // TODO: This works for now, but may need to handle aliasing for vectors. regs_.Map(inst); - for (int i = 0; i < 4; ++i) - FMUL(32, regs_.F(inst.dest + i), regs_.F(inst.src1 + i), regs_.F(inst.src2)); + if (Overlap(inst.src2, 1, inst.dest, 3)) { + // We have to handle overlap, doing dest == src2 last. + for (int i = 0; i < 4; ++i) { + if (inst.src2 != inst.dest + i) + FMUL(32, regs_.F(inst.dest + i), regs_.F(inst.src1 + i), regs_.F(inst.src2)); + } + for (int i = 0; i < 4; ++i) { + if (inst.src2 == inst.dest + i) + FMUL(32, regs_.F(inst.dest + i), regs_.F(inst.src1 + i), regs_.F(inst.src2)); + } + } else { + for (int i = 0; i < 4; ++i) + FMUL(32, regs_.F(inst.dest + i), regs_.F(inst.src1 + i), regs_.F(inst.src2)); + } break; case IROp::Vec4Neg: @@ -244,9 +259,8 @@ void RiscVJitBackend::CompIR_VecHoriz(IRInst inst) { switch (inst.op) { case IROp::Vec4Dot: - // TODO: This works for now, but may need to handle aliasing for vectors. regs_.Map(inst); - if ((inst.dest < inst.src1 + 4 && inst.dest >= inst.src1) || (inst.dest < inst.src2 + 4 && inst.dest >= inst.src2)) { + if (Overlap(inst.dest, 1, inst.src1, 4) || Overlap(inst.dest, 1, inst.src2, 4)) { // This means inst.dest overlaps one of src1 or src2. We have to do that one first. // Technically this may impact -0.0 and such, but dots accurately need to be aligned anyway. for (int i = 0; i < 4; ++i) {