diff --git a/Source/Core/Common/Arm64Emitter.cpp b/Source/Core/Common/Arm64Emitter.cpp
index 8a93a7a0bdbf..0860d53cf051 100644
--- a/Source/Core/Common/Arm64Emitter.cpp
+++ b/Source/Core/Common/Arm64Emitter.cpp
@@ -2439,6 +2439,51 @@ void ARM64FloatEmitter::EncodeLoadStorePair(u32 size, bool load, IndexType type,
 }
 
+void ARM64FloatEmitter::EncodeLoadStoreRegisterOffset(u32 size, bool load, ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm)
+{
+	_assert_msg_(DYNA_REC, Rm.GetType() == ArithOption::TYPE_EXTENDEDREG, "%s must contain an extended reg as Rm!", __FUNCTION__);
+
+	u32 encoded_size = 0;
+	u32 encoded_op = 0;
+	bool shift = false;
+
+	if (size == 8)
+	{
+		encoded_size = 0;
+		encoded_op = 0;
+	}
+	else if (size == 16)
+	{
+		encoded_size = 1;
+		encoded_op = 0;
+	}
+	else if (size == 32)
+	{
+		encoded_size = 2;
+		encoded_op = 0;
+	}
+	else if (size == 64)
+	{
+		encoded_size = 3;
+		encoded_op = 0;
+	}
+	else if (size == 128)
+	{
+		encoded_size = 0;
+		encoded_op = 2;
+	}
+
+	if (load)
+		encoded_op |= 1;
+
+	Rt = DecodeReg(Rt);
+	Rn = DecodeReg(Rn);
+	ARM64Reg decoded_Rm = DecodeReg(Rm.GetReg());
+
+	Write32((encoded_size << 30) | (encoded_op << 22) | (0b111100001 << 21) | (decoded_Rm << 16) | \
+	        Rm.GetData() | (1 << 11) | (Rn << 5) | Rt);
+}
+
 void ARM64FloatEmitter::LDR(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
 {
 	EmitLoadStoreImmediate(size, 1, type, Rt, Rn, imm);
 }
@@ -2840,6 +2885,16 @@ void ARM64FloatEmitter::STP(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rt2,
 	EncodeLoadStorePair(size, false, type, Rt, Rt2, Rn, imm);
 }
 
+// Loadstore register offset
+void ARM64FloatEmitter::STR(u8 size, ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm)
+{
+	EncodeLoadStoreRegisterOffset(size, false, Rt, Rn, Rm);
+}
+void ARM64FloatEmitter::LDR(u8 size, ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm)
+{
+	EncodeLoadStoreRegisterOffset(size, true, Rt, Rn, Rm);
+}
+
 void ARM64FloatEmitter::FABS(ARM64Reg Rd, ARM64Reg Rn)
 {
 	EmitScalar1Source(0, 0, IsDouble(Rd), 1, Rd, Rn);
diff --git
a/Source/Core/Common/Arm64Emitter.h b/Source/Core/Common/Arm64Emitter.h
index c6be23a966d5..a5eb986e2fad 100644
--- a/Source/Core/Common/Arm64Emitter.h
+++ b/Source/Core/Common/Arm64Emitter.h
@@ -763,6 +763,10 @@ class ARM64FloatEmitter
 	void LDP(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm);
 	void STP(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm);
 
+	// Loadstore register offset
+	void STR(u8 size, ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
+	void LDR(u8 size, ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
+
 	// Scalar - 1 Source
 	void FABS(ARM64Reg Rd, ARM64Reg Rn);
 	void FNEG(ARM64Reg Rd, ARM64Reg Rn);
@@ -923,6 +927,7 @@ class ARM64FloatEmitter
 	void EmitConvertScalarToInt(ARM64Reg Rd, ARM64Reg Rn, RoundingMode round, bool sign);
 	void EmitScalar3Source(bool isDouble, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra, int opcode);
 	void EncodeLoadStorePair(u32 size, bool load, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm);
+	void EncodeLoadStoreRegisterOffset(u32 size, bool load, ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
 	void SSHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper);
 	void USHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper);
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp
index f86f02ff6347..8e4aaf59e929 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp
@@ -360,10 +360,17 @@ void JitArm64::stfXX(UGeckoInstruction inst)
 
 	ARM64Reg XA = EncodeRegTo64(addr_reg);
 
-	if (is_immediate)
+	if (is_immediate && !(jit->jo.optimizeGatherPipe && PowerPC::IsOptimizableGatherPipeWrite(imm_addr)))
+	{
 		MOVI2R(XA, imm_addr);
 
-	if (update)
+		if (update)
+		{
+			gpr.BindToRegister(a, false);
+			MOV(gpr.R(a), addr_reg);
+		}
+	}
+	else if (!is_immediate && update)
 	{
 		gpr.BindToRegister(a, false);
 		MOV(gpr.R(a), addr_reg);
@@ -386,25 +393,44 @@ void JitArm64::stfXX(UGeckoInstruction inst)
 		else
 			accessSize = 32;
 
-		MOVI2R(X30, (u64)&GPFifo::m_gatherPipeCount);
-		MOVI2R(X1, (u64)GPFifo::m_gatherPipe);
-		LDR(INDEX_UNSIGNED, W0, X30, 0);
-		ADD(X1, X1, X0);
+		u64 base_ptr = std::min((u64)&GPFifo::m_gatherPipeCount, (u64)&GPFifo::m_gatherPipe);
+		u32 count_off = (u64)&GPFifo::m_gatherPipeCount - base_ptr;
+		u32 pipe_off = (u64)&GPFifo::m_gatherPipe - base_ptr;
+
+		MOVI2R(X30, base_ptr);
+
+		if (pipe_off)
+			ADD(X1, X30, pipe_off);
+
+		LDR(INDEX_UNSIGNED, W0, X30, count_off);
 
 		if (accessSize == 64)
 		{
 			m_float_emit.REV64(8, Q0, V0);
-			m_float_emit.STR(64, INDEX_UNSIGNED, Q0, X1, 0);
+			if (pipe_off)
+				m_float_emit.STR(64, Q0, X1, ArithOption(X0));
+			else
+				m_float_emit.STR(64, Q0, X30, ArithOption(X0));
 		}
 		else if (accessSize == 32)
 		{
 			m_float_emit.FCVT(32, 64, D0, EncodeRegToDouble(V0));
 			m_float_emit.REV32(8, D0, D0);
-			m_float_emit.STR(32, INDEX_UNSIGNED, D0, X1, 0);
+			if (pipe_off)
+				m_float_emit.STR(32, D0, X1, ArithOption(X0));
+			else
+				m_float_emit.STR(32, D0, X30, ArithOption(X0));
+		}
 		ADD(W0, W0, accessSize >> 3);
-		STR(INDEX_UNSIGNED, W0, X30, 0);
+		STR(INDEX_UNSIGNED, W0, X30, count_off);
 		jit->js.fifoBytesThisBlock += accessSize >> 3;
 
+		if (update)
+		{
+			// Chance of this happening is fairly low, but support it
+			gpr.BindToRegister(a, false);
+			MOVI2R(gpr.R(a), imm_addr);
+		}
 	}
 	else if (PowerPC::IsOptimizableRAMAddress(imm_addr))
 	{