Permalink
Browse files

Merge pull request #9834 from hrydgard/remove-rip-addressing

x86-64: Add ability to run without RIP addressing between generated code and global data
  • Loading branch information...
hrydgard committed Jul 7, 2017
2 parents e6e96c0 + 8d86463 commit a9f70d1c5adab3ce2f4c4f093ad662f40d6fa909
View
@@ -120,27 +120,30 @@ static void *SearchForFreeMem(size_t size) {
void *AllocateExecutableMemory(size_t size) {
#if defined(_WIN32)
void *ptr;
void *ptr = nullptr;
DWORD prot = PAGE_EXECUTE_READWRITE;
if (PlatformIsWXExclusive())
prot = PAGE_READWRITE;
if (sys_info.dwPageSize == 0)
GetSystemInfo(&sys_info);
#if defined(_M_X64)
if ((uintptr_t)&hint_location > 0xFFFFFFFFULL) {
size_t aligned_size = round_page(size);
#if 1 // Turn off to hunt for RIP bugs on x86-64.
ptr = SearchForFreeMem(aligned_size);
if (!ptr) {
// Let's try again, from the top.
// When we deallocate, this doesn't change, so we eventually run out of space.
last_executable_addr = 0;
ptr = SearchForFreeMem(aligned_size);
}
#endif
if (ptr) {
ptr = VirtualAlloc(ptr, aligned_size, MEM_RESERVE | MEM_COMMIT, prot);
} else {
ERROR_LOG(COMMON, "Unable to find nearby executable memory for jit");
WARN_LOG(COMMON, "Unable to find nearby executable memory for jit. Proceeding with far memory.");
// Can still run, thanks to "RipAccessible".
ptr = VirtualAlloc(nullptr, aligned_size, MEM_RESERVE | MEM_COMMIT, prot);
}
}
else
View
@@ -269,7 +269,7 @@ void OpArg::WriteRest(XEmitter *emit, int extraBytes, X64Reg _operandReg,
{
mod = 0;
}
else if (ioff<-128 || ioff>127)
else if (ioff < -128 || ioff > 127)
{
mod = 2; //32-bit displacement
}
@@ -1497,8 +1497,8 @@ void XEmitter::MOVQ_xmm(OpArg arg, X64Reg src)
else
{
arg.operandReg = src;
arg.WriteRex(this, 0, 0);
Write8(0x66);
arg.WriteRex(this, 0, 0);
Write8(0x0f);
Write8(0xD6);
arg.WriteRest(this, 0);
View
@@ -20,6 +20,7 @@
#include "ppsspp_config.h"
#include <cstddef>
#include "Common.h"
#include "CodeBlock.h"
@@ -358,8 +359,6 @@ class XEmitter
void WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, OpArg arg);
void WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg &a1, const OpArg &a2);
void ABI_CalculateFrameSize(u32 mask, size_t rsp_alignment, size_t needed_frame_size, size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp);
protected:
inline void Write8(u8 value) {*code++ = value;}
inline void Write16(u16 value) {*(u16*)code = (value); code += 2;}
@@ -1072,6 +1071,16 @@ class XEmitter
class XCodeBlock : public CodeBlock<XEmitter> {
public:
	void PoisonMemory(int offset) override;

	// Returns true when `ptr` is close enough to the current emit position to be
	// reached with a signed 32-bit RIP-relative displacement.
	// Flip the commented-out return below to force the "far" code paths while debugging.
	bool RipAccessible(const void *ptr) const {
		// return false;  // For debugging
#ifdef _M_IX86
		// 32-bit x86 has no RIP-relative addressing; absolute addresses always work.
		return true;
#else
		// Conservative +/-2GB check measured from the current code pointer.
		ptrdiff_t distance = GetCodePtr() - (const uint8_t *)ptr;
		return distance > -0x7FFFFFFF && distance < 0x7FFFFFFF;
#endif
	}
};
} // namespace
View
@@ -17,7 +17,10 @@
#pragma once
#include <cstddef>
#include "util/random/rng.h"
#include "Common/Common.h"
#include "Common/CommonTypes.h"
// #include "Core/CoreParameter.h"
#include "Core/Opcode.h"
@@ -145,6 +148,26 @@ extern u8 fromvoffset[128];
enum class CPUCore;
#if defined(PPSSPP_ARCH_X86) || defined(PPSSPP_ARCH_AMD64)
// Note that CTXREG is offset to point at the first floating point register, intentionally. This is so that a byte offset
// can reach both GPR and FPR regs.
#define MIPSSTATE_VAR(x) MDisp(X64JitConstants::CTXREG, \
(int)(offsetof(MIPSState, x) - offsetof(MIPSState, f[0])))
// Workaround for compilers that don't like dynamic indexing in offsetof
#define MIPSSTATE_VAR_ELEM32(x, i) MDisp(X64JitConstants::CTXREG, \
(int)(offsetof(MIPSState, x) - offsetof(MIPSState, f[0]) + (i) * 4))
// To get RIP/relative addressing (requires tight memory control so generated code isn't too far from the binary, and a reachable variable called mips):
// #define MIPSSTATE_VAR(x) M(&mips->x)
#endif
enum {
NUM_X86_FPU_TEMPS = 16,
};
class MIPSState
{
public:
@@ -202,14 +225,32 @@ class MIPSState
bool inDelaySlot;
int llBit; // ll/sc
u32 temp; // can be used to save temporaries during calculations when we need more than R0 and R1
u32 mxcsrTemp;
// Temporary used around delay slots and similar.
u64 saved_flags;
GMRng rng; // VFPU hardware random number generator. Probably not the right type.
// Debug stuff
u32 debugCount; // can be used to count basic blocks before crashes, etc.
// Temps needed for JitBranch.cpp experiments
u32 intBranchExit;
u32 jitBranchExit;
u32 savedPC;
u32 MEMORY_ALIGNED16(vcmpResult[4]);
float sincostemp[2];
static const u32 FCR0_VALUE = 0x00003351;
#if defined(PPSSPP_ARCH_X86) || defined(PPSSPP_ARCH_AMD64)
// FPU TEMP0, etc. are swapped in here if necessary (e.g. on x86.)
float tempValues[NUM_X86_FPU_TEMPS];
#endif
// Extracts the 4-bit VFPU write mask from the D-prefix control register
// (bits 8-11 of vfpuCtrl[VFPU_CTRL_DPREFIX]).
u8 VfpuWriteMask() const {
	const auto dprefix = vfpuCtrl[VFPU_CTRL_DPREFIX];
	return static_cast<u8>((dprefix >> 8) & 0xF);
}
View
@@ -70,21 +70,21 @@ void Jit::GenerateFixedCode(JitOptions &jo) {
BeginWrite();
restoreRoundingMode = AlignCode16(); {
STMXCSR(M(&mips_->temp));
STMXCSR(MIPSSTATE_VAR(temp));
// Clear the rounding mode and flush-to-zero bits back to 0.
AND(32, M(&mips_->temp), Imm32(~(7 << 13)));
LDMXCSR(M(&mips_->temp));
AND(32, MIPSSTATE_VAR(temp), Imm32(~(7 << 13)));
LDMXCSR(MIPSSTATE_VAR(temp));
RET();
}
applyRoundingMode = AlignCode16(); {
MOV(32, R(EAX), M(&mips_->fcr31));
MOV(32, R(EAX), MIPSSTATE_VAR(fcr31));
AND(32, R(EAX), Imm32(0x01000003));
// If it's 0 (nearest + no flush0), we don't actually bother setting - we cleared the rounding
// mode out in restoreRoundingMode anyway. This is the most common.
FixupBranch skip = J_CC(CC_Z);
STMXCSR(M(&mips_->temp));
STMXCSR(MIPSSTATE_VAR(temp));
// The MIPS bits don't correspond exactly, so we have to adjust.
// 0 -> 0 (skip2), 1 -> 3, 2 -> 2 (skip2), 3 -> 1
@@ -96,33 +96,28 @@ void Jit::GenerateFixedCode(JitOptions &jo) {
// Adjustment complete, now reconstruct MXCSR
SHL(32, R(EAX), Imm8(13));
// Before setting new bits, we must clear the old ones.
AND(32, M(&mips_->temp), Imm32(~(7 << 13))); // Clearing bits 13-14 (rounding mode) and 15 (flush to zero)
OR(32, M(&mips_->temp), R(EAX));
AND(32, MIPSSTATE_VAR(temp), Imm32(~(7 << 13))); // Clearing bits 13-14 (rounding mode) and 15 (flush to zero)
OR(32, MIPSSTATE_VAR(temp), R(EAX));
TEST(32, M(&mips_->fcr31), Imm32(1 << 24));
TEST(32, MIPSSTATE_VAR(fcr31), Imm32(1 << 24));
FixupBranch skip3 = J_CC(CC_Z);
OR(32, M(&mips_->temp), Imm32(1 << 15));
OR(32, MIPSSTATE_VAR(temp), Imm32(1 << 15));
SetJumpTarget(skip3);
LDMXCSR(M(&mips_->temp));
LDMXCSR(MIPSSTATE_VAR(temp));
SetJumpTarget(skip);
RET();
}
updateRoundingMode = AlignCode16(); {
// If it's only ever 0, we don't actually bother applying or restoring it.
// This is the most common situation.
TEST(32, M(&mips_->fcr31), Imm32(0x01000003));
TEST(32, MIPSSTATE_VAR(fcr31), Imm32(0x01000003));
FixupBranch skip = J_CC(CC_Z);
#ifdef _M_X64
// TODO: Move the hasSetRounding flag somewhere we can reach it through the context pointer, or something.
MOV(64, R(RAX), Imm64((uintptr_t)&js.hasSetRounding));
MOV(PTRBITS, R(RAX), ImmPtr(&js.hasSetRounding));
MOV(8, MatR(RAX), Imm8(1));
#else
MOV(8, M(&js.hasSetRounding), Imm8(1));
#endif
SetJumpTarget(skip);
RET();
}
@@ -153,7 +148,12 @@ void Jit::GenerateFixedCode(JitOptions &jo) {
FixupBranch bailCoreState = J_CC(CC_S, true);
SetJumpTarget(skipToCoreStateCheck);
CMP(32, M(&coreState), Imm32(0));
if (RipAccessible((const void *)&coreState)) {
CMP(32, M(&coreState), Imm32(0)); // rip accessible
} else {
MOV(PTRBITS, R(RAX), ImmPtr((const void *)&coreState));
CMP(32, MatR(RAX), Imm32(0));
}
FixupBranch badCoreState = J_CC(CC_NZ, true);
FixupBranch skipToRealDispatch2 = J(); //skip the sync and compare first time
@@ -167,7 +167,7 @@ void Jit::GenerateFixedCode(JitOptions &jo) {
dispatcherNoCheck = GetCodePtr();
MOV(32, R(EAX), M(&mips_->pc));
MOV(32, R(EAX), MIPSSTATE_VAR(pc));
dispatcherInEAXNoCheck = GetCodePtr();
#ifdef MASKED_PSP_MEMORY
@@ -185,9 +185,8 @@ void Jit::GenerateFixedCode(JitOptions &jo) {
SHR(32, R(EDX), Imm8(24));
CMP(32, R(EDX), Imm8(MIPS_EMUHACK_OPCODE >> 24));
FixupBranch notfound = J_CC(CC_NE);
if (enableDebug)
{
ADD(32, M(&mips_->debugCount), Imm8(1));
if (enableDebug) {
ADD(32, MIPSSTATE_VAR(debugCount), Imm8(1));
}
//grab from list and jump to it
AND(32, R(EAX), Imm32(MIPS_EMUHACK_VALUE_MASK));
@@ -211,7 +210,12 @@ void Jit::GenerateFixedCode(JitOptions &jo) {
SetJumpTarget(bail);
SetJumpTarget(bailCoreState);
CMP(32, M(&coreState), Imm32(0));
if (RipAccessible((const void *)&coreState)) {
CMP(32, M(&coreState), Imm32(0)); // rip accessible
} else {
MOV(PTRBITS, R(RAX), ImmPtr((const void *)&coreState));
CMP(32, MatR(RAX), Imm32(0));
}
J_CC(CC_Z, outerLoop, true);
SetJumpTarget(badCoreState);
@@ -77,11 +77,7 @@ namespace MIPSComp
{
using namespace Gen;
static u32 intBranchExit;
static u32 jitBranchExit;
static void JitBranchLog(MIPSOpcode op, u32 pc)
{
static void JitBranchLog(MIPSOpcode op, u32 pc) {
currentMIPS->pc = pc;
currentMIPS->inDelaySlot = false;
@@ -91,15 +87,15 @@ static void JitBranchLog(MIPSOpcode op, u32 pc)
// Branch taken, use nextPC.
if (currentMIPS->inDelaySlot)
intBranchExit = currentMIPS->nextPC;
currentMIPS->intBranchExit = currentMIPS->nextPC;
else
{
// Branch not taken, likely delay slot skipped.
if (info & LIKELY)
intBranchExit = currentMIPS->pc;
currentMIPS->intBranchExit = currentMIPS->pc;
// Branch not taken, so increment over delay slot.
else
intBranchExit = currentMIPS->pc + 4;
currentMIPS->intBranchExit = currentMIPS->pc + 4;
}
currentMIPS->pc = pc;
@@ -110,7 +106,7 @@ static void JitBranchLogMismatch(MIPSOpcode op, u32 pc)
{
char temp[256];
MIPSDisAsm(op, pc, temp, true);
ERROR_LOG(JIT, "Bad jump: %s - int:%08x jit:%08x", temp, intBranchExit, jitBranchExit);
ERROR_LOG(JIT, "Bad jump: %s - int:%08x jit:%08x", temp, currentMIPS->intBranchExit, currentMIPS->jitBranchExit);
host->SetDebugMode(true);
}
@@ -124,14 +120,14 @@ void Jit::BranchLogExit(MIPSOpcode op, u32 dest, bool useEAX)
{
OpArg destArg = useEAX ? R(EAX) : Imm32(dest);
CMP(32, M(&intBranchExit), destArg);
CMP(32, MIPSSTATE_VAR(intBranchExit), destArg);
FixupBranch skip = J_CC(CC_E);
MOV(32, M(&jitBranchExit), destArg);
MOV(32, MIPSSTATE_VAR(jitBranchExit), destArg);
ABI_CallFunctionCC(thunks.ProtectFunction(&JitBranchLogMismatch), op.encoding, GetCompilerPC());
// Restore EAX, we probably ruined it.
if (useEAX)
MOV(32, R(EAX), M(&jitBranchExit));
MOV(32, R(EAX), MIPSSTATE_VAR(jitBranchExit));
SetJumpTarget(skip);
}
@@ -662,8 +658,6 @@ void Jit::Comp_Jump(MIPSOpcode op) {
js.compiling = false;
}
static u32 savedPC;
void Jit::Comp_JumpReg(MIPSOpcode op)
{
CONDITIONAL_LOG;
@@ -686,7 +680,7 @@ void Jit::Comp_JumpReg(MIPSOpcode op)
{
// If this is a syscall, write the pc (for thread switching and other good reasons.)
gpr.MapReg(rs, true, false);
MOV(32, M(&mips_->pc), gpr.R(rs));
MOV(32, MIPSSTATE_VAR(pc), gpr.R(rs));
if (andLink)
gpr.SetImm(rd, GetCompilerPC() + 8);
CompileDelaySlot(DELAYSLOT_FLUSH);
@@ -729,21 +723,18 @@ void Jit::Comp_JumpReg(MIPSOpcode op)
MOV(32, R(EAX), gpr.R(rs));
}
FlushAll();
}
else
{
} else {
// Latch destination now - save it in memory.
gpr.MapReg(rs, true, false);
MOV(32, M(&savedPC), gpr.R(rs));
MOV(32, MIPSSTATE_VAR(savedPC), gpr.R(rs));
if (andLink)
gpr.SetImm(rd, GetCompilerPC() + 8);
CompileDelaySlot(DELAYSLOT_NICE);
MOV(32, R(EAX), M(&savedPC));
MOV(32, R(EAX), MIPSSTATE_VAR(savedPC));
FlushAll();
}
switch (op & 0x3f)
{
switch (op & 0x3f) {
case 8: //jr
break;
case 9: //jalr
@@ -788,7 +779,7 @@ void Jit::Comp_Syscall(MIPSOpcode op)
js.downcountAmount = -offset;
if (!js.inDelaySlot) {
MOV(32, M(&mips_->pc), Imm32(GetCompilerPC() + 4));
MOV(32, MIPSSTATE_VAR(pc), Imm32(GetCompilerPC() + 4));
}
#ifdef USE_PROFILER
Oops, something went wrong.

0 comments on commit a9f70d1

Please sign in to comment.