Skip to content

Commit

Permalink
softjit: Centralize argument register allocation.
Browse files Browse the repository at this point in the history
  • Loading branch information
unknownbrackets committed Nov 28, 2021
1 parent 2a93006 commit 99c213f
Show file tree
Hide file tree
Showing 3 changed files with 157 additions and 39 deletions.
50 changes: 11 additions & 39 deletions GPU/Software/DrawPixelX86.cpp
Expand Up @@ -64,53 +64,25 @@ static OpArg MConstDisp(X64Reg r, const T *t) {
}

SingleFunc PixelJitCache::CompileSingle(const PixelFuncID &id) {
// Setup the reg cache.
regCache_.Add(RAX, RegCache::GEN_INVALID);
regCache_.Add(R10, RegCache::GEN_INVALID);
regCache_.Add(R11, RegCache::GEN_INVALID);
regCache_.Add(XMM1, RegCache::VEC_INVALID);
regCache_.Add(XMM2, RegCache::VEC_INVALID);
regCache_.Add(XMM3, RegCache::VEC_INVALID);
regCache_.Add(XMM5, RegCache::VEC_INVALID);
// Setup the reg cache and disallow spill for arguments.
regCache_.SetupABI({
RegCache::GEN_ARG_X,
RegCache::GEN_ARG_Y,
RegCache::GEN_ARG_Z,
RegCache::GEN_ARG_FOG,
RegCache::VEC_ARG_COLOR,
RegCache::GEN_ARG_ID,
});

#if PPSSPP_PLATFORM(WINDOWS)
// Must save: RBX, RSP, RBP, RDI, RSI, R12-R15, XMM6-15

regCache_.Add(XMM0, RegCache::VEC_INVALID);

regCache_.Add(RCX, RegCache::GEN_ARG_X);
regCache_.Add(RDX, RegCache::GEN_ARG_Y);
regCache_.Add(R8, RegCache::GEN_ARG_Z);
regCache_.Add(R9, RegCache::GEN_ARG_FOG);
regCache_.Add(XMM4, RegCache::VEC_ARG_COLOR);

// Windows reserves space to save args, 1 xmm + 4 ints before the id.
_assert_(!regCache_.Has(RegCache::GEN_ARG_ID));
stackIDOffset_ = 1 * 16 + 4 * PTRBITS / 8;
#else
// Must save: RBX, RSP, RBP, R12-R15

regCache_.Add(R9, RegCache::GEN_INVALID);
regCache_.Add(XMM4, RegCache::VEC_INVALID);

regCache_.Add(RDI, RegCache::GEN_ARG_X);
regCache_.Add(RSI, RegCache::GEN_ARG_Y);
regCache_.Add(RDX, RegCache::GEN_ARG_Z);
regCache_.Add(RCX, RegCache::GEN_ARG_FOG);
regCache_.Add(XMM0, RegCache::VEC_ARG_COLOR);
regCache_.Add(R8, RegCache::GEN_ARG_ID);

_assert_(regCache_.Has(RegCache::GEN_ARG_ID));
stackIDOffset_ = -1;
#endif

// Initially, disallow spill for args (they get unlocked when no longer needed.)
regCache_.ForceRetain(RegCache::GEN_ARG_X);
regCache_.ForceRetain(RegCache::GEN_ARG_Y);
regCache_.ForceRetain(RegCache::GEN_ARG_Z);
regCache_.ForceRetain(RegCache::GEN_ARG_FOG);
regCache_.ForceRetain(RegCache::VEC_ARG_COLOR);
if (regCache_.Has(RegCache::GEN_ARG_ID))
regCache_.ForceRetain(RegCache::GEN_ARG_ID);

BeginWrite();
const u8 *start = AlignCode16();
bool success = true;
Expand Down
133 changes: 133 additions & 0 deletions GPU/Software/RasterizerRegCache.cpp
Expand Up @@ -17,8 +17,141 @@

#include "GPU/Software/RasterizerRegCache.h"

#include "Common/Arm64Emitter.h"

namespace Rasterizer {

// Populates the register cache according to the platform's calling convention.
//
// args lists the purposes of the jitted function's arguments, in call order.  Every argument
// the ABI passes in a register is added under its purpose, and force retained when forceRetain
// is set.  Arguments that do not fit in registers are simply skipped (the stack isn't tracked.)
//
// Argument registers not consumed by args, plus the additional scratch registers listed per
// platform below, are added as purposeless temps (GEN_INVALID / VEC_INVALID.)
//
// Only ARM64 and AMD64 are implemented, other architectures assert.
void RegCache::SetupABI(const std::vector<Purpose> &args, bool forceRetain) {
#if PPSSPP_ARCH(ARM)
	_assert_msg_(false, "Not yet implemented");
#elif PPSSPP_ARCH(ARM64)
	using namespace Arm64Gen;

	// ARM64 has a generous allotment of registers.
	// Gen and vec args are assigned from separate pools, each in order.
	static const Reg genArgs[] = { X0, X1, X2, X3, X4, X5, X6, X7 };
	static const Reg vecArgs[] = { Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7 };
	size_t genIndex = 0;
	size_t vecIndex = 0;

	for (const Purpose &p : args) {
		if ((p & FLAG_GEN) != 0) {
			// Once the pool is exhausted, the arg is not in a register - don't track it.
			if (genIndex < ARRAY_SIZE(genArgs)) {
				Add(genArgs[genIndex++], p);
				if (forceRetain)
					ForceRetain(p);
			}
		} else {
			if (vecIndex < ARRAY_SIZE(vecArgs)) {
				Add(vecArgs[vecIndex++], p);
				if (forceRetain)
					ForceRetain(p);
			}
		}
	}

	// Any others are free and purposeless.
	for (size_t i = genIndex; i < ARRAY_SIZE(genArgs); ++i)
		Add(genArgs[i], GEN_INVALID);
	for (size_t i = vecIndex; i < ARRAY_SIZE(vecArgs); ++i)
		Add(vecArgs[i], VEC_INVALID);

	// Add all other caller saved regs without purposes yet.
	// X16 and X17 are intentionally NOT in this list, see below.
	static const Reg genTemps[] = { X8, X9, X10, X11, X12, X13, X14, X15 };
	for (Reg r : genTemps)
		Add(r, GEN_INVALID);
	static const Reg vecTemps[] = { Q16, Q17, Q18, Q19, Q20, Q21, Q22, Q23 };
	for (Reg r : vecTemps)
		Add(r, VEC_INVALID);
	// We also have X16-17 and Q24-Q31, but leave those for ordered paired instructions.
#elif PPSSPP_ARCH(X86)
	_assert_msg_(false, "Not yet implemented");
#elif PPSSPP_ARCH(AMD64)
	using namespace Gen;

#if PPSSPP_PLATFORM(WINDOWS)
	// The Windows convention is annoying, as it wastes registers and keeps to "positions."
	// These are mutable: each slot is set to INVALID_REG once an arg has claimed it.
	Reg genArgs[] = { RCX, RDX, R8, R9 };
	Reg vecArgs[] = { XMM0, XMM1, XMM2, XMM3, XMM4, XMM5 };

	for (size_t i = 0; i < args.size(); ++i) {
		const Purpose &p = args[i];
		if ((p & FLAG_GEN) != 0) {
			// The register is chosen by argument position, not by count of gen args so far.
			if (i < ARRAY_SIZE(genArgs)) {
				Add(genArgs[i], p);
				genArgs[i] = INVALID_REG;
				if (forceRetain)
					ForceRetain(p);
			}
		} else {
			if (i < ARRAY_SIZE(vecArgs)) {
				Add(vecArgs[i], p);
				vecArgs[i] = INVALID_REG;
				if (forceRetain)
					ForceRetain(p);
			}
		}
	}

	// Any unused regs can be used freely as temps.
	for (Reg r : genArgs) {
		if (r != INVALID_REG)
			Add(r, GEN_INVALID);
	}
	for (Reg r : vecArgs) {
		if (r != INVALID_REG)
			Add(r, VEC_INVALID);
	}

	// Additionally, these three are volatile.
	// Must save: RBX, RSP, RBP, RDI, RSI, R12-R15, XMM6-15
	static const Reg genTemps[] = { RAX, R10, R11 };
	for (Reg r : genTemps)
		Add(r, GEN_INVALID);
#else
	// Okay, first, allocate args.  SystemV gives to the first of each usable pool.
	static const Reg genArgs[] = { RDI, RSI, RDX, RCX, R8, R9 };
	static const Reg vecArgs[] = { XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7 };
	size_t genIndex = 0;
	size_t vecIndex = 0;

	for (const Purpose &p : args) {
		if ((p & FLAG_GEN) != 0) {
			// Once the pool is exhausted, the arg is not in a register - don't track it.
			if (genIndex < ARRAY_SIZE(genArgs)) {
				Add(genArgs[genIndex++], p);
				if (forceRetain)
					ForceRetain(p);
			}
		} else {
			if (vecIndex < ARRAY_SIZE(vecArgs)) {
				Add(vecArgs[vecIndex++], p);
				if (forceRetain)
					ForceRetain(p);
			}
		}
	}

	// Any others are free and purposeless.
	for (size_t i = genIndex; i < ARRAY_SIZE(genArgs); ++i)
		Add(genArgs[i], GEN_INVALID);
	for (size_t i = vecIndex; i < ARRAY_SIZE(vecArgs); ++i)
		Add(vecArgs[i], VEC_INVALID);

	// Add all other caller saved regs without purposes yet.
	// Must save: RBX, RSP, RBP, R12-R15
	static const Reg genTemps[] = { RAX, R10, R11 };
	for (Reg r : genTemps)
		Add(r, GEN_INVALID);
	static const Reg vecTemps[] = { XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15 };
	for (Reg r : vecTemps)
		Add(r, VEC_INVALID);
#endif
#elif PPSSPP_ARCH(MIPS)
	_assert_msg_(false, "Not yet implemented");
#else
	_assert_msg_(false, "Not yet implemented");
#endif
}

void RegCache::Reset(bool validate) {
if (validate) {
for (auto &reg : regs) {
Expand Down
13 changes: 13 additions & 0 deletions GPU/Software/RasterizerRegCache.h
Expand Up @@ -144,15 +144,28 @@ struct RegCache {
bool forceRetained = false;
};

// Set up the cache with the registers available at function entry for the platform's ABI.
// args gives the purpose of each function argument, in call order.  Arguments passed in
// registers are added under their purpose; if forceRetain is true, each is also force retained.
// Argument registers left unused and other scratch registers are added as GEN_INVALID / VEC_INVALID.
// Note: Assumes __vectorcall on Windows.
// Keep in mind, some args won't fit in regs, this ignores stack and tracks what's in regs.
void SetupABI(const std::vector<Purpose> &args, bool forceRetain = true);
// Reset after compile complete, pass false for validate if compile failed.
void Reset(bool validate);
// Add register to cache for tracking with initial purpose (won't be locked or force retained.)
void Add(Reg r, Purpose p);
// Find registers with one purpose and change to the other.
void Change(Purpose history, Purpose destiny);
// Release a previously found or allocated register, setting purpose to invalid.
void Release(Reg &r, Purpose p);
// Unlock a previously found or allocated register, but try to retain it.
void Unlock(Reg &r, Purpose p);
// Check if the purpose is currently in a register.
bool Has(Purpose p);
// Return the register for a given purpose (check with Has() first if not certainly there.)
Reg Find(Purpose p);
// Allocate a new register for the given purpose.
Reg Alloc(Purpose p);
// Force a register to be retained, even if we run short on regs.
void ForceRetain(Purpose p);
// Reverse ForceRetain, and release the register back to invalid.
void ForceRelease(Purpose p);

// For getting a specific reg. WARNING: May return a locked reg, so you have to check.
Expand Down

0 comments on commit 99c213f

Please sign in to comment.