Skip to content

Commit

Permalink
riscv: Use automapping for special cases too.
Browse files Browse the repository at this point in the history
  • Loading branch information
unknownbrackets committed Aug 20, 2023
1 parent a190793 commit 6a75e67
Show file tree
Hide file tree
Showing 8 changed files with 45 additions and 117 deletions.
8 changes: 8 additions & 0 deletions Core/MIPS/IR/IRRegCache.cpp
Expand Up @@ -643,6 +643,14 @@ void IRNativeRegCacheBase::Map(const IRInst &inst) {
CleanupMapping(mapping, 3);
}

void IRNativeRegCacheBase::MapWithExtra(const IRInst &inst, std::vector<Mapping> extra) {
extra.resize(extra.size() + 3);
MappingFromInst(inst, &extra[extra.size() - 3]);

ApplyMapping(extra.data(), (int)extra.size());
CleanupMapping(extra.data(), (int)extra.size());
}

IRNativeReg IRNativeRegCacheBase::MapWithTemp(const IRInst &inst, MIPSLoc type) {
Mapping mapping[3];
MappingFromInst(inst, mapping);
Expand Down
1 change: 1 addition & 0 deletions Core/MIPS/IR/IRRegCache.h
Expand Up @@ -190,6 +190,7 @@ class IRNativeRegCacheBase {
};

void Map(const IRInst &inst);
void MapWithExtra(const IRInst &inst, std::vector<Mapping> extra);
virtual void FlushAll(bool gprs = true, bool fprs = true);

protected:
Expand Down
24 changes: 12 additions & 12 deletions Core/MIPS/RiscV/RiscVCompALU.cpp
Expand Up @@ -563,25 +563,25 @@ void RiscVJitBackend::CompIR_HiLo(IRInst inst) {

switch (inst.op) {
case IROp::MtLo:
regs_.MapGPRDirtyIn(IRREG_LO, inst.src1);
regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 1, MIPSMap::NOINIT } });
MV(regs_.R(IRREG_LO), regs_.R(inst.src1));
regs_.MarkGPRDirty(IRREG_LO, regs_.IsNormalized32(inst.src1));
break;

case IROp::MtHi:
regs_.MapGPRDirtyIn(IRREG_HI, inst.src1);
regs_.MapWithExtra(inst, { { 'G', IRREG_HI, 1, MIPSMap::NOINIT } });
MV(regs_.R(IRREG_HI), regs_.R(inst.src1));
regs_.MarkGPRDirty(IRREG_HI, regs_.IsNormalized32(inst.src1));
break;

case IROp::MfLo:
regs_.MapGPRDirtyIn(inst.dest, IRREG_LO);
regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 1, MIPSMap::INIT } });
MV(regs_.R(inst.dest), regs_.R(IRREG_LO));
regs_.MarkGPRDirty(inst.dest, regs_.IsNormalized32(IRREG_LO));
break;

case IROp::MfHi:
regs_.MapGPRDirtyIn(inst.dest, IRREG_HI);
regs_.MapWithExtra(inst, { { 'G', IRREG_HI, 1, MIPSMap::INIT } });
MV(regs_.R(inst.dest), regs_.R(IRREG_HI));
regs_.MarkGPRDirty(inst.dest, regs_.IsNormalized32(IRREG_HI));
break;
Expand Down Expand Up @@ -630,7 +630,7 @@ void RiscVJitBackend::CompIR_Mult(IRInst inst) {
case IROp::Mult:
// TODO: Maybe IR could simplify when HI is not needed or clobbered?
// TODO: HI/LO merge optimization? Have to be careful of passes that split them...
regs_.MapGPRDirtyDirtyInIn(IRREG_LO, IRREG_HI, inst.src1, inst.src2);
regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 1, MIPSMap::NOINIT }, { 'G', IRREG_HI, 1, MIPSMap::NOINIT } });
NormalizeSrc12(inst, &lhs, &rhs, SCRATCH1, SCRATCH2, true);
MUL(regs_.R(IRREG_LO), lhs, rhs);
splitMulResult();
Expand All @@ -639,14 +639,14 @@ void RiscVJitBackend::CompIR_Mult(IRInst inst) {
case IROp::MultU:
// This is an "anti-norm32" case. Let's just zero always.
// TODO: If we could know that LO was only needed, we could use MULW and be done.
regs_.MapGPRDirtyDirtyInIn(IRREG_LO, IRREG_HI, inst.src1, inst.src2);
regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 1, MIPSMap::NOINIT }, { 'G', IRREG_HI, 1, MIPSMap::NOINIT } });
makeArgsUnsigned(&lhs, &rhs);
MUL(regs_.R(IRREG_LO), lhs, rhs);
splitMulResult();
break;

case IROp::Madd:
regs_.MapGPRDirtyDirtyInIn(IRREG_LO, IRREG_HI, inst.src1, inst.src2, false);
regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 1, MIPSMap::DIRTY }, { 'G', IRREG_HI, 1, MIPSMap::DIRTY } });
NormalizeSrc12(inst, &lhs, &rhs, SCRATCH1, SCRATCH2, true);
MUL(SCRATCH1, lhs, rhs);

Expand All @@ -656,7 +656,7 @@ void RiscVJitBackend::CompIR_Mult(IRInst inst) {
break;

case IROp::MaddU:
regs_.MapGPRDirtyDirtyInIn(IRREG_LO, IRREG_HI, inst.src1, inst.src2, false);
regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 1, MIPSMap::DIRTY }, { 'G', IRREG_HI, 1, MIPSMap::DIRTY } });
makeArgsUnsigned(&lhs, &rhs);
MUL(SCRATCH1, lhs, rhs);

Expand All @@ -666,7 +666,7 @@ void RiscVJitBackend::CompIR_Mult(IRInst inst) {
break;

case IROp::Msub:
regs_.MapGPRDirtyDirtyInIn(IRREG_LO, IRREG_HI, inst.src1, inst.src2, false);
regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 1, MIPSMap::DIRTY }, { 'G', IRREG_HI, 1, MIPSMap::DIRTY } });
NormalizeSrc12(inst, &lhs, &rhs, SCRATCH1, SCRATCH2, true);
MUL(SCRATCH1, lhs, rhs);

Expand All @@ -676,7 +676,7 @@ void RiscVJitBackend::CompIR_Mult(IRInst inst) {
break;

case IROp::MsubU:
regs_.MapGPRDirtyDirtyInIn(IRREG_LO, IRREG_HI, inst.src1, inst.src2, false);
regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 1, MIPSMap::DIRTY }, { 'G', IRREG_HI, 1, MIPSMap::DIRTY } });
makeArgsUnsigned(&lhs, &rhs);
MUL(SCRATCH1, lhs, rhs);

Expand All @@ -697,7 +697,7 @@ void RiscVJitBackend::CompIR_Div(IRInst inst) {
RiscVReg numReg, denomReg;
switch (inst.op) {
case IROp::Div:
regs_.MapGPRDirtyDirtyInIn(IRREG_LO, IRREG_HI, inst.src1, inst.src2);
regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 1, MIPSMap::NOINIT }, { 'G', IRREG_HI, 1, MIPSMap::NOINIT } });
// We have to do this because of the divide by zero and overflow checks below.
NormalizeSrc12(inst, &numReg, &denomReg, SCRATCH1, SCRATCH2, true);
DIVW(regs_.R(IRREG_LO), numReg, denomReg);
Expand Down Expand Up @@ -727,7 +727,7 @@ void RiscVJitBackend::CompIR_Div(IRInst inst) {
break;

case IROp::DivU:
regs_.MapGPRDirtyDirtyInIn(IRREG_LO, IRREG_HI, inst.src1, inst.src2);
regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 1, MIPSMap::NOINIT }, { 'G', IRREG_HI, 1, MIPSMap::NOINIT } });
// We have to do this because of the divide by zero check below.
NormalizeSrc12(inst, &numReg, &denomReg, SCRATCH1, SCRATCH2, true);
DIVUW(regs_.R(IRREG_LO), numReg, denomReg);
Expand Down
24 changes: 8 additions & 16 deletions Core/MIPS/RiscV/RiscVCompFPU.cpp
Expand Up @@ -350,8 +350,7 @@ void RiscVJitBackend::CompIR_FCompare(IRInst inst) {
break;

case IRFpCompareMode::EitherUnordered:
regs_.Map(inst);
regs_.MapGPR(IRREG_FPCOND, MIPSMap::NOINIT);
regs_.MapWithExtra(inst, { { 'G', IRREG_FPCOND, 1, MIPSMap::NOINIT } });
FCLASS(32, SCRATCH1, regs_.F(inst.src1));
FCLASS(32, SCRATCH2, regs_.F(inst.src2));
OR(SCRATCH1, SCRATCH1, SCRATCH2);
Expand All @@ -362,15 +361,13 @@ void RiscVJitBackend::CompIR_FCompare(IRInst inst) {
break;

case IRFpCompareMode::EqualOrdered:
regs_.Map(inst);
regs_.MapGPR(IRREG_FPCOND, MIPSMap::NOINIT);
regs_.MapWithExtra(inst, { { 'G', IRREG_FPCOND, 1, MIPSMap::NOINIT } });
FEQ(32, regs_.R(IRREG_FPCOND), regs_.F(inst.src1), regs_.F(inst.src2));
regs_.MarkGPRDirty(IRREG_FPCOND, true);
break;

case IRFpCompareMode::EqualUnordered:
regs_.Map(inst);
regs_.MapGPR(IRREG_FPCOND, MIPSMap::NOINIT);
regs_.MapWithExtra(inst, { { 'G', IRREG_FPCOND, 1, MIPSMap::NOINIT } });
FEQ(32, regs_.R(IRREG_FPCOND), regs_.F(inst.src1), regs_.F(inst.src2));

// Now let's just OR in the unordered check.
Expand All @@ -385,30 +382,26 @@ void RiscVJitBackend::CompIR_FCompare(IRInst inst) {
break;

case IRFpCompareMode::LessEqualOrdered:
regs_.Map(inst);
regs_.MapGPR(IRREG_FPCOND, MIPSMap::NOINIT);
regs_.MapWithExtra(inst, { { 'G', IRREG_FPCOND, 1, MIPSMap::NOINIT } });
FLE(32, regs_.R(IRREG_FPCOND), regs_.F(inst.src1), regs_.F(inst.src2));
regs_.MarkGPRDirty(IRREG_FPCOND, true);
break;

case IRFpCompareMode::LessEqualUnordered:
regs_.Map(inst);
regs_.MapGPR(IRREG_FPCOND, MIPSMap::NOINIT);
regs_.MapWithExtra(inst, { { 'G', IRREG_FPCOND, 1, MIPSMap::NOINIT } });
FLT(32, regs_.R(IRREG_FPCOND), regs_.F(inst.src2), regs_.F(inst.src1));
SEQZ(regs_.R(IRREG_FPCOND), regs_.R(IRREG_FPCOND));
regs_.MarkGPRDirty(IRREG_FPCOND, true);
break;

case IRFpCompareMode::LessOrdered:
regs_.Map(inst);
regs_.MapGPR(IRREG_FPCOND, MIPSMap::NOINIT);
regs_.MapWithExtra(inst, { { 'G', IRREG_FPCOND, 1, MIPSMap::NOINIT } });
FLT(32, regs_.R(IRREG_FPCOND), regs_.F(inst.src1), regs_.F(inst.src2));
regs_.MarkGPRDirty(IRREG_FPCOND, true);
break;

case IRFpCompareMode::LessUnordered:
regs_.Map(inst);
regs_.MapGPR(IRREG_FPCOND, MIPSMap::NOINIT);
regs_.MapWithExtra(inst, { { 'G', IRREG_FPCOND, 1, MIPSMap::NOINIT } });
FLE(32, regs_.R(IRREG_FPCOND), regs_.F(inst.src2), regs_.F(inst.src1));
SEQZ(regs_.R(IRREG_FPCOND), regs_.R(IRREG_FPCOND));
regs_.MarkGPRDirty(IRREG_FPCOND, true);
Expand All @@ -417,8 +410,7 @@ void RiscVJitBackend::CompIR_FCompare(IRInst inst) {
break;

case IROp::FCmovVfpuCC:
regs_.MapGPR(IRREG_VFPU_CC);
regs_.Map(inst);
regs_.MapWithExtra(inst, { { 'G', IRREG_VFPU_CC, 1, MIPSMap::INIT } });
if ((inst.src2 & 0xF) == 0) {
ANDI(SCRATCH1, regs_.R(IRREG_VFPU_CC), 1);
} else if (cpu_info.RiscV_Zbs) {
Expand Down
8 changes: 4 additions & 4 deletions Core/MIPS/RiscV/RiscVCompSystem.cpp
Expand Up @@ -106,18 +106,18 @@ void RiscVJitBackend::CompIR_Transfer(IRInst inst) {
break;

case IROp::FpCondFromReg:
regs_.MapGPRDirtyIn(IRREG_FPCOND, inst.src1);
regs_.MapWithExtra(inst, { { 'G', IRREG_FPCOND, 1, MIPSMap::NOINIT } });
MV(regs_.R(IRREG_FPCOND), regs_.R(inst.src1));
break;

case IROp::FpCondToReg:
regs_.MapGPRDirtyIn(inst.dest, IRREG_FPCOND);
regs_.MapWithExtra(inst, { { 'G', IRREG_FPCOND, 1, MIPSMap::INIT } });
MV(regs_.R(inst.dest), regs_.R(IRREG_FPCOND));
regs_.MarkGPRDirty(inst.dest, regs_.IsNormalized32(IRREG_FPCOND));
break;

case IROp::FpCtrlFromReg:
regs_.MapGPRDirtyIn(IRREG_FPCOND, inst.src1);
regs_.MapWithExtra(inst, { { 'G', IRREG_FPCOND, 1, MIPSMap::NOINIT } });
LI(SCRATCH1, 0x0181FFFF);
AND(SCRATCH1, regs_.R(inst.src1), SCRATCH1);
// Extract the new fpcond value.
Expand All @@ -132,7 +132,7 @@ void RiscVJitBackend::CompIR_Transfer(IRInst inst) {
break;

case IROp::FpCtrlToReg:
regs_.MapGPRDirtyIn(inst.dest, IRREG_FPCOND);
regs_.MapWithExtra(inst, { { 'G', IRREG_FPCOND, 1, MIPSMap::INIT } });
// Load fcr31 and clear the fpcond bit.
LW(SCRATCH1, CTXREG, IRREG_FCR31 * 4);
if (cpu_info.RiscV_Zbs) {
Expand Down
49 changes: 12 additions & 37 deletions Core/MIPS/RiscV/RiscVCompVec.cpp
Expand Up @@ -215,9 +215,8 @@ void RiscVJitBackend::CompIR_VecArith(IRInst inst) {
break;

case IROp::Vec4Scale:
regs_.SpillLockFPR(inst.src2);
regs_.MapFPR(inst.src2);
regs_.MapFPR4DirtyIn(inst.dest, inst.src1);
// TODO: This works for now, but may need to handle aliasing for vectors.
regs_.Map(inst);
for (int i = 0; i < 4; ++i)
FMUL(32, regs_.F(inst.dest + i), regs_.F(inst.src1 + i), regs_.F(inst.src2));
break;
Expand Down Expand Up @@ -245,18 +244,8 @@ void RiscVJitBackend::CompIR_VecHoriz(IRInst inst) {

switch (inst.op) {
case IROp::Vec4Dot:
// TODO: Maybe some option to call the slow accurate mode?
regs_.SpillLockFPR(inst.dest);
for (int i = 0; i < 4; ++i) {
regs_.SpillLockFPR(inst.src1 + i);
regs_.SpillLockFPR(inst.src2 + i);
}
for (int i = 0; i < 4; ++i) {
regs_.MapFPR(inst.src1 + i);
regs_.MapFPR(inst.src2 + i);
}
regs_.MapFPR(inst.dest, MIPSMap::NOINIT);

// TODO: This works for now, but may need to handle aliasing for vectors.
regs_.Map(inst);
if ((inst.dest < inst.src1 + 4 && inst.dest >= inst.src1) || (inst.dest < inst.src2 + 4 && inst.dest >= inst.src2)) {
// This means inst.dest overlaps one of src1 or src2. We have to do that one first.
// Technically this may impact -0.0 and such, but dots accurately need to be aligned anyway.
Expand Down Expand Up @@ -292,13 +281,8 @@ void RiscVJitBackend::CompIR_VecPack(IRInst inst) {
break;

case IROp::Vec4Unpack8To32:
regs_.SpillLockFPR(inst.src1);
for (int i = 0; i < 4; ++i)
regs_.SpillLockFPR(inst.dest + i);
regs_.MapFPR(inst.src1);
for (int i = 0; i < 4; ++i)
regs_.MapFPR(inst.dest + i, MIPSMap::NOINIT);

// TODO: This works for now, but may need to handle aliasing for vectors.
regs_.Map(inst);
FMV(FMv::X, FMv::W, SCRATCH2, regs_.F(inst.src1));
for (int i = 0; i < 4; ++i) {
// Mask using walls.
Expand All @@ -313,13 +297,8 @@ void RiscVJitBackend::CompIR_VecPack(IRInst inst) {
break;

case IROp::Vec2Unpack16To32:
regs_.SpillLockFPR(inst.src1);
for (int i = 0; i < 2; ++i)
regs_.SpillLockFPR(inst.dest + i);
regs_.MapFPR(inst.src1);
for (int i = 0; i < 2; ++i)
regs_.MapFPR(inst.dest + i, MIPSMap::NOINIT);

// TODO: This works for now, but may need to handle aliasing for vectors.
regs_.Map(inst);
FMV(FMv::X, FMv::W, SCRATCH2, regs_.F(inst.src1));
SLLI(SCRATCH1, SCRATCH2, 16);
FMV(FMv::W, FMv::X, regs_.F(inst.dest), SCRATCH1);
Expand All @@ -342,13 +321,8 @@ void RiscVJitBackend::CompIR_VecPack(IRInst inst) {
break;

case IROp::Vec4Pack31To8:
regs_.SpillLockFPR(inst.dest);
for (int i = 0; i < 4; ++i) {
regs_.SpillLockFPR(inst.src1 + i);
regs_.MapFPR(inst.src1 + i);
}
regs_.MapFPR(inst.dest, MIPSMap::NOINIT);

// TODO: This works for now, but may need to handle aliasing for vectors.
regs_.Map(inst);
for (int i = 0; i < 4; ++i) {
FMV(FMv::X, FMv::W, SCRATCH1, regs_.F(inst.src1 + i));
SRLI(SCRATCH1, SCRATCH1, 23);
Expand All @@ -365,7 +339,8 @@ void RiscVJitBackend::CompIR_VecPack(IRInst inst) {
break;

case IROp::Vec2Pack32To16:
regs_.MapFPRDirtyInIn(inst.dest, inst.src1, inst.src1 + 1);
// TODO: This works for now, but may need to handle aliasing for vectors.
regs_.Map(inst);
FMV(FMv::X, FMv::W, SCRATCH1, regs_.F(inst.src1));
FMV(FMv::X, FMv::W, SCRATCH2, regs_.F(inst.src1 + 1));
// Keep in mind, this was sign-extended, so we have to zero the upper.
Expand Down
40 changes: 0 additions & 40 deletions Core/MIPS/RiscV/RiscVRegCache.cpp
Expand Up @@ -259,25 +259,6 @@ RiscVReg RiscVRegCache::MapGPRAsPointer(IRReg reg) {
return (RiscVReg)MapNativeRegAsPointer(reg);
}

// Maps rd as a (dirty) destination and rs as a source GPR.
// With avoidLoad set, rd skips its memory load (NOINIT) unless it aliases rs,
// in which case it is also an input and must be loaded.
void RiscVRegCache::MapGPRDirtyIn(IRReg rd, IRReg rs, bool avoidLoad) {
	SpillLockGPR(rd, rs);
	MIPSMap destFlags = MIPSMap::NOINIT;
	if (!avoidLoad || rd == rs)
		destFlags = MIPSMap::DIRTY;
	MapGPR(rd, destFlags);
	MapGPR(rs);
	ReleaseSpillLockGPR(rd, rs);
}

// Maps two destination GPRs (rd1/rd2, dirty) and two source GPRs (rs/rt).
// A destination that aliases either source is still an input, so it keeps its
// load even when avoidLoad requests NOINIT.
void RiscVRegCache::MapGPRDirtyDirtyInIn(IRReg rd1, IRReg rd2, IRReg rs, IRReg rt, bool avoidLoad) {
	SpillLockGPR(rd1, rd2, rs, rt);
	const bool aliased1 = rd1 == rs || rd1 == rt;
	const bool aliased2 = rd2 == rs || rd2 == rt;
	MapGPR(rd1, (!avoidLoad || aliased1) ? MIPSMap::DIRTY : MIPSMap::NOINIT);
	MapGPR(rd2, (!avoidLoad || aliased2) ? MIPSMap::DIRTY : MIPSMap::NOINIT);
	MapGPR(rt);
	MapGPR(rs);
	ReleaseSpillLockGPR(rd1, rd2, rs, rt);
}

RiscVReg RiscVRegCache::MapFPR(IRReg mipsReg, MIPSMap mapFlags) {
_dbg_assert_(IsValidFPR(mipsReg));
_dbg_assert_(mr[mipsReg + 32].loc == MIPSLoc::MEM || mr[mipsReg + 32].loc == MIPSLoc::FREG);
Expand All @@ -288,27 +269,6 @@ RiscVReg RiscVRegCache::MapFPR(IRReg mipsReg, MIPSMap mapFlags) {
return INVALID_REG;
}

// Maps rd as a (dirty) destination and rs/rt as source FPRs.
// With avoidLoad set, rd skips its memory load (NOINIT) unless it aliases a
// source, in which case it is also an input and must be loaded.
void RiscVRegCache::MapFPRDirtyInIn(IRReg rd, IRReg rs, IRReg rt, bool avoidLoad) {
	SpillLockFPR(rd, rs, rt);
	MIPSMap destFlags = MIPSMap::NOINIT;
	if (!avoidLoad || rd == rs || rd == rt)
		destFlags = MIPSMap::DIRTY;
	MapFPR(rd, destFlags);
	MapFPR(rt);
	MapFPR(rs);
	ReleaseSpillLockFPR(rd, rs, rt);
}

// Maps a quad of destination FPRs (rdbase..rdbase+3, dirty) and a quad of
// source FPRs (rsbase..rsbase+3). If the two quads overlap, the destination
// doubles as an input, so it is loaded even when avoidLoad requests NOINIT.
void RiscVRegCache::MapFPR4DirtyIn(IRReg rdbase, IRReg rsbase, bool avoidLoad) {
	for (int lane = 0; lane < 4; ++lane)
		SpillLockFPR(rdbase + lane, rsbase + lane);
	const bool overlap = rdbase < rsbase + 4 && rdbase + 4 > rsbase;
	const MIPSMap destFlags = (!avoidLoad || overlap) ? MIPSMap::DIRTY : MIPSMap::NOINIT;
	for (int lane = 0; lane < 4; ++lane)
		MapFPR(rdbase + lane, destFlags);
	for (int lane = 0; lane < 4; ++lane)
		MapFPR(rsbase + lane);
	for (int lane = 0; lane < 4; ++lane)
		ReleaseSpillLockFPR(rdbase + lane, rsbase + lane);
}

void RiscVRegCache::AdjustNativeRegAsPtr(IRNativeReg nreg, bool state) {
RiscVReg r = (RiscVReg)(X0 + nreg);
_assert_(r >= X0 && r <= X31);
Expand Down
8 changes: 0 additions & 8 deletions Core/MIPS/RiscV/RiscVRegCache.h
Expand Up @@ -49,16 +49,8 @@ class RiscVRegCache : public IRNativeRegCacheBase {
// Returns an RV register containing the requested MIPS register.
RiscVGen::RiscVReg MapGPR(IRReg reg, MIPSMap mapFlags = MIPSMap::INIT);
RiscVGen::RiscVReg MapGPRAsPointer(IRReg reg);

void MapGPRDirtyIn(IRReg rd, IRReg rs, bool avoidLoad = true);
void MapGPRDirtyDirtyInIn(IRReg rd1, IRReg rd2, IRReg rs, IRReg rt, bool avoidLoad = true);

// Returns a RISC-V register containing the requested MIPS register.
RiscVGen::RiscVReg MapFPR(IRReg reg, MIPSMap mapFlags = MIPSMap::INIT);

void MapFPRDirtyInIn(IRReg rd, IRReg rs, IRReg rt, bool avoidLoad = true);
void MapFPR4DirtyIn(IRReg rdbase, IRReg rsbase, bool avoidLoad = true);

RiscVGen::RiscVReg MapWithFPRTemp(IRInst &inst);

bool IsNormalized32(IRReg reg);
Expand Down

0 comments on commit 6a75e67

Please sign in to comment.