Skip to content

Commit

Permalink
Merge pull request #17937 from unknownbrackets/irjit-compile
Browse files Browse the repository at this point in the history
Reduce time spent in IR compile
  • Loading branch information
hrydgard committed Aug 20, 2023
2 parents cd1c5be + 32d8f61 commit b90d628
Show file tree
Hide file tree
Showing 7 changed files with 89 additions and 47 deletions.
25 changes: 16 additions & 9 deletions Core/MIPS/IR/IRAnalysis.cpp
Expand Up @@ -21,28 +21,35 @@
#include <algorithm>


// Reports whether inst reads register reg of operand class type
// (the wrappers in this file pass 'F' for FPRs and 'G' for GPRs).
// NOTE(review): this span is a rendered diff with removed and added lines
// interleaved and unmarked. The `bool directly = false` signature and the
// `if (!directly) { ... }` group look like the removed version; the
// `bool *directly` out-parameter lines look like the added version.
static bool IRReadsFrom(const IRInst &inst, int reg, char type, bool directly = false) {
static bool IRReadsFrom(const IRInst &inst, int reg, char type, bool *directly) {
const IRMeta *m = GetIRMeta(inst.op);

// src1 names the register: report a direct operand read.
if (m->types[1] == type && inst.src1 == reg) {
if (directly)
*directly = true;
return true;
}
// src2 names the register: also a direct operand read.
if (m->types[2] == type && inst.src2 == reg) {
if (directly)
*directly = true;
return true;
}
// src3 is only considered when the op's flags include IRFLAG_SRC3 or
// IRFLAG_SRC3DST; its type is taken from types[0].
if ((m->flags & (IRFLAG_SRC3 | IRFLAG_SRC3DST)) != 0 && m->types[0] == type && inst.src3 == reg) {
if (directly)
*directly = true;
return true;
}
if (!directly) {
if (inst.op == IROp::Interpret || inst.op == IROp::CallReplacement || inst.op == IROp::Syscall || inst.op == IROp::Break)
return true;
if (inst.op == IROp::Breakpoint || inst.op == IROp::MemoryCheck)
return true;
}

// No operand matched, so anything reported from here on is not a direct read.
if (directly)
*directly = false;
// These ops return true for every reg even though no operand names it
// (presumably because they may touch arbitrary registers - confirm).
if (inst.op == IROp::Interpret || inst.op == IROp::CallReplacement || inst.op == IROp::Syscall || inst.op == IROp::Break)
return true;
if (inst.op == IROp::Breakpoint || inst.op == IROp::MemoryCheck)
return true;
return false;
}

bool IRReadsFromFPR(const IRInst &inst, int reg, bool directly) {
bool IRReadsFromFPR(const IRInst &inst, int reg, bool *directly) {
if (IRReadsFrom(inst, reg, 'F', directly))
return true;

Expand Down Expand Up @@ -85,7 +92,7 @@ static int IRReadsFromList(const IRInst &inst, IRReg regs[4], char type) {
return c;
}

// GPR wrapper: forwards to IRReadsFrom with operand type 'G' and passes
// directly through unchanged.
// NOTE(review): the two signature lines are the before/after of a rendered
// diff; the `bool *directly` form appears to be the current one.
bool IRReadsFromGPR(const IRInst &inst, int reg, bool directly) {
bool IRReadsFromGPR(const IRInst &inst, int reg, bool *directly) {
return IRReadsFrom(inst, reg, 'G', directly);
}

Expand Down
4 changes: 2 additions & 2 deletions Core/MIPS/IR/IRAnalysis.h
Expand Up @@ -19,8 +19,8 @@

#include "Core/MIPS/IR/IRInst.h"

// NOTE(review): rendered diff - the first pair (`bool directly = false`)
// looks like the removed declarations, the second pair
// (`bool *directly = nullptr`) like the added ones.
bool IRReadsFromFPR(const IRInst &inst, int reg, bool directly = false);
bool IRReadsFromGPR(const IRInst &inst, int reg, bool directly = false);
// directly is an optional out-parameter; it defaults to nullptr so callers
// that only need the yes/no answer can omit it.
bool IRReadsFromFPR(const IRInst &inst, int reg, bool *directly = nullptr);
bool IRReadsFromGPR(const IRInst &inst, int reg, bool *directly = nullptr);
bool IRWritesToGPR(const IRInst &inst, int reg);
bool IRWritesToFPR(const IRInst &inst, int reg);
int IRDestGPR(const IRInst &inst);
Expand Down
3 changes: 3 additions & 0 deletions Core/MIPS/IR/IRInst.h
Expand Up @@ -375,6 +375,9 @@ class IRWriter {
int AddConstant(u32 value);
int AddConstantFloat(float value);

// Forwards the requested capacity s to insts_.reserve().
void Reserve(size_t s) { insts_.reserve(s); }
// Empties insts_ by calling its clear().
void Clear() { insts_.clear(); }
Expand Down
1 change: 1 addition & 0 deletions Core/MIPS/IR/IRJit.cpp
Expand Up @@ -168,6 +168,7 @@ void IRJit::CompileFunction(u32 start_address, u32 length) {
// We may go up and down from branches, so track all block starts done here.
std::set<u32> doneAddresses;
std::vector<u32> pendingAddresses;
pendingAddresses.reserve(16);
pendingAddresses.push_back(start_address);
while (!pendingAddresses.empty()) {
u32 em_address = pendingAddresses.back();
Expand Down
56 changes: 41 additions & 15 deletions Core/MIPS/IR/IRPassSimplify.cpp
Expand Up @@ -86,6 +86,8 @@ IROp ShiftToShiftImm(IROp op) {
}

bool IRApplyPasses(const IRPassFunc *passes, size_t c, const IRWriter &in, IRWriter &out, const IROptions &opts) {
out.Reserve(in.GetInstructions().size());

if (c == 1) {
return passes[0](in, out, opts);
}
Expand All @@ -95,15 +97,20 @@ bool IRApplyPasses(const IRPassFunc *passes, size_t c, const IRWriter &in, IRWri
IRWriter temp[2];
const IRWriter *nextIn = &in;
IRWriter *nextOut = &temp[1];
temp[1].Reserve(nextIn->GetInstructions().size());
for (size_t i = 0; i < c - 1; ++i) {
if (passes[i](*nextIn, *nextOut, opts)) {
logBlocks = true;
}

temp[0] = std::move(temp[1]);
nextIn = &temp[0];

temp[1].Clear();
temp[1].Reserve(nextIn->GetInstructions().size());
}

out.Reserve(nextIn->GetInstructions().size());
if (passes[c - 1](*nextIn, out, opts)) {
logBlocks = true;
}
Expand Down Expand Up @@ -947,46 +954,65 @@ bool PurgeTemps(const IRWriter &in, IRWriter &out, const IROptions &opts) {
int8_t fplen = 0;
};
std::vector<Check> checks;
checks.reserve(insts.size() / 2);

// This tracks the last index at which each reg was modified.
int lastWrittenTo[256];
int lastReadFrom[256];
memset(lastWrittenTo, -1, sizeof(lastWrittenTo));
memset(lastReadFrom, -1, sizeof(lastReadFrom));

auto readsFromFPRCheck = [](IRInst &inst, Check &check, bool directly) {
auto readsFromFPRCheck = [](IRInst &inst, Check &check, bool *directly) {
if (check.reg < 32)
return false;
if (check.fplen >= 1 && IRReadsFromFPR(inst, check.reg - 32, directly))
return true;
if (check.fplen >= 2 && IRReadsFromFPR(inst, check.reg - 32 + 1, directly))
return true;
if (check.fplen >= 3 && IRReadsFromFPR(inst, check.reg - 32 + 2, directly))
return true;
if (check.fplen >= 4 && IRReadsFromFPR(inst, check.reg - 32 + 3, directly))
return true;
return false;

bool result = false;
*directly = true;
for (int i = 0; i < 4; ++i) {
bool laneDirectly;
if (check.fplen >= i + 1 && IRReadsFromFPR(inst, check.reg - 32 + i, &laneDirectly)) {
result = true;
if (!laneDirectly) {
*directly = false;
break;
}
}
}
return result;
};

bool logBlocks = false;
size_t firstCheck = 0;
for (int i = 0, n = (int)in.GetInstructions().size(); i < n; i++) {
IRInst inst = in.GetInstructions()[i];
const IRMeta *m = GetIRMeta(inst.op);

// It helps to skip through rechecking ones we already discarded.
for (size_t ch = firstCheck; ch < checks.size(); ++ch) {
Check &check = checks[ch];
if (check.reg != 0) {
firstCheck = ch;
break;
}
}

// Check if we can optimize by running through all the writes we've previously found.
for (Check &check : checks) {
for (size_t ch = firstCheck; ch < checks.size(); ++ch) {
Check &check = checks[ch];
if (check.reg == 0) {
// This means we already optimized this or a later inst depends on it.
continue;
}

if (IRReadsFromGPR(inst, check.reg)) {
bool readsDirectly;
if (IRReadsFromGPR(inst, check.reg, &readsDirectly)) {
// If this reads from the reg, we either depend on it or we can fold or swap.
// That's determined below.

// If this reads and writes the reg (e.g. MovZ, Load32Left), we can't just swap.
bool mutatesReg = IRMutatesDestGPR(inst, check.reg);
// If this doesn't directly read (i.e. Interpret), we can't swap.
bool cannotReplace = !IRReadsFromGPR(inst, check.reg, true);
bool cannotReplace = !readsDirectly;
if (!mutatesReg && !cannotReplace && check.srcReg >= 0 && lastWrittenTo[check.srcReg] < check.index) {
// Replace with the srcReg instead. This happens with non-nice delay slots.
// We're changing "Mov A, B; Add C, C, A" to "Mov A, B; Add C, C, B" here.
Expand Down Expand Up @@ -1018,7 +1044,7 @@ bool PurgeTemps(const IRWriter &in, IRWriter &out, const IROptions &opts) {
// Legitimately read from, so we can't optimize out.
check.reg = 0;
}
} else if (readsFromFPRCheck(inst, check, false) && check.fplen >= 1) {
} else if (check.fplen >= 1 && readsFromFPRCheck(inst, check, &readsDirectly)) {
// If one or the other is a Vec, they must match.
bool lenMismatch = false;

Expand All @@ -1043,7 +1069,7 @@ bool PurgeTemps(const IRWriter &in, IRWriter &out, const IROptions &opts) {
if ((m->flags & (IRFLAG_SRC3 | IRFLAG_SRC3DST)) != 0)
checkMismatch(inst.src3, m->types[3]);

bool cannotReplace = !readsFromFPRCheck(inst, check, true) || lenMismatch;
bool cannotReplace = !readsDirectly || lenMismatch;
if (!cannotReplace && check.srcReg >= 32 && lastWrittenTo[check.srcReg] < check.index) {
// This is probably not worth doing unless we can get rid of a temp.
if (!check.readByExit) {
Expand Down
31 changes: 20 additions & 11 deletions Core/MIPS/IR/IRRegCache.cpp
Expand Up @@ -30,32 +30,41 @@
#include "Core/MIPS/JitCommon/JitState.h"

// If rd currently has a tracked immediate, emits it through
// ir_->WriteSetConstant() and clears the flag. Register 0 is never flushed.
// NOTE(review): rendered diff - the `reg_[rd].…` lines and the leading
// `if (rd == 0)` look like the removed version; the `isImm_` / `immVal_`
// lines look like the added version, which tests the flag first.
void IRImmRegCache::Flush(IRReg rd) {
if (rd == 0) {
return;
}
if (reg_[rd].isImm) {
if (isImm_[rd]) {
if (rd == 0) {
return;
}
// Only regs 1..31 and the range [IRTEMP_0, IRREG_VFPU_CTRL_BASE) are expected here.
_assert_((rd > 0 && rd < 32) || (rd >= IRTEMP_0 && rd < IRREG_VFPU_CTRL_BASE));
ir_->WriteSetConstant(rd, reg_[rd].immVal);
reg_[rd].isImm = false;
ir_->WriteSetConstant(rd, immVal_[rd]);
isImm_[rd] = false;
}
}

// Drops any tracked immediate for rd without emitting it. Register 0 is
// left untouched.
// NOTE(review): rendered diff - `reg_[rd].isImm` looks like the removed
// line and `isImm_[rd]` like the added one.
void IRImmRegCache::Discard(IRReg rd) {
if (rd == 0) {
return;
}
reg_[rd].isImm = false;
isImm_[rd] = false;
}

// Zeroes the immediate-tracking state, then marks register 0 as holding an
// immediate (its stored value is 0 after the memset).
// NOTE(review): rendered diff - the `reg_` lines look like the removed
// version, the `isImm_` / `immVal_` lines like the added one.
IRImmRegCache::IRImmRegCache(IRWriter *ir) : ir_(ir) {
memset(&reg_, 0, sizeof(reg_));
reg_[0].isImm = true;
memset(&isImm_, 0, sizeof(isImm_));
memset(&immVal_, 0, sizeof(immVal_));
isImm_[0] = true;
// ir_ was already set by the member initializer above; this repeats it.
ir_ = ir;
}

// Flushes every register that currently has a tracked immediate.
// NOTE(review): rendered diff - the `for (int i = 0; …; i++) { Flush(i);`
// pair looks like the removed loop, so braces are unbalanced as shown; the
// loop starting at 1 looks like the added version.
void IRImmRegCache::FlushAll() {
for (int i = 0; i < TOTAL_MAPPABLE_IRREGS; i++) {
Flush(i);
// Starts at 1: Flush() returns early for register 0.
for (int i = 1; i < TOTAL_MAPPABLE_IRREGS; ) {
if (isImm_[i]) {
Flush(i);
}

// Most of the time, lots are not. This speeds it up a lot.
// memchr jumps to the next flag byte equal to 1. isImm_[i] is clear by
// now, so the result is always past i and the loop makes progress.
// NOTE(review): relies on sizeof(bool) == 1 and true being stored as 1.
bool *next = (bool *)memchr(&isImm_[i], 1, TOTAL_MAPPABLE_IRREGS - i);
if (!next)
break;
i = (int)(next - &isImm_[0]);
}
}

Expand Down
16 changes: 6 additions & 10 deletions Core/MIPS/IR/IRRegCache.h
Expand Up @@ -48,12 +48,12 @@ class IRImmRegCache {
IRImmRegCache(IRWriter *ir);

// Records that r holds the known constant immVal. r is used as an index
// without a bounds check.
// NOTE(review): rendered diff - the `reg_[r].…` lines look like the removed
// version, the `isImm_` / `immVal_` lines like the added one.
void SetImm(IRReg r, u32 immVal) {
reg_[r].isImm = true;
reg_[r].immVal = immVal;
isImm_[r] = true;
immVal_[r] = immVal;
}

// IsImm: whether r currently has a tracked constant.
// GetImm: returns the stored value for r; it does not check IsImm(r) itself.
// NOTE(review): rendered diff - the first pair (reg_) looks like the removed
// accessors, the second pair (isImm_ / immVal_) like the added ones.
bool IsImm(IRReg r) const { return reg_[r].isImm; }
u32 GetImm(IRReg r) const { return reg_[r].immVal; }
bool IsImm(IRReg r) const { return isImm_[r]; }
u32 GetImm(IRReg r) const { return immVal_[r]; }

void FlushAll();

Expand All @@ -68,12 +68,8 @@ class IRImmRegCache {
void Flush(IRReg rd);
void Discard(IRReg rd);

// NOTE(review): rendered diff - struct RegIR and the reg_ array look like
// the removed storage (one flag + value pair per register); the two parallel
// arrays below look like the replacement.
struct RegIR {
bool isImm;
u32 immVal;
};

RegIR reg_[TOTAL_MAPPABLE_IRREGS];
// Per-register flag: true while the register has a tracked constant.
// FlushAll() scans this array with memchr.
bool isImm_[TOTAL_MAPPABLE_IRREGS];
// Per-register constant value, written by SetImm() and read by GetImm().
uint32_t immVal_[TOTAL_MAPPABLE_IRREGS];
// Writer that Flush() emits WriteSetConstant() calls into.
IRWriter *ir_;
};

Expand Down

0 comments on commit b90d628

Please sign in to comment.