Permalink
Browse files

irjit: Speed up icache block invalidation.

It turns out that in games performing a ton of small memcpys, scanning every
block on each icache invalidation was causing performance issues.
  • Loading branch information...
unknownbrackets committed Dec 31, 2017
1 parent d565e23 commit 3af78883c73032fcda90cc52eadd75c226a0fa38
Showing with 56 additions and 15 deletions.
  1. +44 −11 Core/MIPS/IR/IRJit.cpp
  2. +12 −4 Core/MIPS/IR/IRJit.h
View
@@ -71,7 +71,8 @@ void IRJit::Compile(u32 em_address) {
frontend_.DoJit(em_address, instructions, constants, mipsBytes);
b->SetInstructions(instructions, constants);
b->SetOriginalSize(mipsBytes);
b->Finalize(block_num); // Overwrites the first instruction
// Overwrites the first instruction, and also updates stats.
blocks_.FinalizeBlock(block_num);
if (frontend_.CheckRounding()) {
// Our assumptions are all wrong so it's clean-slate time.
@@ -130,26 +131,56 @@ bool IRJit::ReplaceJalTo(u32 dest) {
}
// Destroys every block (passing each block its own index), then empties both
// the block list and the per-page index.
void IRBlockCache::Clear() {
// NOTE(review): two loop headers appear back to back here. This looks like a
// diff rendering where the size_ version is the removed line and the
// blocks_.size() version is its replacement - confirm against the real file.
for (int i = 0; i < size_; ++i) {
for (int i = 0; i < (int)blocks_.size(); ++i) {
blocks_[i].Destroy(i);
}
blocks_.clear();
byPage_.clear();
}
// Destroys the blocks whose original code range overlaps
// [address, address + length), as decided by IRBlock::OverlapsRange.
void IRBlockCache::InvalidateICache(u32 address, u32 length) {
// TODO: Could be more efficient.
// NOTE(review): the next three lines walk every block by index and are never
// closed. They look like the removed side of the diff, superseded by the
// page-indexed lookup that follows - confirm against the real file.
for (int i = 0; i < size_; ++i) {
if (blocks_[i].OverlapsRange(address, length)) {
blocks_[i].Destroy(i);
// Only visit the pages the range maps to instead of every block.
u32 startPage = AddressToPage(address);
// address + length is one past the last byte, so a range ending exactly on a
// page boundary also visits the following page.
u32 endPage = AddressToPage(address + length);
for (u32 page = startPage; page <= endPage; ++page) {
const auto iter = byPage_.find(page);
// No block list exists for this page; nothing to check.
if (iter == byPage_.end())
continue;
const std::vector<int> &blocksInPage = iter->second;
for (int i : blocksInPage) {
// A page's list can hold blocks that do not intersect the range, so each
// candidate is still checked individually.
if (blocks_[i].OverlapsRange(address, length)) {
// Not removing from the page, hopefully doesn't build up with small recompiles.
blocks_[i].Destroy(i);
}
}
}
}
void IRBlockCache::FinalizeBlock(int i) {
blocks_[i].Finalize(i);
u32 startAddr, size;
blocks_[i].GetRange(startAddr, size);
u32 startPage = AddressToPage(startAddr);
u32 endPage = AddressToPage(startAddr + size);
for (u32 page = startPage; page <= endPage; ++page) {
byPage_[page].push_back(i);
}
}
// Maps an address to its page number in the per-page block index.
u32 IRBlockCache::AddressToPage(u32 addr) {
	// Clear the top two address bits so addresses that differ only in those
	// bits land on the same page.
	const u32 masked = addr & 0x3FFFFFFF;
	// Pages are 1 KB (2^10 bytes): kept relatively small because basic blocks
	// are typically small.
	return masked >> 10;
}
std::vector<u32> IRBlockCache::SaveAndClearEmuHackOps() {
std::vector<u32> result;
result.resize(size_);
result.resize(blocks_.size());
for (int number = 0; number < size_; ++number) {
for (int number = 0; number < (int)blocks_.size(); ++number) {
IRBlock &b = blocks_[number];
if (b.IsValid() && b.RestoreOriginalFirstOp(number)) {
result[number] = number;
@@ -162,12 +193,12 @@ std::vector<u32> IRBlockCache::SaveAndClearEmuHackOps() {
}
void IRBlockCache::RestoreSavedEmuHackOps(std::vector<u32> saved) {
if (size_ != (int)saved.size()) {
if ((int)blocks_.size() != (int)saved.size()) {
ERROR_LOG(JIT, "RestoreSavedEmuHackOps: Wrong saved block size.");
return;
}
for (int number = 0; number < size_; ++number) {
for (int number = 0; number < (int)blocks_.size(); ++number) {
IRBlock &b = blocks_[number];
// Only if we restored it, write it back.
if (b.IsValid() && saved[number] != 0 && b.HasOriginalFirstOp()) {
@@ -207,7 +238,9 @@ void IRBlock::Destroy(int number) {
}
// Returns true when [addr, addr + size) intersects this block's original
// range [origAddr_, origAddr_ + origSize_).
bool IRBlock::OverlapsRange(u32 addr, u32 size) {
// NOTE(review): this first return looks like the removed side of the diff.
// Taken literally it makes the masked comparison below unreachable - confirm
// against the real file.
return addr + size > origAddr_ && addr < origAddr_ + origSize_;
// Clear the top two address bits on both sides before comparing; this is the
// same 0x3FFFFFFF mask IRBlockCache::AddressToPage applies.
addr &= 0x3FFFFFFF;
u32 origAddr = origAddr_ & 0x3FFFFFFF;
return addr + size > origAddr && addr < origAddr + origSize_;
}
MIPSOpcode IRJit::GetOriginalOp(MIPSOpcode op) {
View
@@ -18,6 +18,7 @@
#pragma once
#include <cstring>
#include <unordered_map>
#include "Common/Common.h"
#include "Common/CPUDetect.h"
@@ -81,6 +82,11 @@ class IRBlock {
}
bool OverlapsRange(u32 addr, u32 size);
// Reports this block's original address and original size through the two
// out-parameters.
void GetRange(u32 &start, u32 &size) {
start = origAddr_;
size = origSize_;
}
void Finalize(int number);
void Destroy(int number);
@@ -96,17 +102,17 @@ class IRBlock {
class IRBlockCache {
public:
IRBlockCache() : size_(0) {}
IRBlockCache() {}
void Clear();
void InvalidateICache(u32 address, u32 length);
void FinalizeBlock(int i);
int GetNumBlocks() const { return (int)blocks_.size(); }
int AllocateBlock(int emAddr) {
blocks_.push_back(IRBlock(emAddr));
size_ = (int)blocks_.size();
return (int)blocks_.size() - 1;
}
IRBlock *GetBlock(int i) {
if (i >= 0 && i < size_) {
if (i >= 0 && i < (int)blocks_.size()) {
return &blocks_[i];
} else {
return nullptr;
@@ -117,8 +123,10 @@ class IRBlockCache {
void RestoreSavedEmuHackOps(std::vector<u32> saved);
private:
int size_; // Hm, is this a cache for speed in debug mode, or what?
u32 AddressToPage(u32 addr);
std::vector<IRBlock> blocks_;
std::unordered_map<u32, std::vector<int>> byPage_;
};
class IRJit : public JitInterface {

0 comments on commit 3af7888

Please sign in to comment.