@@ -189,6 +189,8 @@ static T ReadFromHardware(Memory::MemoryManager& memory, u32 em_address)
return static_cast<T>(var);
}

bool wi = false;

if (!never_translate && MSR.DR)
{
auto translated_addr = TranslateAddress<flag>(em_address);
@@ -199,6 +201,7 @@ static T ReadFromHardware(Memory::MemoryManager& memory, u32 em_address)
return 0;
}
em_address = translated_addr.address;
wi = translated_addr.wi;
}

if (flag == XCheckTLBFlag::Read && (em_address & 0xF8000000) == 0x08000000)
@@ -223,15 +226,37 @@ static T ReadFromHardware(Memory::MemoryManager& memory, u32 em_address)
// Handle RAM; the masking intentionally discards bits (essentially creating
// mirrors of memory).
T value;
std::memcpy(&value, &memory.GetRAM()[em_address & memory.GetRamMask()], sizeof(T));
em_address &= memory.GetRamMask();

if (!ppcState.m_enable_dcache || wi)
{
std::memcpy(&value, &memory.GetRAM()[em_address], sizeof(T));
}
else
{
ppcState.dCache.Read(em_address, &value, sizeof(T),
HID0.DLOCK || flag != XCheckTLBFlag::Read);
}

return bswap(value);
}

if (memory.GetEXRAM() && (em_address >> 28) == 0x1 &&
(em_address & 0x0FFFFFFF) < memory.GetExRamSizeReal())
{
T value;
std::memcpy(&value, &memory.GetEXRAM()[em_address & 0x0FFFFFFF], sizeof(T));
em_address &= 0x0FFFFFFF;

if (!ppcState.m_enable_dcache || wi)
{
std::memcpy(&value, &memory.GetEXRAM()[em_address], sizeof(T));
}
else
{
ppcState.dCache.Read(em_address + 0x10000000, &value, sizeof(T),
HID0.DLOCK || flag != XCheckTLBFlag::Read);
}

return bswap(value);
}

@@ -396,14 +421,28 @@ static void WriteToHardware(Core::System& system, Memory::MemoryManager& memory,
{
// Handle RAM; the masking intentionally discards bits (essentially creating
// mirrors of memory).
std::memcpy(&memory.GetRAM()[em_address & memory.GetRamMask()], &swapped_data, size);
em_address &= memory.GetRamMask();

if (ppcState.m_enable_dcache && !wi)
ppcState.dCache.Write(em_address, &swapped_data, size, HID0.DLOCK);

if (!ppcState.m_enable_dcache || wi || flag != XCheckTLBFlag::Write)
std::memcpy(&memory.GetRAM()[em_address], &swapped_data, size);

return;
}

if (memory.GetEXRAM() && (em_address >> 28) == 0x1 &&
(em_address & 0x0FFFFFFF) < memory.GetExRamSizeReal())
{
std::memcpy(&memory.GetEXRAM()[em_address & 0x0FFFFFFF], &swapped_data, size);
em_address &= 0x0FFFFFFF;

if (ppcState.m_enable_dcache && !wi)
ppcState.dCache.Write(em_address + 0x10000000, &swapped_data, size, HID0.DLOCK);

if (!ppcState.m_enable_dcache || wi || flag != XCheckTLBFlag::Write)
std::memcpy(&memory.GetEXRAM()[em_address], &swapped_data, size);

return;
}

@@ -1105,7 +1144,7 @@ void DMA_MemoryToLC(const u32 cache_address, const u32 mem_address, const u32 nu
memcpy(dst, src, 32 * num_blocks);
}

void ClearCacheLine(u32 address)
void ClearDCacheLine(u32 address)
{
DEBUG_ASSERT((address & 0x1F) == 0);
if (MSR.DR)
@@ -1136,6 +1175,100 @@ void ClearCacheLine(u32 address)
WriteToHardware<XCheckTLBFlag::Write, true>(system, memory, address + i, 0, 4);
}

// Hands the data cache line containing `address` to ppcState.dCache.Store().
//
// The address is rounded down to a 32-byte boundary first. When MSR.DR is set it is then
// translated using the write-access path:
//  - a direct-store segment result makes this a no-op;
//  - a page fault raises a DSI for the aligned address and nothing else happens.
// The cache itself is only touched when ppcState.m_enable_dcache is set; translation (and any
// resulting DSI) happens either way.
void StoreDCacheLine(u32 address)
{
  address &= ~0x1F;

  if (MSR.DR)
  {
    const auto lookup = TranslateAddress<XCheckTLBFlag::Write>(address);

    if (lookup.result == TranslateAddressResultEnum::DIRECT_STORE_SEGMENT)
      return;

    if (lookup.result == TranslateAddressResultEnum::PAGE_FAULT)
    {
      // Translation failed: report it to the guest as a DSI.
      GenerateDSIException(address, true);
      return;
    }

    address = lookup.address;
  }

  if (!ppcState.m_enable_dcache)
    return;

  ppcState.dCache.Store(address);
}

// Hands the data cache line containing `address` to ppcState.dCache.Invalidate().
//
// The address is rounded down to a 32-byte boundary and, when MSR.DR is set, translated using
// the write-access path. Unlike the store/flush/touch helpers, a failed translation is dropped
// silently here: both a direct-store segment and a page fault simply return, and no DSI is
// raised. The cache is only touched when ppcState.m_enable_dcache is set.
void InvalidateDCacheLine(u32 address)
{
  address &= ~0x1F;

  if (MSR.DR)
  {
    const auto lookup = TranslateAddress<XCheckTLBFlag::Write>(address);

    const bool unusable = lookup.result == TranslateAddressResultEnum::DIRECT_STORE_SEGMENT ||
                          lookup.result == TranslateAddressResultEnum::PAGE_FAULT;
    if (unusable)
      return;

    address = lookup.address;
  }

  if (!ppcState.m_enable_dcache)
    return;

  ppcState.dCache.Invalidate(address);
}

// Hands the data cache line containing `address` to ppcState.dCache.Flush().
//
// Steps, in order:
//  1. Round the address down to a 32-byte boundary.
//  2. If MSR.DR is set, translate it using the write-access path. A direct-store segment result
//     ends the call; a page fault raises a DSI for the aligned address and ends the call.
//  3. If ppcState.m_enable_dcache is set, forward the resulting address to the cache.
void FlushDCacheLine(u32 address)
{
  address &= ~0x1F;

  if (MSR.DR)
  {
    const auto lookup = TranslateAddress<XCheckTLBFlag::Write>(address);

    if (lookup.result == TranslateAddressResultEnum::DIRECT_STORE_SEGMENT)
    {
      return;
    }
    else if (lookup.result == TranslateAddressResultEnum::PAGE_FAULT)
    {
      // Translation failed: report it to the guest as a DSI.
      GenerateDSIException(address, true);
      return;
    }

    address = lookup.address;
  }

  if (!ppcState.m_enable_dcache)
    return;

  ppcState.dCache.Flush(address);
}

// Hands the data cache line containing `address` to ppcState.dCache.Touch(), passing `store`
// through unchanged.
//
// The address is rounded down to a 32-byte boundary and, when MSR.DR is set, translated. A
// direct-store segment result makes this a no-op; a page fault raises a DSI for the aligned
// address. The cache is only touched when ppcState.m_enable_dcache is set.
//
// NOTE(review): the write-access translation is used regardless of `store`, and a page fault
// raises a DSI even for a non-store touch - confirm that both are intended.
void TouchDCacheLine(u32 address, bool store)
{
  address &= ~0x1F;

  if (MSR.DR)
  {
    const auto lookup = TranslateAddress<XCheckTLBFlag::Write>(address);

    if (lookup.result == TranslateAddressResultEnum::DIRECT_STORE_SEGMENT)
      return;

    if (lookup.result == TranslateAddressResultEnum::PAGE_FAULT)
    {
      // Translation failed: report it to the guest as a DSI.
      GenerateDSIException(address, true);
      return;
    }

    address = lookup.address;
  }

  if (!ppcState.m_enable_dcache)
    return;

  ppcState.dCache.Touch(address, store);
}

u32 IsOptimizableMMIOAccess(u32 address, u32 access_size)
{
if (PowerPC::memchecks.HasAny())
@@ -164,7 +164,12 @@ void Write_F64(double var, u32 address);

void DMA_LCToMemory(u32 mem_address, u32 cache_address, u32 num_blocks);
void DMA_MemoryToLC(u32 cache_address, u32 mem_address, u32 num_blocks);
void ClearCacheLine(u32 address); // Zeroes 32 bytes; address should be 32-byte-aligned

// Data cache line helpers. Apart from ClearDCacheLine, each one rounds the address down to a
// 32-byte boundary, translates it when MSR.DR is set, and forwards it to ppcState.dCache only
// when ppcState.m_enable_dcache is set.
void ClearDCacheLine(u32 address); // Zeroes 32 bytes; address should be 32-byte-aligned
// Forwards to dCache.Store(); a page fault during translation raises a DSI.
void StoreDCacheLine(u32 address);
// Forwards to dCache.Invalidate(); a page fault during translation is ignored (no DSI).
void InvalidateDCacheLine(u32 address);
// Forwards to dCache.Flush(); a page fault during translation raises a DSI.
void FlushDCacheLine(u32 address);
// Forwards to dCache.Touch(address, store); a page fault during translation raises a DSI.
void TouchDCacheLine(u32 address, bool store);

// TLB functions
void SDRUpdated();

Large diffs are not rendered by default.

@@ -12,37 +12,64 @@ class PointerWrap;

namespace PowerPC
{
constexpr u32 ICACHE_SETS = 128;
constexpr u32 ICACHE_WAYS = 8;
constexpr u32 CACHE_SETS = 128;
constexpr u32 CACHE_WAYS = 8;
// size of a cache block in words (the same block size is used for the instruction and data caches)
constexpr u32 ICACHE_BLOCK_SIZE = 8;
constexpr u32 CACHE_BLOCK_SIZE = 8;

constexpr u32 ICACHE_EXRAM_BIT = 0x10000000;
constexpr u32 ICACHE_VMEM_BIT = 0x20000000;
constexpr u32 CACHE_EXRAM_BIT = 0x10000000;
constexpr u32 CACHE_VMEM_BIT = 0x20000000;

struct InstructionCache
// Storage and bookkeeping for one set-associative cache: CACHE_SETS sets of CACHE_WAYS ways,
// each block holding CACHE_BLOCK_SIZE 32-bit words. PowerPCState uses this type directly for
// its data cache, and InstructionCache derives from it.
struct Cache
{
std::array<std::array<std::array<u32, ICACHE_BLOCK_SIZE>, ICACHE_WAYS>, ICACHE_SETS> data{};
std::array<std::array<u32, ICACHE_WAYS>, ICACHE_SETS> tags{};
std::array<u32, ICACHE_SETS> plru{};
std::array<u32, ICACHE_SETS> valid{};
// Cached block contents, indexed as data[set][way][word].
std::array<std::array<std::array<u32, CACHE_BLOCK_SIZE>, CACHE_WAYS>, CACHE_SETS> data{};

// Stores the 32-byte aligned address of the start of each cache block. This consists of the cache
// set and tag. Real hardware only needs to store the tag, but also including the set simplifies
// debugging and getting the actual address in the cache, without changing behavior (as the set
// portion of the address is by definition the same for all addresses in a set).
std::array<std::array<u32, CACHE_WAYS>, CACHE_SETS> addrs{};

// One state byte per set.
// NOTE(review): presumably one bit per way for valid/modified (CACHE_WAYS is 8) and the
// pseudo-LRU replacement state in plru - confirm against the implementation.
std::array<u8, CACHE_SETS> plru{};
std::array<u8, CACHE_SETS> valid{};
std::array<u8, CACHE_SETS> modified{};

// Note: This is only for performance purposes; this same data could be computed at runtime
// from the tags and valid fields (and that's how it's done on the actual cache)
std::array<u8, 1 << 20> lookup_table{};
std::array<u8, 1 << 21> lookup_table_ex{};
std::array<u8, 1 << 20> lookup_table_vmem{};

bool m_disable_icache = false;
// Per-line operations. The PowerPC::*DCacheLine helpers call these with a 32-byte-aligned
// address that has already been translated when MSR.DR is set.
void Store(u32 addr);
void Invalidate(u32 addr);
void Flush(u32 addr);
void Touch(u32 addr, bool store);

// NOTE(review): presumably applies Flush to every line in the cache - confirm in the
// implementation.
void FlushAll();

// NOTE(review): the meaning of the returned pair is not visible from this declaration;
// presumably it identifies the (set, way) holding `addr` - confirm.
std::pair<u32, u32> GetCache(u32 addr, bool locked);

// NOTE(review): presumably transfer `len` bytes between `buffer` and the cached contents at
// `addr`. The memory access paths pass a `locked` value derived from HID0.DLOCK; its exact effect
// is defined by the implementation - confirm.
void Read(u32 addr, void* buffer, u32 len, bool locked);
void Write(u32 addr, const void* buffer, u32 len, bool locked);

void Init();
void Reset();

// Savestate hook; called with the PointerWrap from PowerPC::DoState.
void DoState(PointerWrap& p);
};

// Instruction cache: reuses the storage and bookkeeping of Cache and adds an instruction read
// entry point plus configuration handling.
//
// Invalidate, Init, Reset and DoState are also declared by Cache. None of them is virtual, so
// the declarations here hide the base versions and the one that runs depends on the static type
// of the object it is called through.
struct InstructionCache : public Cache
{
// NOTE(review): presumably the ID of a registered config-changed callback, empty while none is
// registered - confirm against Init()/the destructor.
std::optional<size_t> m_config_callback_id = std::nullopt;

// NOTE(review): presumably makes instruction reads bypass the cache when set, kept up to date by
// RefreshConfig() - confirm.
bool m_disable_icache = false;

InstructionCache() = default;
~InstructionCache();
u32 ReadInstruction(u32 addr);
void Invalidate(u32 addr);
void Init();
void Reset();
void DoState(PointerWrap& p);
void RefreshConfig();
};
} // namespace PowerPC
@@ -132,9 +132,20 @@ void DoState(PointerWrap& p)
p.Do(ppcState.reserve_address);

ppcState.iCache.DoState(p);
ppcState.dCache.DoState(p);

if (p.IsReadMode())
{
if (!ppcState.m_enable_dcache)
{
INFO_LOG_FMT(POWERPC, "Flushing data cache");
ppcState.dCache.FlushAll();
}
else
{
ppcState.dCache.Reset();
}

RoundingModeUpdated();
IBATUpdated();
DBATUpdated();
@@ -266,6 +277,9 @@ void Init(CPUCore cpu_core)

InitializeCPUCore(cpu_core);
ppcState.iCache.Init();
ppcState.dCache.Init();

ppcState.m_enable_dcache = Config::Get(Config::MAIN_ACCURATE_CPU_CACHE);

if (Config::Get(Config::MAIN_ENABLE_DEBUGGING))
breakpoints.ClearAllTemporary();
@@ -279,6 +293,7 @@ void Reset()

ResetRegisters();
ppcState.iCache.Reset();
ppcState.dCache.Reset();
}

void ScheduleInvalidateCacheThreadSafe(u32 address)
@@ -172,6 +172,8 @@ struct PowerPCState
u32 pagetable_hashmask = 0;

InstructionCache iCache;
bool m_enable_dcache = false;
Cache dCache;

// Reservation monitor for lwarx and its friend stwcxd.
bool reserve;
@@ -95,7 +95,7 @@ static size_t s_state_writes_in_queue;
static std::condition_variable s_state_write_queue_is_empty;

// Don't forget to increase this after doing changes on the savestate system
constexpr u32 STATE_VERSION = 156; // Last changed in PR 11184
constexpr u32 STATE_VERSION = 157; // Last changed in PR 11183

// Maps savestate versions to Dolphin versions.
// Versions after 42 don't need to be added to this list,
@@ -223,14 +223,18 @@ static void DoState(PointerWrap& p)
g_video_backend->DoState(p);
p.DoMarker("video_backend");

PowerPC::DoState(p);
p.DoMarker("PowerPC");
// CoreTiming needs to be restored before restoring Hardware because
// the controller code might need to schedule an event if the controller has changed.
system.GetCoreTiming().DoState(p);
p.DoMarker("CoreTiming");

// HW needs to be restored before PowerPC because the data cache might need to be flushed.
HW::DoState(p);
p.DoMarker("HW");

PowerPC::DoState(p);
p.DoMarker("PowerPC");

if (SConfig::GetInstance().bWii)
Wiimote::DoState(p);
p.DoMarker("Wiimote");
@@ -74,6 +74,12 @@ void AdvancedPane::CreateLayout()
"affect performance.\nThe performance impact is the same as having Enable MMU on."));
cpu_options_group_layout->addWidget(m_pause_on_panic_checkbox);

m_accurate_cpu_cache_checkbox = new QCheckBox(tr("Enable Write-Back Cache (slow)"));
m_accurate_cpu_cache_checkbox->setToolTip(
tr("Enables emulation of the CPU write-back cache.\nEnabling will have a significant impact "
"on performance.\nThis should be left disabled unless absolutely needed."));
cpu_options_group_layout->addWidget(m_accurate_cpu_cache_checkbox);

auto* clock_override = new QGroupBox(tr("Clock Override"));
auto* clock_override_layout = new QVBoxLayout();
clock_override->setLayout(clock_override_layout);
@@ -189,6 +195,9 @@ void AdvancedPane::ConnectLayout()
connect(m_pause_on_panic_checkbox, &QCheckBox::toggled, this,
[](bool checked) { Config::SetBaseOrCurrent(Config::MAIN_PAUSE_ON_PANIC, checked); });

connect(m_accurate_cpu_cache_checkbox, &QCheckBox::toggled, this,
[](bool checked) { Config::SetBaseOrCurrent(Config::MAIN_ACCURATE_CPU_CACHE, checked); });

m_cpu_clock_override_checkbox->setChecked(Config::Get(Config::MAIN_OVERCLOCK_ENABLE));
connect(m_cpu_clock_override_checkbox, &QCheckBox::toggled, [this](bool enable_clock_override) {
Config::SetBaseOrCurrent(Config::MAIN_OVERCLOCK_ENABLE, enable_clock_override);
@@ -258,6 +267,9 @@ void AdvancedPane::Update()
m_pause_on_panic_checkbox->setChecked(Config::Get(Config::MAIN_PAUSE_ON_PANIC));
m_pause_on_panic_checkbox->setEnabled(!running);

m_accurate_cpu_cache_checkbox->setChecked(Config::Get(Config::MAIN_ACCURATE_CPU_CACHE));
m_accurate_cpu_cache_checkbox->setEnabled(!running);

QFont bf = font();
bf.setBold(Config::GetActiveLayerForConfig(Config::MAIN_OVERCLOCK_ENABLE) !=
Config::LayerType::Base);
@@ -33,6 +33,7 @@ class AdvancedPane final : public QWidget
QComboBox* m_cpu_emulation_engine_combobox;
QCheckBox* m_enable_mmu_checkbox;
QCheckBox* m_pause_on_panic_checkbox;
QCheckBox* m_accurate_cpu_cache_checkbox;
QCheckBox* m_cpu_clock_override_checkbox;
QSlider* m_cpu_clock_override_slider;
QLabel* m_cpu_clock_override_slider_label;