Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Merge pull request #11183 from TheLordScruffy/write-back-cache
Implement PowerPC data cache
  • Loading branch information
AdmiralCurtiss committed Jan 9, 2023
2 parents e06c115 + d85f6c8 commit eeeab3c
Show file tree
Hide file tree
Showing 23 changed files with 570 additions and 142 deletions.
1 change: 1 addition & 0 deletions Source/Core/Core/Config/MainSettings.cpp
Expand Up @@ -37,6 +37,7 @@ const Info<PowerPC::CPUCore> MAIN_CPU_CORE{{System::Main, "Core", "CPUCore"},
PowerPC::DefaultCPUCore()};
const Info<bool> MAIN_JIT_FOLLOW_BRANCH{{System::Main, "Core", "JITFollowBranch"}, true};
const Info<bool> MAIN_FASTMEM{{System::Main, "Core", "Fastmem"}, true};
const Info<bool> MAIN_ACCURATE_CPU_CACHE{{System::Main, "Core", "AccurateCPUCache"}, false};
const Info<bool> MAIN_DSP_HLE{{System::Main, "Core", "DSPHLE"}, true};
const Info<int> MAIN_TIMING_VARIANCE{{System::Main, "Core", "TimingVariance"}, 40};
const Info<bool> MAIN_CPU_THREAD{{System::Main, "Core", "CPUThread"}, true};
Expand Down
1 change: 1 addition & 0 deletions Source/Core/Core/Config/MainSettings.h
Expand Up @@ -55,6 +55,7 @@ extern const Info<bool> MAIN_SKIP_IPL;
extern const Info<PowerPC::CPUCore> MAIN_CPU_CORE;
extern const Info<bool> MAIN_JIT_FOLLOW_BRANCH;
extern const Info<bool> MAIN_FASTMEM;
extern const Info<bool> MAIN_ACCURATE_CPU_CACHE;
// Should really be in the DSP section, but we're kind of stuck with bad decisions made in the past.
extern const Info<bool> MAIN_DSP_HLE;
extern const Info<int> MAIN_TIMING_VARIANCE;
Expand Down
1 change: 1 addition & 0 deletions Source/Core/Core/ConfigLoaders/IsSettingSaveable.cpp
Expand Up @@ -127,6 +127,7 @@ bool IsSettingSaveable(const Config::Location& config_location)
&Config::MAIN_CPU_THREAD.GetLocation(),
&Config::MAIN_MMU.GetLocation(),
&Config::MAIN_PAUSE_ON_PANIC.GetLocation(),
&Config::MAIN_ACCURATE_CPU_CACHE.GetLocation(),
&Config::MAIN_BB_DUMP_PORT.GetLocation(),
&Config::MAIN_SYNC_GPU.GetLocation(),
&Config::MAIN_SYNC_GPU_MAX_DISTANCE.GetLocation(),
Expand Down
3 changes: 1 addition & 2 deletions Source/Core/Core/DolphinAnalytics.cpp
Expand Up @@ -135,8 +135,7 @@ void DolphinAnalytics::ReportGameStart()
}

// Keep in sync with enum class GameQuirk definition.
constexpr std::array<const char*, 28> GAME_QUIRKS_NAMES{
"icache-matters",
constexpr std::array<const char*, 27> GAME_QUIRKS_NAMES{
"directly-reads-wiimote-input",
"uses-DVDLowStopLaser",
"uses-DVDLowOffset",
Expand Down
5 changes: 1 addition & 4 deletions Source/Core/Core/DolphinAnalytics.h
Expand Up @@ -21,12 +21,9 @@

enum class GameQuirk
{
// Sometimes code run from ICache is different from its mirror in RAM.
ICACHE_MATTERS = 0,

// The Wii remote hardware makes it possible to bypass normal data reporting and directly
// "read" extension or IR data. This would break our current TAS/NetPlay implementation.
DIRECTLY_READS_WIIMOTE_INPUT,
DIRECTLY_READS_WIIMOTE_INPUT = 0,

// Several Wii DI commands that are rarely/never used and not implemented by Dolphin
USES_DVD_LOW_STOP_LASER,
Expand Down
81 changes: 44 additions & 37 deletions Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStore.cpp
Expand Up @@ -438,14 +438,17 @@ void Interpreter::dcba(UGeckoInstruction inst)

// dcbf: flush the data cache block addressed by the instruction's operands.
//
// When data cache emulation is disabled (ppcState.m_enable_dcache is false) there is no cache
// line to flush, so only the JIT invalidation heuristic runs. When it is enabled, the address
// is handed to PowerPC::FlushDCacheLine and the JIT heuristic is skipped.
void Interpreter::dcbf(UGeckoInstruction inst)
{
  // TODO: Implement some sort of L2 emulation.
  // TODO: Raise DSI if translation fails (except for direct-store segments).

  const u32 address = Helper_Get_EA_X(PowerPC::ppcState, inst);

  // The invalidation below is deliberately confined to the no-dcache path: performing it
  // unconditionally beforehand would make this guard redundant and would also invalidate
  // JIT blocks when the data cache is being emulated.
  if (!PowerPC::ppcState.m_enable_dcache)
  {
    // Invalidate the JIT cache here as a heuristic to compensate for
    // the lack of precise L1 icache emulation in the JIT. (Portable software
    // should use icbi consistently, but games aren't portable.)
    JitInterface::InvalidateICacheLine(address);
    return;
  }

  PowerPC::FlushDCacheLine(address);
}

void Interpreter::dcbi(UGeckoInstruction inst)
Expand All @@ -456,42 +459,44 @@ void Interpreter::dcbi(UGeckoInstruction inst)
return;
}

// TODO: Implement some sort of L2 emulation.
// TODO: Raise DSI if translation fails (except for direct-store segments).

// Invalidate the JIT cache here as a heuristic to compensate for
// the lack of precise L1 icache emulation in the JIT. (Portable software
// should use icbi consistently, but games aren't portable.)
const u32 address = Helper_Get_EA_X(PowerPC::ppcState, inst);
JitInterface::InvalidateICacheLine(address);
if (!PowerPC::ppcState.m_enable_dcache)
{
// Invalidate the JIT cache here as a heuristic to compensate for
// the lack of precise L1 icache emulation in the JIT. (Portable software
// should use icbi consistently, but games aren't portable.)
JitInterface::InvalidateICacheLine(address);
return;
}

PowerPC::InvalidateDCacheLine(address);
}

// dcbst: store (write back) the data cache block addressed by the instruction's operands.
//
// When data cache emulation is disabled (ppcState.m_enable_dcache is false) there is no cache
// line to write back, so only the JIT invalidation heuristic runs. When it is enabled, the
// address is handed to PowerPC::StoreDCacheLine and the JIT heuristic is skipped.
void Interpreter::dcbst(UGeckoInstruction inst)
{
  // TODO: Implement some sort of L2 emulation.
  // TODO: Raise DSI if translation fails (except for direct-store segments).

  const u32 address = Helper_Get_EA_X(PowerPC::ppcState, inst);

  // The invalidation below is deliberately confined to the no-dcache path: performing it
  // unconditionally beforehand would make this guard redundant and would also invalidate
  // JIT blocks when the data cache is being emulated.
  if (!PowerPC::ppcState.m_enable_dcache)
  {
    // Invalidate the JIT cache here as a heuristic to compensate for
    // the lack of precise L1 icache emulation in the JIT. (Portable software
    // should use icbi consistently, but games aren't portable.)
    JitInterface::InvalidateICacheLine(address);
    return;
  }

  PowerPC::StoreDCacheLine(address);
}

// These instructions hint that it might be optimal to prefetch the specified cache line into the
// data cache. But the CPU is never guaranteed to do this fetch, and in practice it's not more
// performant to emulate it.

// dcbt: data cache block touch. Currently performs no work; the HID0.NOOPTI test only marks
// where prefetch emulation would be skipped once something is implemented.
void Interpreter::dcbt(UGeckoInstruction inst)
{
  if (!HID0.NOOPTI)
  {
    // TODO: Implement some sort of L2 emulation.
  }
}

// dcbtst: data cache block touch for store. Currently performs no work; the HID0.NOOPTI test
// only marks where prefetch emulation would be skipped once something is implemented.
void Interpreter::dcbtst(UGeckoInstruction inst)
{
  if (!HID0.NOOPTI)
  {
    // TODO: Implement some sort of L2 emulation.
  }
}

void Interpreter::dcbz(UGeckoInstruction inst)
Expand All @@ -504,15 +509,18 @@ void Interpreter::dcbz(UGeckoInstruction inst)
return;
}

// Hack to stop dcbz/dcbi over low MEM1 trashing memory.
if ((dcbz_addr < 0x80008000) && (dcbz_addr >= 0x80000000) &&
Config::Get(Config::MAIN_LOW_DCBZ_HACK))
if (!PowerPC::ppcState.m_enable_dcache)
{
return;
// Hack to stop dcbz/dcbi over low MEM1 trashing memory. This is not needed if data cache
// emulation is enabled.
if ((dcbz_addr < 0x80008000) && (dcbz_addr >= 0x80000000) &&
Config::Get(Config::MAIN_LOW_DCBZ_HACK))
{
return;
}
}

// TODO: Implement some sort of L2 emulation.
PowerPC::ClearCacheLine(dcbz_addr & (~31));
PowerPC::ClearDCacheLine(dcbz_addr & (~31));
}

void Interpreter::dcbz_l(UGeckoInstruction inst)
Expand All @@ -531,8 +539,7 @@ void Interpreter::dcbz_l(UGeckoInstruction inst)
return;
}

// FAKE: clear memory instead of clearing the cache block
PowerPC::ClearCacheLine(address & (~31));
PowerPC::ClearDCacheLine(address & (~31));
}

// eciwx/ecowx technically should access the specified device
Expand Down
Expand Up @@ -250,9 +250,33 @@ void Interpreter::mfspr(UGeckoInstruction inst)
rSPR(index) &= ~1;
}
break;

case SPR_XER:
rSPR(index) = PowerPC::GetXER().Hex;
break;

case SPR_UPMC1:
rSPR(index) = rSPR(SPR_PMC1);
break;

case SPR_UPMC2:
rSPR(index) = rSPR(SPR_PMC2);
break;

case SPR_UPMC3:
rSPR(index) = rSPR(SPR_PMC3);
break;

case SPR_UPMC4:
rSPR(index) = rSPR(SPR_PMC4);
break;

case SPR_IABR:
// A strange quirk: reading back this register on hardware will always have the TE (Translation
// enabled) bit set to 0 (despite the bit appearing to function normally when set). This does
// not apply to the DABR.
rGPR[inst.RD] = rSPR(index) & ~1;
return;
}
rGPR[inst.RD] = rSPR(index);
}
Expand Down
4 changes: 3 additions & 1 deletion Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp
Expand Up @@ -229,6 +229,8 @@ void Jit64::lXXx(UGeckoInstruction inst)

void Jit64::dcbx(UGeckoInstruction inst)
{
FALLBACK_IF(m_accurate_cpu_cache_enabled);

INSTRUCTION_START
JITDISABLE(bJITLoadStoreOff);

Expand Down Expand Up @@ -444,7 +446,7 @@ void Jit64::dcbz(UGeckoInstruction inst)
MOV(32, PPCSTATE(pc), Imm32(js.compilerPC));
BitSet32 registersInUse = CallerSavedRegistersInUse();
ABI_PushRegistersAndAdjustStack(registersInUse, 0);
ABI_CallFunctionR(PowerPC::ClearCacheLine, RSCRATCH);
ABI_CallFunctionR(PowerPC::ClearDCacheLine, RSCRATCH);
ABI_PopRegistersAndAdjustStack(registersInUse, 0);

if (emit_fast_path)
Expand Down
5 changes: 5 additions & 0 deletions Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp
Expand Up @@ -412,6 +412,11 @@ void Jit64::mfspr(UGeckoInstruction inst)
case SPR_PMC2:
case SPR_PMC3:
case SPR_PMC4:
case SPR_UPMC1:
case SPR_UPMC2:
case SPR_UPMC3:
case SPR_UPMC4:
case SPR_IABR:
FALLBACK_IF(true);
default:
{
Expand Down
5 changes: 4 additions & 1 deletion Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp
Expand Up @@ -61,6 +61,9 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
{
const u32 access_size = BackPatchInfo::GetFlagSize(flags);

if (m_accurate_cpu_cache_enabled)
mode = MemAccessMode::AlwaysSafe;

const bool emit_fastmem = mode != MemAccessMode::AlwaysSafe;
const bool emit_slowmem = mode != MemAccessMode::AlwaysUnsafe;

Expand Down Expand Up @@ -228,7 +231,7 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
}
else if (flags & BackPatchInfo::FLAG_ZERO_256)
{
MOVP2R(ARM64Reg::X8, &PowerPC::ClearCacheLine);
MOVP2R(ARM64Reg::X8, &PowerPC::ClearDCacheLine);
BLR(ARM64Reg::X8);
}
else
Expand Down
2 changes: 2 additions & 0 deletions Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp
Expand Up @@ -635,6 +635,8 @@ void JitArm64::stmw(UGeckoInstruction inst)

void JitArm64::dcbx(UGeckoInstruction inst)
{
FALLBACK_IF(m_accurate_cpu_cache_enabled);

INSTRUCTION_START
JITDISABLE(bJITLoadStoreOff);

Expand Down
Expand Up @@ -395,6 +395,15 @@ void JitArm64::mfspr(UGeckoInstruction inst)
break;
case SPR_WPAR:
case SPR_DEC:
case SPR_PMC1:
case SPR_PMC2:
case SPR_PMC3:
case SPR_PMC4:
case SPR_UPMC1:
case SPR_UPMC2:
case SPR_UPMC3:
case SPR_UPMC4:
case SPR_IABR:
FALLBACK_IF(true);
default:
gpr.BindToRegister(d, false);
Expand Down
7 changes: 7 additions & 0 deletions Source/Core/Core/PowerPC/JitCommon/JitBase.cpp
Expand Up @@ -58,6 +58,13 @@ void JitBase::RefreshConfig()
m_fastmem_enabled = Config::Get(Config::MAIN_FASTMEM);
m_mmu_enabled = Core::System::GetInstance().IsMMUMode();
m_pause_on_panic_enabled = Core::System::GetInstance().IsPauseOnPanicMode();
m_accurate_cpu_cache_enabled = Config::Get(Config::MAIN_ACCURATE_CPU_CACHE);
if (m_accurate_cpu_cache_enabled)
{
m_fastmem_enabled = false;
// This hack is unneeded if the data cache is being emulated.
m_low_dcbz_hack = false;
}

analyzer.SetDebuggingEnabled(m_enable_debugging);
analyzer.SetBranchFollowingEnabled(Config::Get(Config::MAIN_JIT_FOLLOW_BRANCH));
Expand Down
1 change: 1 addition & 0 deletions Source/Core/Core/PowerPC/JitCommon/JitBase.h
Expand Up @@ -136,6 +136,7 @@ class JitBase : public CPUCoreBase
bool m_fastmem_enabled = false;
bool m_mmu_enabled = false;
bool m_pause_on_panic_enabled = false;
bool m_accurate_cpu_cache_enabled = false;

void RefreshConfig();

Expand Down

0 comments on commit eeeab3c

Please sign in to comment.