Skip to content

Commit

Permalink
Merge pull request #8965 from hrydgard/exynos-cache-fix
Browse files Browse the repository at this point in the history
Port over the Exynos cacheline size fix from Dolphin.
  • Loading branch information
hrydgard committed Sep 10, 2016
2 parents 1c47778 + 03279e1 commit cc8f66b
Show file tree
Hide file tree
Showing 5 changed files with 40 additions and 39 deletions.
46 changes: 31 additions & 15 deletions Common/Arm64Emitter.cpp
Expand Up @@ -313,24 +313,40 @@ void ARM64XEmitter::FlushIcache()

// Makes freshly written code in [start, end) safe to execute by performing
// cache maintenance over that address range.
//   start: first byte of the modified code.
//   end:   one past the last byte of the modified code.
// On IOS the work is handed to sys_cache_control(); the hand-written path
// further down does data-cache maintenance line by line, then invalidates the
// instruction cache line by line, with barriers in between.
void ARM64XEmitter::FlushIcacheSection(u8* start, u8* end)
{
if (cpu_info.sBugs.bExynos8890Invalidation)
{
// Over invalidate to force this CPU to listen.
// Widen the range by 4096 bytes on each side; start is clamped so it never
// moves below m_startcode.
start = m_startcode + 4096 < start ? start - 4096 : m_startcode;
end += 4096;
}

#if defined(IOS)
// Header file says this is equivalent to: sys_icache_invalidate(start, end - start);
sys_cache_control(kCacheFunctionPrepareForExecution, start, end - start);
#else
#if (defined(__clang__) && !defined(_M_IX86) && !defined(_M_X64)) || defined(ANDROID)
__clear_cache(start, end);
#else
#if !defined(_M_IX86) && !defined(_M_X64)
__builtin___clear_cache(start, end);
#endif
#endif
#elif !defined(_M_IX86) && !defined(_M_X64)
// Code from Dolphin, contributed by the Mono project.

// Don't rely on GCC's __clear_cache implementation, as it caches
// icache/dcache cache line sizes, that can vary between cores on
// big.LITTLE architectures.
u64 addr, ctr_el0;
// Smallest line sizes observed so far. Being function-local statics, they
// persist across calls; they start at 0xffff and are lowered below whenever a
// call measures a smaller size.
// NOTE(review): these statics are read and written without synchronization;
// confirm that concurrent callers are either impossible or harmless.
static size_t icache_line_size = 0xffff, dcache_line_size = 0xffff;
size_t isize, dsize;

// CTR_EL0 is read on every call, so the sizes reflect the core running this
// call. Bits [3:0] and [19:16] hold log2 of the minimum I-cache and D-cache
// line size in 4-byte words, hence the "4 <<" to convert to bytes.
__asm__ volatile("mrs %0, ctr_el0" : "=r"(ctr_el0));
isize = 4 << ((ctr_el0 >> 0) & 0xf);
dsize = 4 << ((ctr_el0 >> 16) & 0xf);

// use the global minimum cache line size
// (the smaller of the size just measured and the smallest seen previously),
// so the loops below step by the smallest line size seen so far.
icache_line_size = isize = icache_line_size < isize ? icache_line_size : isize;
dcache_line_size = dsize = dcache_line_size < dsize ? dcache_line_size : dsize;

// Round start down to a D-cache line boundary, then walk the range one line
// at a time.
addr = (u64)start & ~(u64)(dsize - 1);
for (; addr < (u64)end; addr += dsize)
// use "civac" instead of "cvau", as this is the suggested workaround for
// Cortex-A53 errata 819472, 826319, 827319 and 824069.
__asm__ volatile("dc civac, %0" : : "r"(addr) : "memory");
// Barrier: wait for the data-cache maintenance above to complete before
// touching the instruction cache.
__asm__ volatile("dsb ish" : : : "memory");

// Same walk again, this time invalidating instruction-cache lines.
addr = (u64)start & ~(u64)(isize - 1);
for (; addr < (u64)end; addr += isize)
__asm__ volatile("ic ivau, %0" : : "r"(addr) : "memory");

// Wait for the invalidation to complete, then synchronize the instruction
// stream so that later fetches see the new code.
__asm__ volatile("dsb ish" : : : "memory");
__asm__ volatile("isb" : : : "memory");
#endif
}

Expand Down
15 changes: 8 additions & 7 deletions Common/CPUDetect.h
Expand Up @@ -89,14 +89,15 @@ struct CPUInfo {
bool bXBurst1;
bool bXBurst2;

// Bugs
// Quirks
struct {
// Samsung Galaxy S7 devices (Exynos 8890) have a bug that causes invalidation to work incorrectly.
// This may be caused by interaction between the separate CPU cores.
// Padding jit blocks and over-invalidating seems to "solve" it.
// Only affects ARM64.
bool bExynos8890Invalidation;
} sBugs;
// Samsung Galaxy S7 devices (Exynos 8890) have a big.LITTLE configuration where the cacheline size differs between the big and LITTLE cores.
// GCC's cache clearing function would detect the cacheline size on one core and keep it for later. When a core
// with a different cacheline size then clears using that stale value, invalidation goes wrong. In case we want to do something different in this
// situation in the future, let's keep this as a quirk, but our current code won't detect it reliably
// if it happens on new archs. We now use better clearing code on ARM64 that doesn't have this issue.
bool bExynos8890DifferingCachelineSizes;
} sQuirks;

// Call Detect()
explicit CPUInfo();
Expand Down
8 changes: 0 additions & 8 deletions Core/MIPS/ARM64/Arm64Jit.cpp
Expand Up @@ -333,14 +333,6 @@ const u8 *Arm64Jit::DoJit(u32 em_address, JitBlock *b) {
if (dontLogBlocks > 0)
dontLogBlocks--;

if (cpu_info.sBugs.bExynos8890Invalidation) {
// What a waste. If we don't do both this and over-invalidate, the device crashes.
// This space won't ever get run, but it's wasted jit cache space.
for (int i = 0; i < 32; ++i) {
HINT(HINT_NOP);
}
}

// Don't forget to zap the newly written instructions in the instruction cache!
FlushIcache();

Expand Down
8 changes: 0 additions & 8 deletions GPU/Common/VertexDecoderArm64.cpp
Expand Up @@ -278,14 +278,6 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int

RET();

if (cpu_info.sBugs.bExynos8890Invalidation) {
// Apparently the vertex cache hasn't been the problem, but adding this here for the same
// reasons as the standard jit.
for (int i = 0; i < 32; ++i) {
HINT(HINT_NOP);
}
}

FlushIcache();

if (log) {
Expand Down
2 changes: 1 addition & 1 deletion android/jni/app-android.cpp
Expand Up @@ -526,7 +526,7 @@ extern "C" void Java_org_ppsspp_ppsspp_NativeApp_init
// Unfortunately, on the Samsung Galaxy S7, this isn't in /proc/cpuinfo.
// We also can't read it from __system_property_get.
if (buildBoard == "universal8890") {
cpu_info.sBugs.bExynos8890Invalidation = true;
cpu_info.sQuirks.bExynos8890DifferingCachelineSizes = true;
}

NativeGetAppInfo(&app_name, &app_nice_name, &landscape, &version);
Expand Down

0 comments on commit cc8f66b

Please sign in to comment.