Permalink
Browse files

NEON-optimize CLUT loads

  • Loading branch information...
hrydgard committed Aug 15, 2017
1 parent 50d7719 commit d19d8a8bb8324d55f5c8b744854b2d28f2997e95
Showing with 20 additions and 0 deletions.
  1. +20 −0 GPU/Common/TextureCacheCommon.cpp
@@ -33,6 +33,9 @@
#if defined(_M_SSE)
#include <emmintrin.h>
#endif
#if PPSSPP_ARCH(ARM_NEON)
#include <arm_neon.h>
#endif
// Videos should be updated every few frames, so we forget quickly.
#define VIDEO_DECIMATE_AGE 4
@@ -931,6 +934,23 @@ void TextureCacheCommon::LoadClut(u32 clutAddr, u32 loadBytes) {
memset((u8 *)clutBufRaw_ + bytes, 0x00, loadBytes - bytes);
}
}
#elif PPSSPP_ARCH(ARM_NEON)
// NEON variant of the CLUT copy into clutBufRaw_.
if (bytes == loadBytes) {
// Fast path: the whole requested range is copied, so no zero-fill is needed.
// NOTE(review): GetPointerUnchecked presumably skips address validation - confirm the caller guarantees a valid range.
const uint32_t *source = (const uint32_t *)Memory::GetPointerUnchecked(clutAddr);
uint32_t *dest = (uint32_t *)clutBufRaw_;
// Copy in 32-byte blocks (eight 32-bit words per iteration).
// NOTE(review): integer division drops any remainder, so bytes % 32 trailing bytes would not be copied.
// Presumably bytes is always a multiple of 32 on this path - TODO confirm.
int numBlocks = bytes / 32;
for (int i = 0; i < numBlocks; i++, source += 8, dest += 8) {
// Two 128-bit loads followed by two 128-bit stores move one 32-byte block.
uint32x4_t data1 = vld1q_u32(source);
uint32x4_t data2 = vld1q_u32(source + 4);
vst1q_u32(dest, data1);
vst1q_u32(dest + 4, data2);
}
} else {
// Generic path: copy the first `bytes` bytes, then zero the rest of the loadBytes range when the copy was shorter.
Memory::MemcpyUnchecked(clutBufRaw_, clutAddr, bytes);
if (bytes < loadBytes) {
memset((u8 *)clutBufRaw_ + bytes, 0x00, loadBytes - bytes);
}
}
#else
Memory::MemcpyUnchecked(clutBufRaw_, clutAddr, bytes);
if (bytes < loadBytes) {

0 comments on commit d19d8a8

Please sign in to comment.