Skip to content

Commit

Permalink
One more NEON optimization
Browse files (browse the repository at this point in the history)
  • Loading branch information
hrydgard committed Apr 14, 2022
1 parent ffcfef6 commit a3d650d
Showing 1 changed file with 13 additions and 0 deletions.
13 changes: 13 additions & 0 deletions GPU/Common/TextureCacheCommon.cpp
Expand Up @@ -1481,6 +1481,19 @@ void CopyAndSumMask32(u32 *dst, const u32 *src, int width, u32 *outMask) {
}
mask = SSEReduce32And(wideMask);
}
#elif PPSSPP_ARCH(ARM_NEON)
if (width >= 4) {
uint32x4_t wideMask = vdupq_n_u32(0xFFFFFFFF);
while (width >= 4) {
uint32x4_t colors = vld1q_u32(src);
wideMask = vandq_u32(wideMask, colors);
vst1q_u32(dst, colors);
src += 4;
dst += 4;
width -= 4;
}
mask = NEONReduce32And(wideMask);
}
#endif

for (int i = 0; i < width; i++) {
Expand Down

0 comments on commit a3d650d

Please sign in to comment.