Permalink
Browse files

TexCache: Add NEON alpha checks for Vulkan.

  • Loading branch information...
unknownbrackets committed Nov 13, 2017
1 parent f087b87 commit 65e71f57c7908e21d1397df489212c17cde9281a
Showing with 58 additions and 4 deletions.
  1. +12 −4 GPU/Common/TextureDecoder.cpp
  2. +44 −0 GPU/Common/TextureDecoderNEON.cpp
  3. +2 −0 GPU/Common/TextureDecoderNEON.h
@@ -659,12 +659,16 @@ CheckAlphaResult CheckAlphaABGR1555Basic(const u32 *pixelData, int stride, int w
}
CheckAlphaResult CheckAlphaRGBA4444Basic(const u32 *pixelData, int stride, int w, int h) {
#ifdef _M_SSE
// Use SSE if aligned to 16 bytes / 8 pixels (usually the case.)
if ((w & 7) == 0 && (stride & 7) == 0) {
#ifdef _M_SSE
return CheckAlphaRGBA4444SSE2(pixelData, stride, w, h);
}
#elif PPSSPP_ARCH(ARMV7) || PPSSPP_ARCH(ARM64)
if (cpu_info.bNEON) {
return CheckAlphaRGBA4444NEON(pixelData, stride, w, h);
}
#endif
}
const u32 *p = pixelData;
const int w2 = (w + 1) / 2;
@@ -688,12 +692,16 @@ CheckAlphaResult CheckAlphaRGBA4444Basic(const u32 *pixelData, int stride, int w
}
CheckAlphaResult CheckAlphaRGBA5551Basic(const u32 *pixelData, int stride, int w, int h) {
#ifdef _M_SSE
// Use SSE if aligned to 16 bytes / 8 pixels (usually the case.)
if ((w & 7) == 0 && (stride & 7) == 0) {
#ifdef _M_SSE
return CheckAlphaRGBA5551SSE2(pixelData, stride, w, h);
}
#elif PPSSPP_ARCH(ARMV7) || PPSSPP_ARCH(ARM64)
if (cpu_info.bNEON) {
return CheckAlphaRGBA5551NEON(pixelData, stride, w, h);
}
#endif
}
const u32 *p = pixelData;
const int w2 = (w + 1) / 2;
@@ -342,4 +342,48 @@ CheckAlphaResult CheckAlphaABGR1555NEON(const u32 *pixelData, int stride, int w,
return CHECKALPHA_FULL;
}
// NEON scan over 16-bit pixels, checking the top four bits (mask 0xF000) of each one.
//
// pixelData is read as an array of u16; stride is the distance between rows, counted in
// u16 elements.  Each row is consumed in groups of 8 pixels, so a width that is not a
// multiple of 8 causes a read past w within that row.
//
// Returns CHECKALPHA_FULL when every pixel examined had all four masked bits set, and
// CHECKALPHA_ANY as soon as a finished row shows that some pixel did not.
CheckAlphaResult CheckAlphaRGBA4444NEON(const u32 *pixelData, int stride, int w, int h) {
	const uint16x8_t alphaMask = vdupq_n_u16((u16)0xF000);
	const u16 *row = (const u16 *)pixelData;

	// Running AND of everything seen so far; a lane keeps its mask bits only while every
	// pixel that passed through that lane had them set.
	uint16x8_t accum = alphaMask;

	for (int y = 0; y < h; ++y, row += stride) {
		for (int x = 0; x < w; x += 8) {
			accum = vandq_u16(accum, vld1q_u16(row + x));
		}

		// Any bit that differs from the mask means a masked bit was cleared somewhere.
		if (VectorIsNonZeroNEON(veorq_u16(accum, alphaMask))) {
			return CHECKALPHA_ANY;
		}
	}

	return CHECKALPHA_FULL;
}
// NEON scan over 16-bit pixels, checking the top bit (mask 0x8000) of each one.
//
// pixelData is read as an array of u16; stride is the distance between rows, counted in
// u16 elements.  Each row is consumed in groups of 8 pixels, so a width that is not a
// multiple of 8 causes a read past w within that row.
//
// Returns CHECKALPHA_FULL when every pixel examined had the masked bit set, and
// CHECKALPHA_ANY as soon as a finished row shows that some pixel did not.
CheckAlphaResult CheckAlphaRGBA5551NEON(const u32 *pixelData, int stride, int w, int h) {
	const uint16x8_t alphaMask = vdupq_n_u16((u16)0x8000);
	const u16 *row = (const u16 *)pixelData;

	// Running AND of everything seen so far; a lane keeps its mask bit only while every
	// pixel that passed through that lane had it set.
	uint16x8_t accum = alphaMask;

	for (int y = 0; y < h; ++y, row += stride) {
		for (int x = 0; x < w; x += 8) {
			accum = vandq_u16(accum, vld1q_u16(row + x));
		}

		// A non-zero XOR against the mask means the bit was cleared in at least one pixel.
		if (VectorIsNonZeroNEON(veorq_u16(accum, alphaMask))) {
			return CHECKALPHA_ANY;
		}
	}

	return CHECKALPHA_FULL;
}
#endif
@@ -24,3 +24,5 @@ u32 ReliableHash32NEON(const void *input, size_t len, u32 seed);
// NEON alpha checks, one per pixel format named in the function.  All share the same
// signature: a pointer to the pixel data, a row stride, and the width and height to scan.
// NOTE(review): the RGBA4444/RGBA5551 variants load 8 pixels at a time and advance by
// stride in 16-bit units; confirm stride units and width requirements for the others.
CheckAlphaResult CheckAlphaRGBA8888NEON(const u32 *pixelData, int stride, int w, int h);
CheckAlphaResult CheckAlphaABGR4444NEON(const u32 *pixelData, int stride, int w, int h);
CheckAlphaResult CheckAlphaABGR1555NEON(const u32 *pixelData, int stride, int w, int h);
CheckAlphaResult CheckAlphaRGBA4444NEON(const u32 *pixelData, int stride, int w, int h);
CheckAlphaResult CheckAlphaRGBA5551NEON(const u32 *pixelData, int stride, int w, int h);

0 comments on commit 65e71f5

Please sign in to comment.