Permalink
Browse files

TexCache: Simplify CheckAlpha funcs and SIMD.

Only check for full alpha now, which is simpler.
  • Loading branch information...
unknownbrackets committed Nov 13, 2017
1 parent e3b3828 commit f087b87b0c917f25df02d92b01a0fcdb3bd16802
Showing with 79 additions and 181 deletions.
  1. +59 −122 GPU/Common/TextureDecoder.cpp
  2. +20 −59 GPU/Common/TextureDecoderNEON.cpp
@@ -438,92 +438,57 @@ static inline u32 CombineSSEBitsToDWORD(const __m128i &v) {
}
CheckAlphaResult CheckAlphaRGBA8888SSE2(const u32 *pixelData, int stride, int w, int h) {
const __m128i zero = _mm_setzero_si128();
const __m128i full = _mm_set1_epi32(0xFF);
const __m128i mask = _mm_set1_epi32(0xFF000000);
const __m128i *p = (const __m128i *)pixelData;
const int w4 = w / 4;
const int stride4 = stride / 4;
// Have alpha values == 0 been seen?
__m128i hasZeroCursor = _mm_setzero_si128();
__m128i bits = mask;
for (int y = 0; y < h; ++y) {
// Have alpha values > 0 and < 0xFF been seen?
__m128i hasAnyCursor = _mm_setzero_si128();
for (int i = 0; i < w4; ++i) {
const __m128i a = _mm_srli_epi32(_mm_load_si128(&p[i]), 24);
const __m128i isZero = _mm_cmpeq_epi32(a, zero);
hasZeroCursor = _mm_or_si128(hasZeroCursor, isZero);
// If a = FF, isNotFull will be 0 -> hasAny will be 0.
// If a = 00, a & isNotFull will be 0 -> hasAny will be 0.
// In any other case, hasAny will have some bits set.
const __m128i isNotFull = _mm_cmplt_epi32(a, full);
hasAnyCursor = _mm_or_si128(hasAnyCursor, _mm_and_si128(a, isNotFull));
const __m128i a = _mm_load_si128(&p[i]);
bits = _mm_and_si128(bits, a);
}
p += stride4;
// We check any early, in case we can skip the rest of the rows.
if (CombineSSEBitsToDWORD(hasAnyCursor) != 0) {
__m128i result = _mm_xor_si128(bits, mask);
if (CombineSSEBitsToDWORD(result) != 0) {
return CHECKALPHA_ANY;
}
}
// Now let's sum up the bits.
if (CombineSSEBitsToDWORD(hasZeroCursor) != 0) {
return CHECKALPHA_ANY;
} else {
return CHECKALPHA_FULL;
p += stride4;
}
return CHECKALPHA_FULL;
}
CheckAlphaResult CheckAlphaABGR4444SSE2(const u32 *pixelData, int stride, int w, int h) {
const __m128i zero = _mm_setzero_si128();
const __m128i full = _mm_set1_epi16((short)0xF000);
const __m128i mask = _mm_set1_epi16((short)0x000F);
const __m128i *p = (const __m128i *)pixelData;
const int w8 = w / 8;
const int stride8 = stride / 8;
__m128i hasZeroCursor = _mm_setzero_si128();
__m128i bits = mask;
for (int y = 0; y < h; ++y) {
__m128i hasAnyCursor = _mm_setzero_si128();
for (int i = 0; i < w8; ++i) {
// This moves XXXA to A000.
const __m128i a = _mm_slli_epi16(_mm_load_si128(&p[i]), 12);
// At least one bit in isZero, and therefore hasZeroCursor, will get set if there's a zero.
const __m128i isZero = _mm_cmpeq_epi16(a, zero);
hasZeroCursor = _mm_or_si128(hasZeroCursor, isZero);
// If a = F, isFull will be 1 -> hasAny will be 0.
// If a = 0, a & !isFull will be 0 -> hasAny will be 0.
// In any other case, hasAny will have some bits set.
const __m128i isFull = _mm_cmpeq_epi32(a, full);
const __m128i hasAny = _mm_andnot_si128(isFull, a);
hasAnyCursor = _mm_or_si128(hasAnyCursor, hasAny);
const __m128i a = _mm_load_si128(&p[i]);
bits = _mm_and_si128(bits, a);
}
p += stride8;
// We check any early, in case we can skip the rest of the rows.
if (CombineSSEBitsToDWORD(hasAnyCursor) != 0) {
__m128i result = _mm_xor_si128(bits, mask);
if (CombineSSEBitsToDWORD(result) != 0) {
return CHECKALPHA_ANY;
}
}
// Now let's sum up the bits.
if (CombineSSEBitsToDWORD(hasZeroCursor) != 0) {
return CHECKALPHA_ANY;
} else {
return CHECKALPHA_FULL;
p += stride8;
}
return CHECKALPHA_FULL;
}
CheckAlphaResult CheckAlphaABGR1555SSE2(const u32 *pixelData, int stride, int w, int h) {
const __m128i mask = _mm_set1_epi16(1);
const __m128i mask = _mm_set1_epi16((short)0x0001);
const __m128i *p = (const __m128i *)pixelData;
const int w8 = w / 8;
@@ -548,43 +513,28 @@ CheckAlphaResult CheckAlphaABGR1555SSE2(const u32 *pixelData, int stride, int w,
}
CheckAlphaResult CheckAlphaRGBA4444SSE2(const u32 *pixelData, int stride, int w, int h) {
const __m128i zero = _mm_setzero_si128();
const __m128i full = _mm_set1_epi16(0x000F);
const __m128i mask = _mm_set1_epi16((short)0xF000);
const __m128i *p = (const __m128i *)pixelData;
const int w8 = w / 8;
const int stride8 = stride / 8;
__m128i hasZeroCursor = _mm_setzero_si128();
__m128i bits = mask;
for (int y = 0; y < h; ++y) {
__m128i hasAnyCursor = _mm_setzero_si128();
for (int i = 0; i < w8; ++i) {
const __m128i a = _mm_srli_epi16(_mm_load_si128(&p[i]), 12);
const __m128i isZero = _mm_cmpeq_epi16(a, zero);
hasZeroCursor = _mm_or_si128(hasZeroCursor, isZero);
// If a = F, isNotFull will be 0 -> hasAny will be 0.
// If a = 0, a & isNotFull will be 0 -> hasAny will be 0.
// In any other case, hasAny will have some bits set.
const __m128i isNotFull = _mm_cmplt_epi32(a, full);
hasAnyCursor = _mm_or_si128(hasAnyCursor, _mm_and_si128(a, isNotFull));
const __m128i a = _mm_load_si128(&p[i]);
bits = _mm_and_si128(bits, a);
}
p += stride8;
// We check any early, in case we can skip the rest of the rows.
if (CombineSSEBitsToDWORD(hasAnyCursor) != 0) {
__m128i result = _mm_xor_si128(bits, mask);
if (CombineSSEBitsToDWORD(result) != 0) {
return CHECKALPHA_ANY;
}
}
// Now let's sum up the bits.
if (CombineSSEBitsToDWORD(hasZeroCursor) != 0) {
return CHECKALPHA_ANY;
} else {
return CHECKALPHA_FULL;
p += stride8;
}
return CHECKALPHA_FULL;
}
CheckAlphaResult CheckAlphaRGBA5551SSE2(const u32 *pixelData, int stride, int w, int h) {
@@ -625,26 +575,22 @@ CheckAlphaResult CheckAlphaRGBA8888Basic(const u32 *pixelData, int stride, int w
#endif
}
u32 hitZeroAlpha = 0;
const u32 *p = pixelData;
for (int y = 0; y < h; ++y) {
u32 bits = 0xFF000000;
for (int i = 0; i < w; ++i) {
u32 a = p[i] & 0xFF000000;
hitZeroAlpha |= a ^ 0xFF000000;
if (a != 0xFF000000 && a != 0) {
// We're done, we hit non-zero, non-full alpha.
return CHECKALPHA_ANY;
}
bits &= p[i];
}
if (bits != 0xFF000000) {
// We're done, we hit non-full alpha.
return CHECKALPHA_ANY;
}
p += stride;
}
if (hitZeroAlpha) {
return CHECKALPHA_ANY;
} else {
return CHECKALPHA_FULL;
}
return CHECKALPHA_FULL;
}
CheckAlphaResult CheckAlphaABGR4444Basic(const u32 *pixelData, int stride, int w, int h) {
@@ -659,29 +605,25 @@ CheckAlphaResult CheckAlphaABGR4444Basic(const u32 *pixelData, int stride, int w
#endif
}
u32 hitZeroAlpha = 0;
const u32 *p = pixelData;
const int w2 = (w + 1) / 2;
const int stride2 = (stride + 1) / 2;
for (int y = 0; y < h; ++y) {
u32 bits = 0x000F000F;
for (int i = 0; i < w2; ++i) {
u32 a = p[i] & 0x000F000F;
hitZeroAlpha |= a ^ 0x000F000F;
if (a != 0x000F000F && a != 0x0000000F && a != 0x000F0000 && a != 0) {
// We're done, we hit non-zero, non-full alpha.
return CHECKALPHA_ANY;
}
bits &= p[i];
}
if (bits != 0x000F000F) {
// We're done, we hit non-full alpha.
return CHECKALPHA_ANY;
}
p += stride2;
}
if (hitZeroAlpha) {
return CHECKALPHA_ANY;
} else {
return CHECKALPHA_FULL;
}
return CHECKALPHA_FULL;
}
CheckAlphaResult CheckAlphaABGR1555Basic(const u32 *pixelData, int stride, int w, int h) {
@@ -700,13 +642,13 @@ CheckAlphaResult CheckAlphaABGR1555Basic(const u32 *pixelData, int stride, int w
const int w2 = (w + 1) / 2;
const int stride2 = (stride + 1) / 2;
u32 bits = 0x00010001;
for (int y = 0; y < h; ++y) {
u32 bits = 0x00010001;
for (int i = 0; i < w2; ++i) {
bits &= p[i];
}
if ((bits ^ 0x00010001) != 0) {
if (bits != 0x00010001) {
return CHECKALPHA_ANY;
}
@@ -724,29 +666,25 @@ CheckAlphaResult CheckAlphaRGBA4444Basic(const u32 *pixelData, int stride, int w
}
#endif
u32 hitZeroAlpha = 0;
const u32 *p = pixelData;
const int w2 = (w + 1) / 2;
const int stride2 = (stride + 1) / 2;
for (int y = 0; y < h; ++y) {
u32 bits = 0xF000F000;
for (int i = 0; i < w2; ++i) {
u32 a = p[i] & 0xF000F000;
hitZeroAlpha |= a ^ 0xF000F000;
if (a != 0xF000F000 && a != 0xF0000000 && a != 0x0000F000 && a != 0) {
// We're done, we hit non-zero, non-full alpha.
return CHECKALPHA_ANY;
}
bits &= p[i];
}
if (bits != 0xF000F000) {
// We're done, we hit non-full alpha.
return CHECKALPHA_ANY;
}
p += stride2;
}
if (hitZeroAlpha) {
return CHECKALPHA_ANY;
} else {
return CHECKALPHA_FULL;
}
return CHECKALPHA_FULL;
}
CheckAlphaResult CheckAlphaRGBA5551Basic(const u32 *pixelData, int stride, int w, int h) {
@@ -757,18 +695,17 @@ CheckAlphaResult CheckAlphaRGBA5551Basic(const u32 *pixelData, int stride, int w
}
#endif
u32 bits = 0x80008000;
const u32 *p = pixelData;
const int w2 = (w + 1) / 2;
const int stride2 = (stride + 1) / 2;
for (int y = 0; y < h; ++y) {
u32 bits = 0x80008000;
for (int i = 0; i < w2; ++i) {
bits &= p[i];
}
if ((bits ^ 0x80008000) != 0) {
if (bits != 0x80008000) {
return CHECKALPHA_ANY;
}
Oops, something went wrong.

0 comments on commit f087b87

Please sign in to comment.