Skip to content

Commit

Permalink
TexCache: Correct alpha mask checks for SSE2.
Browse files Browse the repository at this point in the history
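The _mm_srli_si128 shifts take a count in bytes, so they should have been
shifts by 8 and 4 rather than 64 and 32, but let's just switch to shuffles
anyway.  The old shifts were always shifting in zeros, so the mask checks
always failed.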
  • Loading branch information
unknownbrackets committed Dec 3, 2022
1 parent 38eb0a7 commit 0aba5ff
Showing 1 changed file with 3 additions and 7 deletions.
10 changes: 3 additions & 7 deletions GPU/Common/TextureDecoder.cpp
Expand Up @@ -641,16 +641,12 @@ void DecodeDXT5Block(u32 *dst, const DXT5Block *src, int pitch, int height) {

#ifdef _M_SSE
// Bitwise-ANDs the four 32-bit lanes of value together and returns the result.
//
// Uses shuffles rather than _mm_srli_si128: that intrinsic takes a shift count
// in BYTES, and any count above 15 yields an all-zero register, which would
// make the AND reduction always come out as 0.
inline u32 SSEReduce32And(__m128i value) {
	// Swap the 64-bit halves and AND: lane 0 = l0 & l2, lane 1 = l1 & l3.
	value = _mm_and_si128(value, _mm_shuffle_epi32(value, _MM_SHUFFLE(1, 0, 3, 2)));
	// Broadcast lane 1 and AND: lane 0 = l0 & l1 & l2 & l3.
	value = _mm_and_si128(value, _mm_shuffle_epi32(value, _MM_SHUFFLE(1, 1, 1, 1)));
	// Extract lane 0, which now holds the full reduction.
	return _mm_cvtsi128_si32(value);
}
// Bitwise-ANDs the eight 16-bit lanes of value together.
//
// The result is in the low 16 bits of the return value; the upper 16 bits are
// always zero because they are ANDed with the zeros shifted in by (mask >> 16).
inline u32 SSEReduce16And(__m128i value) {
	// First reduce the four 32-bit lanes; each half of mask then holds the
	// AND of the corresponding 16-bit halves across all lanes.
	u32 mask = SSEReduce32And(value);
	// Fold the high half onto the low half to finish the 16-bit reduction.
	return mask & (mask >> 16);
}
#endif
Expand Down

0 comments on commit 0aba5ff

Please sign in to comment.