From 0aba5ff3c18d75fe9d08991cf7c7269a73cdd7ed Mon Sep 17 00:00:00 2001
From: "Unknown W. Brackets"
Date: Sat, 3 Dec 2022 12:38:01 -0800
Subject: [PATCH] TexCache: Correct alpha mask checks for SSE2.

Should have been shifts by byte (4/8), but let's just switch to shuffles
anyway. These were always shifting in zeros and failing.
---
 GPU/Common/TextureDecoder.cpp | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/GPU/Common/TextureDecoder.cpp b/GPU/Common/TextureDecoder.cpp
index d7788077a625..22e45b151be6 100644
--- a/GPU/Common/TextureDecoder.cpp
+++ b/GPU/Common/TextureDecoder.cpp
@@ -641,16 +641,12 @@ void DecodeDXT5Block(u32 *dst, const DXT5Block *src, int pitch, int height) {
 
 #ifdef _M_SSE
 inline u32 SSEReduce32And(__m128i value) {
-	// TODO: Should use a shuffle instead of slri, probably.
-	value = _mm_and_si128(value, _mm_srli_si128(value, 64));
-	value = _mm_and_si128(value, _mm_srli_si128(value, 32));
+	value = _mm_and_si128(value, _mm_shuffle_epi32(value, _MM_SHUFFLE(1, 0, 3, 2)));
+	value = _mm_and_si128(value, _mm_shuffle_epi32(value, _MM_SHUFFLE(1, 1, 1, 1)));
 	return _mm_cvtsi128_si32(value);
 }
 inline u32 SSEReduce16And(__m128i value) {
-	// TODO: Should use a shuffle instead of slri, probably.
-	value = _mm_and_si128(value, _mm_srli_si128(value, 64));
-	value = _mm_and_si128(value, _mm_srli_si128(value, 32));
-	u32 mask = _mm_cvtsi128_si32(value);
+	u32 mask = SSEReduce32And(value);
 	return mask & (mask >> 16);
 }
 #endif