Skip to content

Commit

Permalink
Remove SSE4 path from Vec4<int>::operator*
Browse files (browse the repository at this point in the history)
  • Loading branch information
fp64 committed Jul 1, 2023
1 parent f133739 commit cd9f01c
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 13 deletions.
22 changes: 10 additions & 12 deletions GPU/Math3D.h
Expand Up @@ -1387,20 +1387,18 @@ template<>
inline Vec4<int> Vec4<int>::operator * (const Vec4 &other) const {
__m128i a = SAFE_M128I(ivec);
__m128i b = SAFE_M128I(other.ivec);
#if _M_SSE >= 0x401
return Vec4<int>(_mm_mullo_epi32(a, b));
#else
// This is what clang does. Seems about as good
// as it gets.
// Intel in its immense wisdom decided that
// SSE2 does not get _mm_mullo_epi32(),
// so we do it this way. This is what clang does,
// which seems about as good as it gets.
__m128i m02 = _mm_mul_epu32(a, b);
__m128i m13 = _mm_mul_epu32( // 0xF5 -> [1, 1, 3, 3]
_mm_shuffle_epi32(a, 0xF5),
_mm_shuffle_epi32(b, 0xF5));
__m128i ret = _mm_unpacklo_epi32( // 0xE8 -> [0, 2, 2, 3]
_mm_shuffle_epi32(m02, 0xE8),
_mm_shuffle_epi32(m13, 0xE8));
__m128i m13 = _mm_mul_epu32(
_mm_shuffle_epi32(a, _MM_SHUFFLE(3, 3, 1, 1)),
_mm_shuffle_epi32(b, _MM_SHUFFLE(3, 3, 1, 1)));
__m128i ret = _mm_unpacklo_epi32(
_mm_shuffle_epi32(m02, _MM_SHUFFLE(3, 2, 2, 0)),
_mm_shuffle_epi32(m13, _MM_SHUFFLE(3, 2, 2, 0)));
return Vec4<int>(ret);
#endif
}

template<> template<>
Expand Down
2 changes: 1 addition & 1 deletion GPU/Software/Sampler.cpp
Expand Up @@ -748,7 +748,7 @@ static Vec4IntResult SOFTRAST_CALL SampleLinearLevel(float s, float t, const u8
Vec4<int> texcolor_br = Vec4<int>::FromRGBA(c.v[3]);
Vec4<int> top = texcolor_tl * (0x10 - frac_u) + texcolor_tr * frac_u;
Vec4<int> bot = texcolor_bl * (0x10 - frac_u) + texcolor_br * frac_u;
return ToVec4IntResult((top * (0x10 - frac_v) + bot * frac_v) >> 8);
return ToVec4IntResult((top * (0x10 - frac_v) + bot * frac_v) >> (4 + 4));
#endif
}

Expand Down

0 comments on commit cd9f01c

Please sign in to comment.