Skip to content

Commit

Permalink
Use short->int cvt instructions when possible
Browse files Browse the repository at this point in the history
Signed-off-by: Matthias Kretz <kretz@kde.org>
  • Loading branch information
mattkretz committed Oct 25, 2018
1 parent 17d7fee commit 6fb56ab
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 20 deletions.
20 changes: 4 additions & 16 deletions Vc/avx/simd_cast.h
Expand Up @@ -1246,14 +1246,8 @@ Vc_SIMD_CAST_AVX_2(double_v, float_v) { return AVX::concat(_mm256_cvtpd_ps(x0.d
// double_v -> int_v: truncating conversion of the packed doubles to 32-bit ints
// (_mm256_cvttpd_epi32 yields a 128-bit result), then widened via AVX::zeroExtend
// (presumably zero-filling the upper 128 bits -- confirm against AVX::zeroExtend).
Vc_SIMD_CAST_AVX_1(double_v, int_v) { return AVX::zeroExtend(_mm256_cvttpd_epi32(x.data())); }
// float_v -> int_v: truncating conversion of the packed floats to 32-bit ints.
Vc_SIMD_CAST_AVX_1( float_v, int_v) { return _mm256_cvttps_epi32(x.data()); }
// uint_v -> int_v: the underlying register is returned unchanged.
Vc_SIMD_CAST_AVX_1( uint_v, int_v) { return x.data(); }
Vc_SIMD_CAST_AVX_1( short_v, int_v) {
    // short_v -> int_v without a dedicated widening instruction.
    // Reorder the 64-bit quarters first; the 256-bit unpack below works per
    // 128-bit lane, so the second quarter has to sit at the bottom of the upper
    // lane (assuming permute4x64<X0, X2, X1, X3> selects quarters in that order).
    const auto reordered = Mem::permute4x64<X0, X2, X1, X3>(x.data());
    // Interleaving the vector with itself puts every 16-bit element into the
    // upper half of its own 32-bit slot.
    const auto doubled = _mm256_unpacklo_epi16(reordered, reordered);
    // The arithmetic right shift moves it down again while replicating the sign.
    return _mm256_srai_epi32(doubled, 16);
}
Vc_SIMD_CAST_AVX_1(ushort_v, int_v) {
    // ushort_v -> int_v without a dedicated widening instruction.
    // Bring the second 64-bit quarter to the bottom of the upper 128-bit lane,
    // because the unpack below operates on each lane separately (assuming
    // permute4x64<X0, X2, X1, X3> selects quarters in that order).
    const auto laneReady = Mem::permute4x64<X0, X2, X1, X3>(x.data());
    // Self-interleave: each 16-bit element lands in the top half of a 32-bit slot.
    const auto widened = _mm256_unpacklo_epi16(laneReady, laneReady);
    // Logical right shift fills the upper 16 bits with zeros (zero-extension).
    return _mm256_srli_epi32(widened, 16);
}
Vc_SIMD_CAST_AVX_1( short_v, int_v) {
    // short_v -> int_v: take the 128-bit half returned by AVX::lo128 and let
    // the sign-extending 16->32 bit conversion instruction widen it.
    const auto lowHalf = AVX::lo128(x.data());
    return _mm256_cvtepi16_epi32(lowHalf);
}
Vc_SIMD_CAST_AVX_1(ushort_v, int_v) {
    // ushort_v -> int_v: take the 128-bit half returned by AVX::lo128 and let
    // the zero-extending 16->32 bit conversion instruction widen it.
    const auto lowHalf = AVX::lo128(x.data());
    return _mm256_cvtepu16_epi32(lowHalf);
}
#endif

// 2: to int_v {{{3
Expand All @@ -1273,14 +1267,8 @@ Vc_SIMD_CAST_AVX_1( float_v, uint_v) {
_mm256_castps_si256(AVX::cmpge_ps(x.data(), AVX::set2power31_ps())));
}
// int_v -> uint_v: the underlying register is returned unchanged.
Vc_SIMD_CAST_AVX_1( int_v, uint_v) { return x.data(); }
Vc_SIMD_CAST_AVX_1( short_v, uint_v) {
    // short_v -> uint_v: the input is signed, so it is sign-extended to 32 bits
    // exactly as for the int_v target.
    // Step 1: reorder the 64-bit quarters so the per-lane unpack sees the second
    // quarter at the bottom of the upper lane (assuming permute4x64<X0, X2, X1, X3>
    // selects quarters in that order).
    const auto swapped = Mem::permute4x64<X0, X2, X1, X3>(x.data());
    // Step 2: interleave with itself; every element now occupies the top 16 bits
    // of a 32-bit slot.
    const auto paired = _mm256_unpacklo_epi16(swapped, swapped);
    // Step 3: arithmetic shift right by 16 replicates the sign bit.
    return _mm256_srai_epi32(paired, 16);
}
Vc_SIMD_CAST_AVX_1(ushort_v, uint_v) {
    // ushort_v -> uint_v via permute + self-unpack + logical shift.
    // Step 1: reorder the 64-bit quarters so the per-lane unpack sees the second
    // quarter at the bottom of the upper lane (assuming permute4x64<X0, X2, X1, X3>
    // selects quarters in that order).
    const auto swapped = Mem::permute4x64<X0, X2, X1, X3>(x.data());
    // Step 2: interleave with itself; every element now occupies the top 16 bits
    // of a 32-bit slot.
    const auto paired = _mm256_unpacklo_epi16(swapped, swapped);
    // Step 3: logical shift right by 16 clears the upper half (zero-extension).
    return _mm256_srli_epi32(paired, 16);
}
Vc_SIMD_CAST_AVX_1( short_v, uint_v) {
    // short_v -> uint_v: the source is signed, so the sign-extending 16->32 bit
    // conversion is applied to the 128-bit half returned by AVX::lo128.
    const auto sourceHalf = AVX::lo128(x.data());
    return _mm256_cvtepi16_epi32(sourceHalf);
}
Vc_SIMD_CAST_AVX_1(ushort_v, uint_v) {
    // ushort_v -> uint_v: zero-extending 16->32 bit conversion of the 128-bit
    // half returned by AVX::lo128.
    const auto sourceHalf = AVX::lo128(x.data());
    return _mm256_cvtepu16_epi32(sourceHalf);
}
#endif

// 2: to uint_v {{{3
Expand Down
20 changes: 16 additions & 4 deletions Vc/sse/casts.h
Expand Up @@ -68,8 +68,20 @@ Vc_INTRINSIC __m128i convert(__m128 v, ConvertTag<float , int >) { return _mm
// double -> int: truncating conversion of the packed doubles to 32-bit ints.
Vc_INTRINSIC __m128i convert(__m128d v, ConvertTag<double, int >) { return _mm_cvttpd_epi32(v); }
// int -> int: identity, the register is returned unchanged.
Vc_INTRINSIC __m128i convert(__m128i v, ConvertTag<int , int >) { return v; }
// uint -> int: the register is returned unchanged (no instruction needed).
Vc_INTRINSIC __m128i convert(__m128i v, ConvertTag<uint , int >) { return v; }
Vc_INTRINSIC __m128i convert(__m128i v, ConvertTag<short , int >) {
    // short -> int for the low four 16-bit elements:
    // self-interleaving places each element in the upper half of a 32-bit slot,
    // and the arithmetic right shift brings it back down with sign replication.
    const __m128i doubled = _mm_unpacklo_epi16(v, v);
    return _mm_srai_epi32(doubled, 16);
}
Vc_INTRINSIC __m128i convert(__m128i v, ConvertTag<ushort, int >) {
    // ushort -> int for the low four 16-bit elements:
    // self-interleaving places each element in the upper half of a 32-bit slot,
    // and the logical right shift brings it back down with zero fill.
    const __m128i doubled = _mm_unpacklo_epi16(v, v);
    return _mm_srli_epi32(doubled, 16);
}
// short -> int: sign-extends the low four 16-bit elements of v to 32 bits.
Vc_INTRINSIC __m128i convert(__m128i v, ConvertTag<short , int >) {
#if defined(Vc_IMPL_SSE4_1)
    // SSE4.1 offers a single widening instruction for this.
    return _mm_cvtepi16_epi32(v);
#else
    // Fallback: duplicate each element into a 32-bit slot, then shift the copy
    // in the upper half down arithmetically so the sign is replicated.
    const __m128i doubled = _mm_unpacklo_epi16(v, v);
    return _mm_srai_epi32(doubled, 16);
#endif
}
// ushort -> int: zero-extends the low four 16-bit elements of v to 32 bits.
Vc_INTRINSIC __m128i convert(__m128i v, ConvertTag<ushort, int >) {
#if defined(Vc_IMPL_SSE4_1)
    // SSE4.1 offers a single widening instruction for this.
    return _mm_cvtepu16_epi32(v);
#else
    // Fallback: duplicate each element into a 32-bit slot, then shift the copy
    // in the upper half down logically so the top 16 bits become zero.
    const __m128i doubled = _mm_unpacklo_epi16(v, v);
    return _mm_srli_epi32(doubled, 16);
#endif
}
Vc_INTRINSIC __m128i convert(__m128 v, ConvertTag<float , uint >) {
return _mm_castps_si128(
blendv_ps(_mm_castsi128_ps(_mm_cvttps_epi32(v)),
Expand All @@ -91,8 +103,8 @@ Vc_INTRINSIC __m128i convert(__m128d v, ConvertTag<double, uint >) {
}
// int -> uint: the register is returned unchanged (no instruction needed).
Vc_INTRINSIC __m128i convert(__m128i v, ConvertTag<int , uint >) { return v; }
// uint -> uint: identity, the register is returned unchanged.
Vc_INTRINSIC __m128i convert(__m128i v, ConvertTag<uint , uint >) { return v; }
Vc_INTRINSIC __m128i convert(__m128i v, ConvertTag<short , uint >) {
    // short -> uint for the low four 16-bit elements. The source is signed, so
    // the widening is a sign-extension: self-interleave, then shift the upper
    // copy down arithmetically.
    const __m128i interleaved = _mm_unpacklo_epi16(v, v);
    return _mm_srai_epi32(interleaved, 16);
}
Vc_INTRINSIC __m128i convert(__m128i v, ConvertTag<ushort, uint >) {
    // ushort -> uint for the low four 16-bit elements: self-interleave, then
    // shift the upper copy down logically so the top 16 bits become zero.
    const __m128i interleaved = _mm_unpacklo_epi16(v, v);
    return _mm_srli_epi32(interleaved, 16);
}
Vc_INTRINSIC __m128i convert(__m128i v, ConvertTag<short , uint >) {
    // short -> uint: forwards to the short -> int overload and returns its
    // result unchanged, so both targets share one implementation.
    const __m128i widened = convert(v, ConvertTag<short, int>());
    return widened;
}
Vc_INTRINSIC __m128i convert(__m128i v, ConvertTag<ushort, uint >) {
    // ushort -> uint: forwards to the ushort -> int overload and returns its
    // result unchanged, so both targets share one implementation.
    const __m128i widened = convert(v, ConvertTag<ushort, int>());
    return widened;
}
// float -> float: identity, the register is returned unchanged.
Vc_INTRINSIC __m128 convert(__m128 v, ConvertTag<float , float >) { return v; }
// double -> float: narrows the packed doubles to single precision.
Vc_INTRINSIC __m128 convert(__m128d v, ConvertTag<double, float >) { return _mm_cvtpd_ps(v); }
// int -> float: converts the packed signed 32-bit ints to single precision.
Vc_INTRINSIC __m128 convert(__m128i v, ConvertTag<int , float >) { return _mm_cvtepi32_ps(v); }
Expand Down

0 comments on commit 6fb56ab

Please sign in to comment.