Permalink
Browse files

Use optimized SSE2 implementation even if _mm_shuffle_epi8 is not available
  • Loading branch information...
jedisct1 committed Aug 11, 2018
1 parent 00f1332 commit 39eb529905ce118b674a7723c0d2b48074b9986d
Showing with 19 additions and 2 deletions.
  1. +2 −2 impl/gimli-core.h
  2. +17 −0 impl/gimli-core/{ssse3.h → sse2.h}
View
@@ -1,5 +1,5 @@
#ifdef __SSSE3__
# include "gimli-core/ssse3.h"
#ifdef __SSE2__
# include "gimli-core/sse2.h"
#else
# include "gimli-core/portable.h"
#endif
@@ -14,11 +14,28 @@ rotate(__m128i x, int bits)
return _mm_slli_epi32(x, bits) | _mm_srli_epi32(x, 32 - bits);
}
/*
 * rotate24: rotate each of the four 32-bit lanes of x left by 24 bits
 * (equivalently, right by 8 bits) and return the result.
 *
 * Two implementations are provided; both produce the same value.
 */
#ifdef __SSSE3__
static inline __m128i
rotate24(__m128i x)
{
    /*
     * A rotation by a whole number of bytes is a byte permutation, so a
     * single byte shuffle does the job. _mm_set_epi8 lists bytes from
     * index 15 down to index 0: within every 4-byte lane, destination
     * bytes 0,1,2,3 take source bytes 1,2,3,0.
     */
    return _mm_shuffle_epi8(x, _mm_set_epi8(12, 15, 14, 13, 8, 11, 10, 9, 4, 7, 6, 5, 0, 3, 2, 1));
}
#else
static inline __m128i
rotate24(__m128i x)
{
    /*
     * Without the SSSE3 byte shuffle, build the rotation from two SSE2
     * shifts and an OR. This keeps the value in registers instead of
     * spilling it to a stack buffer and permuting it byte by byte.
     */
    return _mm_or_si128(_mm_slli_epi32(x, 24), _mm_srli_epi32(x, 8));
}
#endif
static const uint32_t coeffs[24] _hydro_attr_aligned_(16) = {
0x9e377904, 0, 0, 0, 0x9e377908, 0, 0, 0, 0x9e37790c, 0, 0, 0,

0 comments on commit 39eb529

Please sign in to comment.