Skip to content

Commit

Permalink
Merge
Browse files Browse the repository at this point in the history
  • Loading branch information
bos committed Oct 3, 2012
2 parents 49694d7 + 0a3f580 commit c8840d4
Show file tree
Hide file tree
Showing 3 changed files with 109 additions and 0 deletions.
14 changes: 14 additions & 0 deletions benchmarks/Benchmarks.hs
Expand Up @@ -55,6 +55,9 @@ main = do
cSipHash (PS fp off len) = cSipHash (PS fp off len) =
inlinePerformIO . withForeignPtr fp $ \ptr -> inlinePerformIO . withForeignPtr fp $ \ptr ->
return $! c_siphash 2 4 k0 k1 (ptr `plusPtr` off) (fromIntegral len) return $! c_siphash 2 4 k0 k1 (ptr `plusPtr` off) (fromIntegral len)
-- Hash the len bytes starting at offset off of the buffer fp with the
-- SSE4.1 C routine (sse41_siphash), keyed by k0 and k1.
sse41SipHash (PS fp off len) =
inlinePerformIO . withForeignPtr fp $ \ptr ->
return $! sse41_siphash k0 k1 (ptr `plusPtr` off) (fromIntegral len)


withForeignPtr fp5 $ \ p5 -> withForeignPtr fp5 $ \ p5 ->
withForeignPtr fp8 $ \ p8 -> withForeignPtr fp8 $ \ p8 ->
Expand Down Expand Up @@ -111,6 +114,15 @@ main = do
, bench "512" $ whnf cSipHash bs512 , bench "512" $ whnf cSipHash bs512
, bench "2^20" $ whnf cSipHash bs1Mb , bench "2^20" $ whnf cSipHash bs1Mb
] ]
, bgroup "sse41SipHash"
[ bench "5" $ whnf sse41SipHash bs5
, bench "8" $ whnf sse41SipHash bs8
, bench "11" $ whnf sse41SipHash bs11
, bench "40" $ whnf sse41SipHash bs40
, bench "128" $ whnf sse41SipHash bs128
, bench "512" $ whnf sse41SipHash bs512
, bench "2^20" $ whnf sse41SipHash bs1Mb
]
, bgroup "pkgSipHash" , bgroup "pkgSipHash"
[ bench "5" $ whnf hsSipHash bs5 [ bench "5" $ whnf hsSipHash bs5
, bench "8" $ whnf hsSipHash bs8 , bench "8" $ whnf hsSipHash bs8
Expand All @@ -132,3 +144,5 @@ new (I# n#) = unBA (runST $ ST $ \s1 ->


foreign import ccall unsafe "siphash" c_siphash foreign import ccall unsafe "siphash" c_siphash
:: CInt -> CInt -> Word64 -> Word64 -> Ptr Word8 -> CSize -> Word64 :: CInt -> CInt -> Word64 -> Word64 -> Ptr Word8 -> CSize -> Word64
-- Binding to the C function siphash_sse41 (cbits/siphash-sse41.c).
-- Arguments: the two 64-bit key words, pointer to the message bytes,
-- message length in bytes. Result: the 64-bit hash.
foreign import ccall unsafe "siphash_sse41" sse41_siphash
:: Word64 -> Word64 -> Ptr Word8 -> CSize -> Word64
93 changes: 93 additions & 0 deletions benchmarks/cbits/siphash-sse41.c
@@ -0,0 +1,93 @@
#include <smmintrin.h>

#include <stdint.h>

/* Short aliases for the fixed-width unsigned types from <stdint.h>. */
typedef uint8_t u8;
typedef uint32_t u32;
typedef uint64_t u64;


/*
 * Round counts used by siphash_sse41: compression rounds applied per
 * message block, and rounds applied during finalization.
 */
enum {
    SIPHASH_ROUNDS = 2,
    SIPHASH_FINALROUNDS = 4
};

/*
typedef uint8_t u8;
typedef uint32_t u32;
typedef uint64_t u64;
*/

// Specialized for siphash, do not reuse: _mm_shufflehi_epi16 permutes only
// the four 16-bit words of the upper 64-bit lane, so this rotates the HIGH
// lane left by 16 bits and copies the low lane through unchanged.
#define rotate16(x) _mm_shufflehi_epi16((x), _MM_SHUFFLE(2,1,0,3))

// Rotate each 64-bit lane of x left by c bits (shift left by c, shift right
// by 64-c, combine the two). c == 16 is routed to rotate16, which is only a
// correct rotation for the high lane; HALF_ROUND in siphash_sse41 keeps just
// the high lane of that result via its blend.
#define _mm_roti_epi64(x, c) (((c) == 16) ? rotate16((x)) : _mm_xor_si128(_mm_slli_epi64((x), (c)), _mm_srli_epi64((x), 64-(c))))
// Generic form without the 16-bit special case, kept for reference:
//#define _mm_roti_epi64(x, c) _mm_xor_si128(_mm_slli_epi64((x), (c)), _mm_srli_epi64((x), 64-(c)))


/*
 * SipHash of the n bytes at m under the 128-bit key (_k0, _k1), computed
 * with SSE registers so that two of the four state words are updated per
 * vector operation.
 *
 * _k0, _k1  low and high 64-bit words of the key
 * m         message bytes; only m[0] .. m[n-1] are read
 * n         message length in bytes (may be 0)
 *
 * Returns the 64-bit hash. SIPHASH_ROUNDS compression rounds are applied
 * per 8-byte block and SIPHASH_FINALROUNDS rounds during finalization.
 */
u64 siphash_sse41(u64 _k0, u64 _k1, const unsigned char *m, size_t n)
{
    __m128i v0, v1, v02, v13;
    __m128i k0;
    __m128i mi;
    const __m128i zero = _mm_setzero_si128();
    const size_t tail_len = n % 8;
    u64 tail;
    size_t i, k;
    union { u64 gpr; __m128i xmm; } hash;

    /*
     * Low lane = _k0, high lane = _k1. Built directly from the integers
     * instead of storing them through a u64* into a byte array, which
     * violated strict aliasing and alignment rules.
     */
    k0 = _mm_set_epi64x((long long)_k1, (long long)_k0);

    /* v0 = (v0, v1) and v1 = (v2, v3) of the scalar SipHash state. */
    v0 = _mm_xor_si128(k0, _mm_set_epi32(0x646f7261, 0x6e646f6d, 0x736f6d65, 0x70736575));
    v1 = _mm_xor_si128(k0, _mm_set_epi32(0x74656462, 0x79746573, 0x6c796765, 0x6e657261));

    /* Regroup so the words that are updated together share a register. */
    v02 = _mm_unpacklo_epi64(v0, v1);
    v13 = _mm_unpackhi_epi64(v0, v1);

/* a += b; rotate b's low lane by s and high lane by t; b ^= a. */
#define HALF_ROUND(a,b,s,t) \
    do \
    { \
        __m128i b1,b2; \
        a = _mm_add_epi64(a, b); \
        b1 = _mm_roti_epi64(b, s); b2 = _mm_roti_epi64(b, t); b = _mm_blend_epi16(b1, b2, 0xF0); \
        b = _mm_xor_si128(b, a); \
    } while(0)

/*
 * One full round: two half rounds, each followed by a shuffle that swaps
 * the lanes of v02 and swaps the 32-bit halves of the lane moved up.
 */
#define COMPRESS(v02,v13) \
    do \
    { \
        HALF_ROUND(v02,v13,13,16); \
        v02 = _mm_shuffle_epi32(v02, _MM_SHUFFLE(0,1,3,2)); \
        HALF_ROUND(v02,v13,17,21); \
        v02 = _mm_shuffle_epi32(v02, _MM_SHUFFLE(0,1,3,2)); \
    } while(0)

    /* Whole 8-byte blocks. */
    for (i = 0; i < (n - tail_len); i += 8) {
        mi = _mm_loadl_epi64((const __m128i *)(m + i));
        v13 = _mm_xor_si128(v13, _mm_unpacklo_epi64(zero, mi));
        for (k = 0; k < SIPHASH_ROUNDS; ++k) {
            COMPRESS(v02,v13);
        }
        v02 = _mm_xor_si128(v02, mi);
    }

    /*
     * Final block: the remaining 0..7 message bytes in the low bytes and
     * (n mod 256) in the top byte. The bytes are gathered one at a time so
     * nothing past m[n-1] is read; an 8-byte load here would run off the
     * end of the buffer.
     */
    tail = (u64)(n & 0xff) << 56;
    for (k = 0; k < tail_len; ++k) {
        tail |= (u64)m[i + k] << (8 * k);
    }
    mi = _mm_set_epi64x(0, (long long)tail);

    v13 = _mm_xor_si128(v13, _mm_unpacklo_epi64(zero, mi));
    for (k = 0; k < SIPHASH_ROUNDS; ++k) {
        COMPRESS(v02,v13);
    }
    v02 = _mm_xor_si128(v02, mi);

    /* Finalization: flip the low byte of the high lane of v02, then mix. */
    v02 = _mm_xor_si128(v02, _mm_set_epi32(0, 0xff, 0, 0));
    for (k = 0; k < SIPHASH_FINALROUNDS; ++k) {
        COMPRESS(v02,v13);
    }

    /* XOR all four state words together into the low lane. */
    v0 = _mm_xor_si128(v02, v13);
    v0 = _mm_xor_si128(v0, _mm_castps_si128(_mm_movehl_ps(_mm_castsi128_ps(zero), _mm_castsi128_ps(v0))));
    hash.xmm = v0;

#undef COMPRESS
#undef HALF_ROUND
    /* The union's u64 member overlays the low 64 bits of the vector. */
    return hash.gpr;
}
2 changes: 2 additions & 0 deletions benchmarks/hashable-benchmarks.cabal
Expand Up @@ -6,9 +6,11 @@ cabal-version: >=1.2


executable hashable-benchmarks executable hashable-benchmarks
ghc-options: -Wall -O2 ghc-options: -Wall -O2
cc-options: -msse4.1
c-sources: c-sources:
../cbits/hashByteString.c ../cbits/hashByteString.c
cbits/siphash.c cbits/siphash.c
cbits/siphash-sse41.c
hs-source-dirs: .. . hs-source-dirs: .. .
main-is: Benchmarks.hs main-is: Benchmarks.hs
other-modules: other-modules:
Expand Down

0 comments on commit c8840d4

Please sign in to comment.