From 9646e31532ef9c4324e3c97ceec4adffe0e1bace Mon Sep 17 00:00:00 2001 From: Oleg Grenrus Date: Sat, 18 May 2024 15:06:32 +0300 Subject: [PATCH] Delete stuff from benchmarks we don't really care about --- hashable-bench/benchmarks/Benchmarks.hs | 113 +------- hashable-bench/benchmarks/cbits/inthash.c | 28 -- .../benchmarks/cbits/siphash-sse2.c | 129 --------- .../benchmarks/cbits/siphash-sse41.c | 86 ------ hashable-bench/benchmarks/cbits/siphash.c | 262 ------------------ hashable-bench/benchmarks/cbits/siphash.h | 68 ----- hashable-bench/benchmarks/cbits/wang.c | 29 -- hashable-bench/hashable-bench.cabal | 30 +- .../src-siphash/Data/Hashable/SipHash.hs | 159 ----------- 9 files changed, 4 insertions(+), 900 deletions(-) delete mode 100644 hashable-bench/benchmarks/cbits/inthash.c delete mode 100644 hashable-bench/benchmarks/cbits/siphash-sse2.c delete mode 100644 hashable-bench/benchmarks/cbits/siphash-sse41.c delete mode 100644 hashable-bench/benchmarks/cbits/siphash.c delete mode 100644 hashable-bench/benchmarks/cbits/siphash.h delete mode 100644 hashable-bench/benchmarks/cbits/wang.c delete mode 100644 hashable-bench/src-siphash/Data/Hashable/SipHash.hs diff --git a/hashable-bench/benchmarks/Benchmarks.hs b/hashable-bench/benchmarks/Benchmarks.hs index 435f4c63..c4ca1814 100644 --- a/hashable-bench/benchmarks/Benchmarks.hs +++ b/hashable-bench/benchmarks/Benchmarks.hs @@ -6,14 +6,10 @@ module Main (main) where import Control.Monad.ST import Criterion.Main import Data.Hashable -import Data.Hashable.SipHash import Data.Int import Foreign.ForeignPtr import GHC.Exts import GHC.ST (ST(..)) -import Data.Word -import Foreign.C.Types (CInt(..), CLong(..), CSize(..)) -import Foreign.Ptr import Data.ByteString.Internal import GHC.Generics (Generic) import qualified Data.ByteString.Lazy as BL @@ -72,27 +68,9 @@ main = do let k0 = 0x4a7330fae70f52e8 k1 = 0x919ea5953a9a1ec9 - sipHash = hashByteString 2 4 k0 k1 + + hsSipHash :: ByteString -> HS.SipHash hsSipHash = HS.hash (HS.SipKey k0 k1) - cSipHash (PS fp off len) = - accursedUnutterablePerformIO . withForeignPtr fp $ \ptr -> - return $! c_siphash 2 4 k0 k1 (ptr `plusPtr` off) (fromIntegral len) - cSipHash24 (PS fp off len) = - accursedUnutterablePerformIO . withForeignPtr fp $ \ptr -> - return $! c_siphash24 k0 k1 (ptr `plusPtr` off) (fromIntegral len) - fnvHash (PS fp off len) = - accursedUnutterablePerformIO . withForeignPtr fp $ \ptr -> - return $! fnv_hash (ptr `plusPtr` off) (fromIntegral len) 2166136261 -#ifdef HAVE_SSE2 - sse2SipHash (PS fp off len) = - accursedUnutterablePerformIO . withForeignPtr fp $ \ptr -> - return $! sse2_siphash k0 k1 (ptr `plusPtr` off) (fromIntegral len) -#endif -#ifdef HAVE_SSE41 - sse41SipHash (PS fp off len) = - accursedUnutterablePerformIO . withForeignPtr fp $ \ptr -> - return $! sse41_siphash k0 k1 (ptr `plusPtr` off) (fromIntegral len) -#endif withForeignPtr fp5 $ \ p5 -> withForeignPtr fp8 $ \ p8 -> @@ -181,55 +159,6 @@ main = do , bench "Int64" $ whnf hash (0x7eadbeefdeadbeef :: Int64) , bench "Double" $ whnf hash (0.3780675796601578 :: Double) ] - , bgroup "sipHash" - [ bench "5" $ whnf sipHash bs5 - , bench "8" $ whnf sipHash bs8 - , bench "11" $ whnf sipHash bs11 - , bench "40" $ whnf sipHash bs40 - , bench "128" $ whnf sipHash bs128 - , bench "512" $ whnf sipHash bs512 - , bench "2^20" $ whnf sipHash bs1Mb - ] - , bgroup "cSipHash" - [ bench "5" $ whnf cSipHash bs5 - , bench "8" $ whnf cSipHash bs8 - , bench "11" $ whnf cSipHash bs11 - , bench "40" $ whnf cSipHash bs40 - , bench "128" $ whnf cSipHash bs128 - , bench "512" $ whnf cSipHash bs512 - , bench "2^20" $ whnf cSipHash bs1Mb - ] - , bgroup "cSipHash24" - [ bench "5" $ whnf cSipHash24 bs5 - , bench "8" $ whnf cSipHash24 bs8 - , bench "11" $ whnf cSipHash24 bs11 - , bench "40" $ whnf cSipHash24 bs40 - , bench "128" $ whnf cSipHash24 bs128 - , bench "512" $ whnf cSipHash24 bs512 - , bench "2^20" $ whnf cSipHash24 bs1Mb - ] -#ifdef HAVE_SSE2 - , bgroup "sse2SipHash" - [ bench "5" $ whnf sse2SipHash bs5 - , bench "8" $ whnf sse2SipHash bs8 - , bench "11" $ whnf sse2SipHash bs11 - , bench "40" $ whnf sse2SipHash bs40 - , bench "128" $ whnf sse2SipHash bs128 - , bench "512" $ whnf sse2SipHash bs512 - , bench "2^20" $ whnf sse2SipHash bs1Mb - ] -#endif -#ifdef HAVE_SSE41 - , bgroup "sse41SipHash" - [ bench "5" $ whnf sse41SipHash bs5 - , bench "8" $ whnf sse41SipHash bs8 - , bench "11" $ whnf sse41SipHash bs11 - , bench "40" $ whnf sse41SipHash bs40 - , bench "128" $ whnf sse41SipHash bs128 - , bench "512" $ whnf sse41SipHash bs512 - , bench "2^20" $ whnf sse41SipHash bs1Mb - ] -#endif , bgroup "pkgSipHash" [ bench "5" $ whnf hsSipHash bs5 , bench "8" $ whnf hsSipHash bs8 @@ -239,22 +168,9 @@ main = do , bench "512" $ whnf hsSipHash bs512 , bench "2^20" $ whnf hsSipHash bs1Mb ] - , bgroup "fnv" - [ bench "5" $ whnf fnvHash bs5 - , bench "8" $ whnf fnvHash bs8 - , bench "11" $ whnf fnvHash bs11 - , bench "40" $ whnf fnvHash bs40 - , bench "128" $ whnf fnvHash bs128 - , bench "512" $ whnf fnvHash bs512 - , bench "2^20" $ whnf fnvHash bs1Mb - ] , bgroup "Int" [ bench "id32" $ whnf id (0x7eadbeef :: Int32) , bench "id64" $ whnf id (0x7eadbeefdeadbeef :: Int64) - , bench "wang32" $ whnf hash_wang_32 0xdeadbeef - , bench "wang64" $ whnf hash_wang_64 0xdeadbeefdeadbeef - , bench "jenkins32a" $ whnf hash_jenkins_32a 0xdeadbeef - , bench "jenkins32b" $ whnf hash_jenkins_32b 0xdeadbeef ] , bgroup "Generic" [ bench "product" $ whnf hash exP @@ -271,31 +187,6 @@ new (I# n#) = unBA (runST $ ST $ \s1 -> (# s2, ary #) -> case unsafeFreezeByteArray# ary s2 of (# s3, ba #) -> (# s3, BA ba #)) -foreign import ccall unsafe "hashable_siphash" c_siphash - :: CInt -> CInt -> Word64 -> Word64 -> Ptr Word8 -> CSize -> Word64 -foreign import ccall unsafe "hashable_siphash24" c_siphash24 - :: Word64 -> Word64 -> Ptr Word8 -> CSize -> Word64 -#ifdef HAVE_SSE2 -foreign import ccall unsafe "hashable_siphash24_sse2" sse2_siphash - :: Word64 -> Word64 -> Ptr Word8 -> CSize -> Word64 -#endif -#ifdef HAVE_SSE41 -foreign import ccall unsafe "hashable_siphash24_sse41" sse41_siphash - :: Word64 -> Word64 -> Ptr Word8 -> CSize -> Word64 -#endif - -foreign import ccall unsafe "hashable_fnv_hash" fnv_hash - :: Ptr Word8 -> CLong -> CLong -> CLong - -foreign import ccall unsafe "hashable_wang_32" hash_wang_32 - :: Word32 -> Word32 -foreign import ccall unsafe "hashable_wang_64" hash_wang_64 - :: Word64 -> Word64 -foreign import ccall unsafe "hash_jenkins_32a" hash_jenkins_32a - :: Word32 -> Word32 -foreign import ccall unsafe "hash_jenkins_32b" hash_jenkins_32b - :: Word32 -> Word32 - data PS = PS1 Int Char Bool | PS2 String () diff --git a/hashable-bench/benchmarks/cbits/inthash.c b/hashable-bench/benchmarks/cbits/inthash.c deleted file mode 100644 index 5fc32e98..00000000 --- a/hashable-bench/benchmarks/cbits/inthash.c +++ /dev/null @@ -1,28 +0,0 @@ -#include - -/* - * 32-bit hashes by Bob Jenkins. - */ - -uint32_t hash_jenkins_32a(uint32_t a) -{ - a = (a+0x7ed55d16) + (a<<12); - a = (a^0xc761c23c) ^ (a>>19); - a = (a+0x165667b1) + (a<<5); - a = (a+0xd3a2646c) ^ (a<<9); - a = (a+0xfd7046c5) + (a<<3); - a = (a^0xb55a4f09) ^ (a>>16); - return a; -} - -uint32_t hash_jenkins_32b(uint32_t a) -{ - a -= (a<<6); - a ^= (a>>17); - a -= (a<<9); - a ^= (a<<4); - a -= (a<<3); - a ^= (a<<10); - a ^= (a>>15); - return a; -} diff --git a/hashable-bench/benchmarks/cbits/siphash-sse2.c b/hashable-bench/benchmarks/cbits/siphash-sse2.c deleted file mode 100644 index 74762bc1..00000000 --- a/hashable-bench/benchmarks/cbits/siphash-sse2.c +++ /dev/null @@ -1,129 +0,0 @@ -/* - * The original code was developed by Samuel Neves, and has been - * only lightly modified. - * - * Used with permission. - */ -#pragma GCC target("sse2") - -#include -#include "siphash.h" - -#define _mm_roti_epi64(x, c) ((16 == (c)) ? _mm_shufflelo_epi16((x), _MM_SHUFFLE(2,1,0,3)) : _mm_xor_si128(_mm_slli_epi64((x), (c)), _mm_srli_epi64((x), 64-(c)))) - -u64 hashable_siphash24_sse2(u64 ik0, u64 ik1, const u8 *m, size_t n) -{ - __m128i v0, v1, v2, v3; - __m128i k0, k1; - __m128i mi, mask, len; - size_t i, k; - union { u64 gpr; __m128i xmm; } hash; - const u8 *p; - - /* We used to use the _mm_seti_epi32 intrinsic to initialize - SSE2 registers. This compiles to a movdqa instruction, - which requires 16-byte alignment. On 32-bit Windows, it - looks like ghc's runtime linker doesn't align ".rdata" - sections as requested, so we got segfaults for our trouble. - - Now we use an intrinsic that cares less about alignment - (_mm_loadu_si128, aka movdqu) instead, and all seems - happy. */ - - static const u32 const iv[6][4] = { - { 0x70736575, 0x736f6d65, 0, 0 }, - { 0x6e646f6d, 0x646f7261, 0, 0 }, - { 0x6e657261, 0x6c796765, 0, 0 }, - { 0x79746573, 0x74656462, 0, 0 }, - { -1, -1, 0, 0 }, - { 255, 0, 0, 0 }, - }; - - k0 = _mm_loadl_epi64((__m128i*)(&ik0)); - k1 = _mm_loadl_epi64((__m128i*)(&ik1)); - - v0 = _mm_xor_si128(k0, _mm_loadu_si128((__m128i*) &iv[0])); - v1 = _mm_xor_si128(k1, _mm_loadu_si128((__m128i*) &iv[1])); - v2 = _mm_xor_si128(k0, _mm_loadu_si128((__m128i*) &iv[2])); - v3 = _mm_xor_si128(k1, _mm_loadu_si128((__m128i*) &iv[3])); - -#define HALF_ROUND(a,b,c,d,s,t) \ - do \ - { \ - a = _mm_add_epi64(a, b); c = _mm_add_epi64(c, d); \ - b = _mm_roti_epi64(b, s); d = _mm_roti_epi64(d, t); \ - b = _mm_xor_si128(b, a); d = _mm_xor_si128(d, c); \ - } while(0) - -#define COMPRESS(v0,v1,v2,v3) \ - do \ - { \ - HALF_ROUND(v0,v1,v2,v3,13,16); \ - v0 = _mm_shufflelo_epi16(v0, _MM_SHUFFLE(1,0,3,2)); \ - HALF_ROUND(v2,v1,v0,v3,17,21); \ - v2 = _mm_shufflelo_epi16(v2, _MM_SHUFFLE(1,0,3,2)); \ - } while(0) - - for(i = 0; i < (n-n%8); i += 8) - { - mi = _mm_loadl_epi64((__m128i*)(m + i)); - v3 = _mm_xor_si128(v3, mi); - if (SIPHASH_ROUNDS == 2) { - COMPRESS(v0,v1,v2,v3); COMPRESS(v0,v1,v2,v3); - } else { - for (k = 0; k < SIPHASH_ROUNDS; ++k) - COMPRESS(v0,v1,v2,v3); - } - v0 = _mm_xor_si128(v0, mi); - } - - p = m + n; - - /* We must be careful to not trigger a segfault by reading an - unmapped page. So where is the end of our input? */ - - if (((uintptr_t) p & 4095) == 0) - /* Exactly at a page boundary: do not read past the end. */ - mi = _mm_setzero_si128(); - else if (((uintptr_t) p & 4095) <= 4088) - /* Inside a page: safe to read past the end, as we'll - mask out any bits we shouldn't have looked at below. */ - mi = _mm_loadl_epi64((__m128i*)(m + i)); - else - /* Within 8 bytes of the end of a page: ensure that - our final read re-reads some bytes so that we do - not cross the page boundary, then shift our result - right so that the re-read bytes vanish. */ - mi = _mm_srli_epi64(_mm_loadl_epi64((__m128i*)(((uintptr_t) m + i) & ~7)), - 8 * (((uintptr_t) m + i) % 8)); - - len = _mm_set_epi32(0, 0, (n&0xff) << 24, 0); - mask = _mm_srli_epi64(_mm_loadu_si128((__m128i*) &iv[4]), 8*(8-n%8)); - mi = _mm_xor_si128(_mm_and_si128(mi, mask), len); - - v3 = _mm_xor_si128(v3, mi); - if (SIPHASH_ROUNDS == 2) { - COMPRESS(v0,v1,v2,v3); COMPRESS(v0,v1,v2,v3); - } else { - for (k = 0; k < SIPHASH_ROUNDS; ++k) - COMPRESS(v0,v1,v2,v3); - } - v0 = _mm_xor_si128(v0, mi); - - v2 = _mm_xor_si128(v2, _mm_loadu_si128((__m128i*) &iv[5])); - if (SIPHASH_FINALROUNDS == 4) { - COMPRESS(v0,v1,v2,v3); COMPRESS(v0,v1,v2,v3); - COMPRESS(v0,v1,v2,v3); COMPRESS(v0,v1,v2,v3); - } else { - for (k = 0; k < SIPHASH_FINALROUNDS; ++k) - COMPRESS(v0,v1,v2,v3); - } - - v0 = _mm_xor_si128(_mm_xor_si128(v0, v1), _mm_xor_si128(v2, v3)); - hash.xmm = v0; - -#undef COMPRESS -#undef HALF_ROUND - //return _mm_extract_epi32(v0, 0) | (((u64)_mm_extract_epi32(v0, 1)) << 32); - return hash.gpr; -} diff --git a/hashable-bench/benchmarks/cbits/siphash-sse41.c b/hashable-bench/benchmarks/cbits/siphash-sse41.c deleted file mode 100644 index a8cf081c..00000000 --- a/hashable-bench/benchmarks/cbits/siphash-sse41.c +++ /dev/null @@ -1,86 +0,0 @@ -/* - * The original code was developed by Samuel Neves, and has been - * only lightly modified. - * - * Used with permission. - */ -#pragma GCC target("sse4.1") - -#include -#include "siphash.h" - -// Specialized for siphash, do not reuse -#define rotate16(x) _mm_shufflehi_epi16((x), _MM_SHUFFLE(2,1,0,3)) - -#define _mm_roti_epi64(x, c) (((c) == 16) ? rotate16((x)) : _mm_xor_si128(_mm_slli_epi64((x), (c)), _mm_srli_epi64((x), 64-(c)))) -//#define _mm_roti_epi64(x, c) _mm_xor_si128(_mm_slli_epi64((x), (c)), _mm_srli_epi64((x), 64-(c))) - - -u64 hashable_siphash24_sse41(u64 _k0, u64 _k1, const unsigned char *m, size_t n) -{ - __m128i v0, v1, v02, v13; - __m128i k0; - __m128i mi, mask, len, h; - const __m128i zero = _mm_setzero_si128(); - size_t i, k; - union { u64 gpr; __m128i xmm; } hash; - unsigned char key[16]; - - ((u64 *)key)[0] = _k0; - ((u64 *)key)[1] = _k1; - - k0 = _mm_loadu_si128((__m128i*)(key + 0)); - - v0 = _mm_xor_si128(k0, _mm_set_epi32(0x646f7261, 0x6e646f6d, 0x736f6d65, 0x70736575)); - v1 = _mm_xor_si128(k0, _mm_set_epi32(0x74656462, 0x79746573, 0x6c796765, 0x6e657261)); - - v02 = _mm_unpacklo_epi64(v0, v1); - v13 = _mm_unpackhi_epi64(v0, v1); - -#define HALF_ROUND(a,b,s,t) \ -do \ -{ \ - __m128i b1,b2; \ - a = _mm_add_epi64(a, b); \ - b1 = _mm_roti_epi64(b, s); b2 = _mm_roti_epi64(b, t); b = _mm_blend_epi16(b1, b2, 0xF0); \ - b = _mm_xor_si128(b, a); \ -} while(0) - -#define COMPRESS(v02,v13) \ - do \ - { \ - HALF_ROUND(v02,v13,13,16); \ - v02 = _mm_shuffle_epi32(v02, _MM_SHUFFLE(0,1,3,2)); \ - HALF_ROUND(v02,v13,17,21); \ - v02 = _mm_shuffle_epi32(v02, _MM_SHUFFLE(0,1,3,2)); \ - } while(0) - - for(i = 0; i < (n-n%8); i += 8) - { - mi = _mm_loadl_epi64((__m128i*)(m + i)); - v13 = _mm_xor_si128(v13, _mm_unpacklo_epi64(zero, mi)); - for(k = 0; k < SIPHASH_ROUNDS; ++k) COMPRESS(v02,v13); - v02 = _mm_xor_si128(v02, mi); - } - - mi = _mm_loadl_epi64((__m128i*)(m + i)); - len = _mm_set_epi32(0, 0, (n&0xff) << 24, 0); - mask = _mm_srli_epi64(_mm_set_epi32(0, 0, 0xffffffff, 0xffffffff), 8*(8-n%8)); - mi = _mm_xor_si128(_mm_and_si128(mi, mask), len); - - v13 = _mm_xor_si128(v13, _mm_unpacklo_epi64(zero, mi)); - for(k = 0; k < SIPHASH_ROUNDS; ++k) COMPRESS(v02,v13); - v02 = _mm_xor_si128(v02, mi); - - v02 = _mm_xor_si128(v02, _mm_set_epi32(0, 0xff, 0, 0)); - for(k = 0; k < SIPHASH_FINALROUNDS; ++k) COMPRESS(v02,v13); - - v0 = _mm_xor_si128(v02, v13); - v0 = _mm_xor_si128(v0, _mm_castps_si128(_mm_movehl_ps(_mm_castsi128_ps(zero), _mm_castsi128_ps(v0)))); - hash.xmm = v0; - -#undef COMPRESS -#undef HALF_ROUND - //return _mm_extract_epi32(v0, 0) | (((u64)_mm_extract_epi32(v0, 1)) << 32); - return hash.gpr; -} diff --git a/hashable-bench/benchmarks/cbits/siphash.c b/hashable-bench/benchmarks/cbits/siphash.c deleted file mode 100644 index 7ed11d83..00000000 --- a/hashable-bench/benchmarks/cbits/siphash.c +++ /dev/null @@ -1,262 +0,0 @@ -/* Almost a verbatim copy of the reference implementation. */ - -#include -#include "siphash.h" - -#define ROTL(x,b) (u64)(((x) << (b)) | ((x) >> (64 - (b)))) - -#define SIPROUND \ - do { \ - v0 += v1; v1=ROTL(v1,13); v1 ^= v0; v0=ROTL(v0,32); \ - v2 += v3; v3=ROTL(v3,16); v3 ^= v2; \ - v0 += v3; v3=ROTL(v3,21); v3 ^= v0; \ - v2 += v1; v1=ROTL(v1,17); v1 ^= v2; v2=ROTL(v2,32); \ - } while(0) - -#if defined(__i386) -# define _siphash24 plain_siphash24 -#endif - -static inline u64 odd_read(const u8 *p, int count, u64 val, int shift) -{ - switch (count) { - case 7: val |= ((u64)p[6]) << (shift + 48); - case 6: val |= ((u64)p[5]) << (shift + 40); - case 5: val |= ((u64)p[4]) << (shift + 32); - case 4: val |= ((u64)p[3]) << (shift + 24); - case 3: val |= ((u64)p[2]) << (shift + 16); - case 2: val |= ((u64)p[1]) << (shift + 8); - case 1: val |= ((u64)p[0]) << shift; - } - return val; -} - -static inline u64 _siphash(int c, int d, u64 k0, u64 k1, - const u8 *str, size_t len) -{ - u64 v0 = 0x736f6d6570736575ull ^ k0; - u64 v1 = 0x646f72616e646f6dull ^ k1; - u64 v2 = 0x6c7967656e657261ull ^ k0; - u64 v3 = 0x7465646279746573ull ^ k1; - const u8 *end, *p; - u64 b; - int i; - - for (p = str, end = str + (len & ~7); p < end; p += 8) { - u64 m = peek_u64le((u64 *) p); - v3 ^= m; - if (c == 2) { - SIPROUND; - SIPROUND; - } else { - for (i = 0; i < c; i++) - SIPROUND; - } - v0 ^= m; - } - - b = odd_read(p, len & 7, ((u64) len) << 56, 0); - - v3 ^= b; - if (c == 2) { - SIPROUND; - SIPROUND; - } else { - for (i = 0; i < c; i++) - SIPROUND; - } - v0 ^= b; - - v2 ^= 0xff; - if (d == 4) { - SIPROUND; - SIPROUND; - SIPROUND; - SIPROUND; - } else { - for (i = 0; i < d; i++) - SIPROUND; - } - b = v0 ^ v1 ^ v2 ^ v3; - return b; -} - - -static inline u64 _siphash24(u64 k0, u64 k1, const u8 *str, size_t len) -{ - return _siphash(2, 4, k0, k1, str, len); -} - -#if defined(__i386) -# undef _siphash24 - -static u64 (*_siphash24)(u64 k0, u64 k1, const u8 *, size_t); - -static void maybe_use_sse() - __attribute__((constructor)); - -static void maybe_use_sse() -{ - uint32_t eax = 1, ebx, ecx, edx; - - __asm volatile - ("mov %%ebx, %%edi;" /* 32bit PIC: don't clobber ebx */ - "cpuid;" - "mov %%ebx, %%esi;" - "mov %%edi, %%ebx;" - :"+a" (eax), "=S" (ebx), "=c" (ecx), "=d" (edx) - : :"edi"); - -#if defined(HAVE_SSE2) - if (edx & (1 << 26)) - _siphash24 = hashable_siphash24_sse2; -#if defined(HAVE_SSE41) - else if (ecx & (1 << 19)) - _siphash24 = hashable_siphash24_sse41; -#endif - else -#endif - _siphash24 = plain_siphash24; -} - -#endif - -/* ghci's linker fails to call static initializers. */ -static inline void ensure_sse_init() -{ -#if defined(__i386) - if (_siphash24 == NULL) - maybe_use_sse(); -#endif -} - -u64 hashable_siphash(int c, int d, u64 k0, u64 k1, const u8 *str, size_t len) -{ - return _siphash(c, d, k0, k1, str, len); -} - -u64 hashable_siphash24(u64 k0, u64 k1, const u8 *str, size_t len) -{ - ensure_sse_init(); - return _siphash24(k0, k1, str, len); -} - -/* Used for ByteArray#s. We can't treat them like pointers in - native Haskell, but we can in unsafe FFI calls. - */ -u64 hashable_siphash24_offset(u64 k0, u64 k1, - const u8 *str, size_t off, size_t len) -{ - ensure_sse_init(); - return _siphash24(k0, k1, str + off, len); -} - -static int _siphash_chunk(int c, int d, int buffered, u64 v[5], - const u8 *str, size_t len, size_t totallen) -{ - u64 v0 = v[0], v1 = v[1], v2 = v[2], v3 = v[3], m, b; - const u8 *p, *end; - u64 carry = 0; - int i; - - if (buffered > 0) { - int unbuffered = 8 - buffered; - int tobuffer = unbuffered > len ? len : unbuffered; - int shift = buffered << 3; - - m = odd_read(str, tobuffer, v[4], shift); - str += tobuffer; - buffered += tobuffer; - len -= tobuffer; - - if (buffered < 8) - carry = m; - else { - v3 ^= m; - if (c == 2) { - SIPROUND; - SIPROUND; - } else { - for (i = 0; i < c; i++) - SIPROUND; - } - v0 ^= m; - buffered = 0; - m = 0; - } - } - - for (p = str, end = str + (len & ~7); p < end; p += 8) { - m = peek_u64le((u64 *) p); - v3 ^= m; - if (c == 2) { - SIPROUND; - SIPROUND; - } else { - for (i = 0; i < c; i++) - SIPROUND; - } - v0 ^= m; - } - - b = odd_read(p, len & 7, 0, 0); - - if (totallen == -1) { - v[0] = v0; - v[1] = v1; - v[2] = v2; - v[3] = v3; - v[4] = b | carry; - - return buffered + (len & 7); - } - - b |= ((u64) totallen) << 56; - - v3 ^= b; - if (c == 2) { - SIPROUND; - SIPROUND; - } else { - for (i = 0; i < c; i++) - SIPROUND; - } - v0 ^= b; - - v2 ^= 0xff; - if (d == 4) { - SIPROUND; - SIPROUND; - SIPROUND; - SIPROUND; - } else { - for (i = 0; i < d; i++) - SIPROUND; - } - v[4] = v0 ^ v1 ^ v2 ^ v3; - return 0; -} - -void hashable_siphash_init(u64 k0, u64 k1, u64 *v) -{ - v[0] = 0x736f6d6570736575ull ^ k0; - v[1] = 0x646f72616e646f6dull ^ k1; - v[2] = 0x6c7967656e657261ull ^ k0; - v[3] = 0x7465646279746573ull ^ k1; - v[4] = 0; -} - -int hashable_siphash24_chunk(int buffered, u64 v[5], const u8 *str, - size_t len, size_t totallen) -{ - return _siphash_chunk(2, 4, buffered, v, str, len, totallen); -} - -/* - * Used for ByteArray#. - */ -int hashable_siphash24_chunk_offset(int buffered, u64 v[5], const u8 *str, - size_t off, size_t len, size_t totallen) -{ - return _siphash_chunk(2, 4, buffered, v, str + off, len, totallen); -} diff --git a/hashable-bench/benchmarks/cbits/siphash.h b/hashable-bench/benchmarks/cbits/siphash.h deleted file mode 100644 index 05a565f5..00000000 --- a/hashable-bench/benchmarks/cbits/siphash.h +++ /dev/null @@ -1,68 +0,0 @@ -#ifndef _hashable_siphash_h -#define _hashable_siphash_h - -#include -#include - -typedef uint64_t u64; -typedef uint32_t u32; -typedef uint16_t u16; -typedef uint8_t u8; - -#define SIPHASH_ROUNDS 2 -#define SIPHASH_FINALROUNDS 4 - -u64 hashable_siphash(int, int, u64, u64, const u8 *, size_t); -u64 hashable_siphash24(u64, u64, const u8 *, size_t); - -#if defined(__i386) - -/* To use SSE instructions, we have to adjust the stack from its - default of 4-byte alignment to use 16-byte alignment. */ - -# define ALIGNED_STACK __attribute__((force_align_arg_pointer)) - -u64 hashable_siphash24_sse2(u64, u64, const u8 *, size_t) ALIGNED_STACK; -u64 hashable_siphash24_sse41(u64, u64, const u8 *, size_t) ALIGNED_STACK; -#endif - -#if defined(_WIN32) -# define __LITTLE_ENDIAN 1234 -# define __BIG_ENDIAN 4321 -# define __BYTE_ORDER __LITTLE_ENDIAN - -#elif (defined(__FreeBSD__) && __FreeBSD_version >= 470000) || defined(__OpenBSD__) || defined(__NetBSD__) -# include -# define __BIG_ENDIAN BIG_ENDIAN -# define __LITTLE_ENDIAN LITTLE_ENDIAN -# define __BYTE_ORDER BYTE_ORDER - -#elif (defined(BSD) && (BSD >= 199103)) || defined(__APPLE__) -# include -# define __BIG_ENDIAN BIG_ENDIAN -# define __LITTLE_ENDIAN LITTLE_ENDIAN -# define __BYTE_ORDER BYTE_ORDER - -#elif defined(__linux__) -# include -#endif - -static inline u64 peek_u64le(const u64 *p) -{ - u64 x = *p; - -#if __BYTE_ORDER == __BIG_ENDIAN - x = ((x & 0xff00000000000000ull) >> 56) | - ((x & 0x00ff000000000000ull) >> 40) | - ((x & 0x0000ff0000000000ull) >> 24) | - ((x & 0x000000ff00000000ull) >> 8) | - ((x & 0x00000000ff000000ull) << 8) | - ((x & 0x0000000000ff0000ull) << 24) | - ((x & 0x000000000000ff00ull) << 40) | - ((x & 0x00000000000000ffull) << 56); -#endif - - return x; -} - -#endif /* _hashable_siphash_h */ diff --git a/hashable-bench/benchmarks/cbits/wang.c b/hashable-bench/benchmarks/cbits/wang.c deleted file mode 100644 index ea37ff9f..00000000 --- a/hashable-bench/benchmarks/cbits/wang.c +++ /dev/null @@ -1,29 +0,0 @@ -/* - * These hash functions were developed by Thomas Wang. - * - * http://www.concentric.net/~ttwang/tech/inthash.htm - */ - -#include - -uint32_t hashable_wang_32(uint32_t a) -{ - a = (a ^ 61) ^ (a >> 16); - a = a + (a << 3); - a = a ^ (a >> 4); - a = a * 0x27d4eb2d; - a = a ^ (a >> 15); - return a; -} - -uint64_t hashable_wang_64(uint64_t key) -{ - key = (~key) + (key << 21); // key = (key << 21) - key - 1; - key = key ^ ((key >> 24) | (key << 40)); - key = (key + (key << 3)) + (key << 8); // key * 265 - key = key ^ ((key >> 14) | (key << 50)); - key = (key + (key << 2)) + (key << 4); // key * 21 - key = key ^ ((key >> 28) | (key << 36)); - key = key + (key << 31); - return key; -} diff --git a/hashable-bench/hashable-bench.cabal b/hashable-bench/hashable-bench.cabal index 1e6dd0c6..0642c7f0 100644 --- a/hashable-bench/hashable-bench.cabal +++ b/hashable-bench/hashable-bench.cabal @@ -33,25 +33,15 @@ tested-with: || ==9.10.1 extra-source-files: - benchmarks/cbits/*.h include/HsHashable.h flag integer-gmp description: Are we using @integer-gmp@ to provide fast Integer instances? No effect on GHC-9.0 or later. + manual: False default: True -flag sse2 - description: Do we want to assume that a target supports SSE 2? - default: True - manual: True - -flag sse41 - description: Do we want to assume that a target supports SSE 4.1? - default: False - manual: True - library exposed-modules: Data.Hashable @@ -130,7 +120,7 @@ library benchmark hashable-benchmark -- We cannot depend on the hashable library directly as that creates -- a dependency cycle. - hs-source-dirs: benchmarks src-siphash + hs-source-dirs: benchmarks main-is: Benchmarks.hs type: exitcode-stdio-1.0 build-depends: @@ -153,22 +143,6 @@ benchmark hashable-benchmark if impl(ghc >=7.2.1) cpp-options: -DGENERICS - include-dirs: benchmarks/cbits - includes: siphash.h - other-modules: Data.Hashable.SipHash - c-sources: - benchmarks/cbits/inthash.c - benchmarks/cbits/siphash.c - benchmarks/cbits/wang.c - - if ((arch(i386) || arch(x86_64)) && flag(sse2)) - cpp-options: -DHAVE_SSE2 - c-sources: benchmarks/cbits/siphash-sse2.c - - if flag(sse41) - cpp-options: -DHAVE_SSE41 - c-sources: benchmarks/cbits/siphash-sse41.c - ghc-options: -Wall -O2 default-language: Haskell2010 diff --git a/hashable-bench/src-siphash/Data/Hashable/SipHash.hs b/hashable-bench/src-siphash/Data/Hashable/SipHash.hs deleted file mode 100644 index c553bdda..00000000 --- a/hashable-bench/src-siphash/Data/Hashable/SipHash.hs +++ /dev/null @@ -1,159 +0,0 @@ -{-# LANGUAGE BangPatterns, CPP, GeneralizedNewtypeDeriving, RecordWildCards #-} -{-# OPTIONS_GHC -fno-warn-name-shadowing #-} - -module Data.Hashable.SipHash - ( - LE64 - , Sip - , fromWord64 - , fullBlock - , lastBlock - , finalize - , hashByteString - ) where - -#include "MachDeps.h" - -import Data.Bits ((.|.), (.&.), rotateL, shiftL, xor) -#if MIN_VERSION_base(4,5,0) -import Data.Bits (unsafeShiftL) -#endif -import Data.Word (Word8, Word64) -import Foreign.ForeignPtr (withForeignPtr) -import Foreign.Ptr (Ptr, castPtr, plusPtr) -import Data.ByteString.Internal (ByteString(PS), accursedUnutterablePerformIO) -import Foreign.Storable (peek) -import Numeric (showHex) - -newtype LE64 = LE64 { fromLE64 :: Word64 } - deriving (Eq) - -instance Show LE64 where - show (LE64 !v) = let s = showHex v "" - in "0x" ++ replicate (16 - length s) '0' ++ s - -data Sip = Sip { - v0 :: {-# UNPACK #-} !Word64, v1 :: {-# UNPACK #-} !Word64 - , v2 :: {-# UNPACK #-} !Word64, v3 :: {-# UNPACK #-} !Word64 - } - -fromWord64 :: Word64 -> LE64 -#ifndef WORDS_BIGENDIAN -fromWord64 = LE64 -#else -#error big endian support TBD -#endif - -initState :: (Sip -> r) -> Word64 -> Word64 -> r -initState k k0 k1 = k (Sip s0 s1 s2 s3) - where !s0 = (k0 `xor` 0x736f6d6570736575) - !s1 = (k1 `xor` 0x646f72616e646f6d) - !s2 = (k0 `xor` 0x6c7967656e657261) - !s3 = (k1 `xor` 0x7465646279746573) - -sipRound :: (Sip -> r) -> Sip -> r -sipRound k Sip{..} = k (Sip v0_c v1_d v2_c v3_d) - where v0_a = v0 + v1 - v2_a = v2 + v3 - v1_a = v1 `rotateL` 13 - v3_a = v3 `rotateL` 16 - v1_b = v1_a `xor` v0_a - v3_b = v3_a `xor` v2_a - v0_b = v0_a `rotateL` 32 - v2_b = v2_a + v1_b - v0_c = v0_b + v3_b - v1_c = v1_b `rotateL` 17 - v3_c = v3_b `rotateL` 21 - v1_d = v1_c `xor` v2_b - v3_d = v3_c `xor` v0_c - v2_c = v2_b `rotateL` 32 - -fullBlock :: Int -> LE64 -> (Sip -> r) -> Sip -> r -fullBlock c m k st@Sip{..} - | c == 2 = sipRound (sipRound k') st' - | otherwise = runRounds c k' st' - where k' st1@Sip{..} = k st1{ v0 = v0 `xor` fromLE64 m } - st' = st{ v3 = v3 `xor` fromLE64 m } -{-# INLINE fullBlock #-} - -runRounds :: Int -> (Sip -> r) -> Sip -> r -runRounds c k = go 0 - where go i st - | i < c = sipRound (go (i+1)) st - | otherwise = k st -{-# INLINE runRounds #-} - -lastBlock :: Int -> Int -> LE64 -> (Sip -> r) -> Sip -> r -lastBlock !c !len !m k st = -#ifndef WORDS_BIGENDIAN - fullBlock c (LE64 m') k st -#else -#error big endian support TBD -#endif - where m' = fromLE64 m .|. ((fromIntegral len .&. 0xff) `shiftL` 56) -{-# INLINE lastBlock #-} - -finalize :: Int -> (Word64 -> r) -> Sip -> r -finalize d k st@Sip{..} - | d == 4 = sipRound (sipRound (sipRound (sipRound k'))) st' - | otherwise = runRounds d k' st' - where k' Sip{..} = k $! v0 `xor` v1 `xor` v2 `xor` v3 - st' = st{ v2 = v2 `xor` 0xff } -{-# INLINE finalize #-} - -hashByteString :: Int -> Int -> Word64 -> Word64 -> ByteString -> Word64 -hashByteString !c !d k0 k1 (PS fp off len) = - accursedUnutterablePerformIO . withForeignPtr fp $ \basePtr -> - let ptr0 = basePtr `plusPtr` off - scant = len .&. 7 - endBlocks = ptr0 `plusPtr` (len - scant) - go !ptr st - | ptr == endBlocks = readLast ptr - | otherwise = do - m <- peekLE64 ptr - fullBlock c m (go (ptr `plusPtr` 8)) st - where - zero !m _ _ = lastBlock c len (LE64 m) (finalize d return) st - one k m p s = do - w <- fromIntegral `fmap` peekByte p - k (m .|. (w `unsafeShiftL` s)) (p `plusPtr` 1) (s+8) - readLast p = - case scant of - 0 -> zero 0 p (0::Int) - 1 -> one zero 0 p 0 - 2 -> one (one zero) 0 p 0 - 3 -> one (one (one zero)) 0 p 0 - 4 -> one (one (one (one zero))) 0 p 0 - 5 -> one (one (one (one (one zero)))) 0 p 0 - 6 -> one (one (one (one (one (one zero))))) 0 p 0 - _ -> one (one (one (one (one (one (one zero)))))) 0 p 0 - in initState (go ptr0) k0 k1 - -peekByte :: Ptr Word8 -> IO Word8 -peekByte = peek - -peekLE64 :: Ptr Word8 -> IO LE64 -#if defined(x86_64_HOST_ARCH) || defined(i386_HOST_ARCH) --- platforms on which unaligned loads are legal and usually fast -peekLE64 p = LE64 `fmap` peek (castPtr p) -#else -peekLE64 p = do - let peek8 d = fromIntegral `fmap` peekByte (p `plusPtr` d) - b0 <- peek8 0 - b1 <- peek8 1 - b2 <- peek8 2 - b3 <- peek8 3 - b4 <- peek8 4 - b5 <- peek8 5 - b6 <- peek8 6 - b7 <- peek8 7 - let !w = (b7 `shiftL` 56) .|. (b6 `shiftL` 48) .|. (b5 `shiftL` 40) .|. - (b4 `shiftL` 32) .|. (b3 `shiftL` 24) .|. (b2 `shiftL` 16) .|. - (b1 `shiftL` 8) .|. b0 - return (fromWord64 w) -#endif - -#if !(MIN_VERSION_base(4,5,0)) -unsafeShiftL :: Word64 -> Int -> Word64 -unsafeShiftL = shiftL -#endif