diff --git a/hashable-bench/hashable-bench.cabal b/hashable-bench/hashable-bench.cabal index 9b3eea6..1e6dd0c 100644 --- a/hashable-bench/hashable-bench.cabal +++ b/hashable-bench/hashable-bench.cabal @@ -63,6 +63,7 @@ library Data.Hashable.Generic.Instances Data.Hashable.Imports Data.Hashable.LowLevel + Data.Hashable.Mix c-sources: cbits/fnv.c include-dirs: include diff --git a/hashable.cabal b/hashable.cabal index c8a79b9..1786bd9 100644 --- a/hashable.cabal +++ b/hashable.cabal @@ -12,7 +12,6 @@ description: The 'Hashable' 'hash' values are not guaranteed to be stable across library versions, operating systems or architectures. For stable hashing use named hashes: SHA256, CRC32 etc. homepage: http://github.com/haskell-unordered-containers/hashable - license: BSD-3-Clause license-file: LICENSE author: @@ -72,12 +71,13 @@ library Data.Hashable.Generic.Instances Data.Hashable.Imports Data.Hashable.LowLevel + Data.Hashable.Mix c-sources: cbits/fnv.c include-dirs: include hs-source-dirs: src build-depends: - base >=4.12.0.0 && <4.21 + , base >=4.12.0.0 && <4.21 , bytestring >=0.10.8.2 && <0.13 , containers >=0.6.0.1 && <0.8 , deepseq >=1.4.4.0 && <1.6 @@ -159,7 +159,7 @@ test-suite hashable-tests Regress build-depends: - base + , base , bytestring , filepath , ghc-prim @@ -187,7 +187,7 @@ test-suite hashable-tests test-suite hashable-examples type: exitcode-stdio-1.0 build-depends: - base + , base , ghc-prim , hashable diff --git a/src/Data/Hashable/LowLevel.hs b/src/Data/Hashable/LowLevel.hs index 7f129fb..b258443 100644 --- a/src/Data/Hashable/LowLevel.hs +++ b/src/Data/Hashable/LowLevel.hs @@ -22,6 +22,7 @@ import System.IO.Unsafe (unsafePerformIO) #endif import Data.Hashable.Imports +import Data.Hashable.Mix ------------------------------------------------------------------------------- -- Initial seed @@ -61,30 +62,7 @@ defaultSalt' = -2128831035 -- 2166136261 :: Int32 -- | Hash 'Int'. First argument is a salt, second argument is an 'Int'. 
-- The result is new salt / hash value. hashInt :: Salt -> Int -> Salt -hashInt s x = s `rnd` x1 `rnd` x2 `rnd` x3 `rnd` x4 - where - {-# INLINE rnd #-} - {-# INLINE x1 #-} - {-# INLINE x2 #-} - {-# INLINE x3 #-} - {-# INLINE x4 #-} -#if WORD_SIZE_IN_BITS == 64 - -- See https://github.com/haskell-unordered-containers/hashable/issues/270 - -- FNV-1 is defined to hash byte at the time. - -- We used to hash whole Int at once, which provided very bad mixing. - -- Current is a performance-quality compromise, we do four rounds per Int (instead of 8 for FNV-1 or 1 for previous hashable). - rnd a b = (a * 1099511628211) `xor` b - x1 = shiftR x 48 .&. 0xffff - x2 = shiftR x 32 .&. 0xffff - x3 = shiftR x 16 .&. 0xffff - x4 = x .&. 0xffff -#else - rnd a b = (a * 16777619) `xor` b - x1 = shiftR x 24 .&. 0xff - x2 = shiftR x 16 .&. 0xff - x3 = shiftR x 8 .&. 0xff - x4 = x .&. 0xff -#endif +hashInt s x = fromIntegral (mixHash (fromIntegral s) (fromIntegral x)) -- mixing is delegated to 'mixHash' (this replaces the FNV-1 style rounds); 'fromIntegral' converts the salt and the value to 'Word' and the result back to 'Salt' -- Note: FNV-1 hash takes a byte of data at once, here we take an 'Int', -- which is 4 or 8 bytes. Whether that's bad or not, I don't know. 
diff --git a/src/Data/Hashable/Mix.hs b/src/Data/Hashable/Mix.hs
new file mode 100644
index 0000000..aebff2c
--- /dev/null
+++ b/src/Data/Hashable/Mix.hs
@@ -0,0 +1,51 @@
+{-# LANGUAGE CPP #-}
+{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE Trustworthy #-}
+{-# LANGUAGE UnboxedTuples #-}
+-- | Mixing of two machine words into a single hash word.
+module Data.Hashable.Mix (mixHash) where
+
+#include "MachDeps.h"
+
+import Data.Bits (unsafeShiftR, xor)
+import GHC.Exts (Word (..), byteSwap#, timesWord2#, xor#)
+
+-- | Mix two words into one. Inspired by how xxh3 works on small
+-- (at most 16 byte) inputs.
+mixHash :: Word -> Word -> Word
+mixHash hi lo = avalanche (hi + byteSwap lo + mulFold hi lo)
+
+-- | Multiply two words with 'timesWord2#' and xor the two words of the
+-- result together.
+mulFold :: Word -> Word -> Word
+mulFold (W# a) (W# b) =
+  case timesWord2# a b of
+    (# upper, lower #) -> W# (xor# upper lower)
+
+-- | Boxed wrapper around 'byteSwap#'.
+byteSwap :: Word -> Word
+byteSwap (W# w) = W# (byteSwap# w)
+
+-- | Final scrambling step, using the shifts and multipliers of the
+-- MurmurHash3 mixer for the current word size.
+avalanche :: Word -> Word
+avalanche z =
+#if WORD_SIZE_IN_BITS == 64
+  -- MurmurHash3Mixer
+  shiftXor 33
+    (shiftXorMultiply 33 0xc4ceb9fe1a85ec53
+      (shiftXorMultiply 33 0xff51afd7ed558ccd z))
+#else
+  -- MurmurHash3Mixer 32bit
+  shiftXor 16
+    (shiftXorMultiply 13 0xc2b2ae35
+      (shiftXorMultiply 16 0x85ebca6b z))
+#endif
+
+-- | Xor a word with itself shifted right by the given number of bits.
+shiftXor :: Int -> Word -> Word
+shiftXor bits v = xor v (unsafeShiftR v bits)
+
+-- | 'shiftXor' followed by multiplication with the given constant.
+shiftXorMultiply :: Int -> Word -> Word -> Word
+shiftXorMultiply bits factor v = factor * shiftXor bits v
diff --git a/tests/Regress.hs b/tests/Regress.hs
index 20496d9..3780389 100644
--- a/tests/Regress.hs
+++ b/tests/Regress.hs
@@ -66,9 +66,9 @@ regressions = [] ++
     , testCase "64 bit Text" $ do
         hash ("hello world" :: Text) @?=
 #if MIN_VERSION_text(2,0,0)
-            2589482369471999198
+            588044899381568208
 #else
-            -1955893671357159554
+            -5067133951949802236
 #endif
 #endif
     , F.testGroup "concatenation"