From 89c7baa0d8d6a9dc56cd654635bb0a2fe590a841 Mon Sep 17 00:00:00 2001 From: Simon Jakobi Date: Sun, 5 Oct 2025 19:12:03 +0200 Subject: [PATCH 01/10] two: Optimize computation of idx2 further This reduces the core size for the inner loop of two by 10 terms. --- Data/HashMap/Internal.hs | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/Data/HashMap/Internal.hs b/Data/HashMap/Internal.hs index 6cad1028..29081931 100644 --- a/Data/HashMap/Internal.hs +++ b/Data/HashMap/Internal.hs @@ -164,7 +164,7 @@ import Data.Hashable (Hashable) import Data.Hashable.Lifted (Hashable1, Hashable2) import Data.HashMap.Internal.List (isPermutationBy, unorderedCompare) import Data.Semigroup (Semigroup (..), stimesIdempotentMonoid) -import GHC.Exts (Int (..), Int#, TYPE, (==#)) +import GHC.Exts (Int (..), Int#, TYPE, Word (..), (==#)) import GHC.Stack (HasCallStack) import Prelude hiding (Foldable (..), filter, lookup, map, pred) @@ -978,11 +978,9 @@ two = go ary <- A.unsafeFreeze mary return $ BitmapIndexed (bp1 .|. bp2) ary where - bp1 = mask h1 s - bp2 = mask h2 s - !(I# i1) = index h1 s - !(I# i2) = index h2 s - idx2 = I# (i1 Exts.<# i2) + !bp1@(W# bp1#) = mask h1 s + !bp2@(W# bp2#) = mask h2 s + idx2 = I# (bp1# `Exts.ltWord#` bp2#) -- This way of computing idx2 saves us a branch compared to the previous approach: -- -- idx2 | index h1 s < index h2 s = 1 From aa19655ec3cd7b1312f7ab8dee44ed95b6cf06bb Mon Sep 17 00:00:00 2001 From: Simon Jakobi Date: Sun, 5 Oct 2025 19:54:55 +0200 Subject: [PATCH 02/10] Use two with Collisions too This results in a ~10% reduction in Code size for each of the insert variants. Closes #447. --- Data/HashMap/Internal.hs | 9 ++++----- Data/HashMap/Internal/Strict.hs | 4 ++-- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/Data/HashMap/Internal.hs b/Data/HashMap/Internal.hs index 29081931..eee85f57 100644 --- a/Data/HashMap/Internal.hs +++ b/Data/HashMap/Internal.hs @@ -832,7 +832,7 @@ insert' h0 k0 v0 m0 = go h0 k0 v0 0 m0 where i = index h s go h k x s t@(Collision hy v) | h == hy = Collision h (updateOrSnocWith (\a _ -> (# a #)) k x v) - | otherwise = go h k x s $ BitmapIndexed (mask hy s) (A.singleton t) + | otherwise = runST (two s h k x hy t) {-# INLINABLE insert' #-} -- | Insert optimized for the case when we know the key is not in the map. @@ -866,8 +866,7 @@ insertNewKey !h0 !k0 x0 !m0 = go h0 k0 x0 0 m0 where i = index h s go h k x s t@(Collision hy v) | h == hy = Collision h (A.snoc v (L k x)) - | otherwise = - go h k x s $ BitmapIndexed (mask hy s) (A.singleton t) + | otherwise = runST (two s h k x hy t) {-# NOINLINE insertNewKey #-} @@ -953,7 +952,7 @@ unsafeInsert k0 v0 m0 = runST (go h0 k0 v0 0 m0) where i = index h s go h k x s t@(Collision hy v) | h == hy = return $! Collision h (updateOrSnocWith (\a _ -> (# a #)) k x v) - | otherwise = go h k x s $ BitmapIndexed (mask hy s) (A.singleton t) + | otherwise = two s h k x hy t {-# INLINABLE unsafeInsert #-} -- | Create a map from two key-value pairs which hashes don't collide. To @@ -1107,7 +1106,7 @@ unsafeInsertWithKey f k0 v0 m0 = runST (go h0 k0 v0 0 m0) where i = index h s go h k x s t@(Collision hy v) | h == hy = return $! Collision h (updateOrSnocWithKey f k x v) - | otherwise = go h k x s $ BitmapIndexed (mask hy s) (A.singleton t) + | otherwise = two s h k x hy t {-# INLINABLE unsafeInsertWithKey #-} -- | \(O(\log n)\) Remove the mapping for the specified key from this map diff --git a/Data/HashMap/Internal/Strict.hs b/Data/HashMap/Internal/Strict.hs index ce9a48fa..e4454cb0 100644 --- a/Data/HashMap/Internal/Strict.hs +++ b/Data/HashMap/Internal/Strict.hs @@ -216,7 +216,7 @@ insertWith f k0 v0 m0 = go h0 k0 v0 0 m0 where i = index h s go h k x s t@(Collision hy v) | h == hy = Collision h (updateOrSnocWith f k x v) - | otherwise = go h k x s $ BitmapIndexed (mask hy s) (A.singleton t) + | otherwise = x `seq` runST (HM.two s h k x hy t) {-# INLINABLE insertWith #-} -- | In-place update version of insertWith @@ -257,7 +257,7 @@ unsafeInsertWithKey f k0 v0 m0 = runST (go h0 k0 v0 0 m0) where i = index h s go h k x s t@(Collision hy v) | h == hy = return $! Collision h (updateOrSnocWithKey f k x v) - | otherwise = go h k x s $ BitmapIndexed (mask hy s) (A.singleton t) + | otherwise = x `seq` HM.two s h k x hy t {-# INLINABLE unsafeInsertWithKey #-} -- | \(O(\log n)\) Adjust the value tied to a given key in this map only From 324497be348f70d4bcef0537b1b04e743fee9ef8 Mon Sep 17 00:00:00 2001 From: Simon Jakobi Date: Thu, 16 Oct 2025 16:58:18 +0200 Subject: [PATCH 03/10] Refactor `two` * Use the "shifted hash" approach. * Create a variant `two'` that can be used to address #468. --- Data/HashMap/Internal.hs | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/Data/HashMap/Internal.hs b/Data/HashMap/Internal.hs index eee85f57..32656c24 100644 --- a/Data/HashMap/Internal.hs +++ b/Data/HashMap/Internal.hs @@ -964,21 +964,25 @@ unsafeInsert k0 v0 m0 = runST (go h0 k0 v0 0 m0) -- because it's already in WHNF (having just been matched) and we -- just put it directly in an array. two :: Shift -> Hash -> k -> v -> Hash -> HashMap k v -> ST s (HashMap k v) -two = go +two s h1 k1 v1 = two' s h1 (Leaf h1 (L k1 v1)) +{-# INLINE two #-} + +two' :: Shift -> Hash -> HashMap k v -> Hash -> HashMap k v -> ST s (HashMap k v) +two' s h1 lc1 h2 lc2 = go (shiftHash h1 s) lc1 (shiftHash h2 s) lc2 where - go s h1 k1 v1 h2 t2 + go !sh1 t1 !sh2 t2 | bp1 == bp2 = do - st <- go (nextShift s) h1 k1 v1 h2 t2 + st <- go (shiftHash sh1 bitsPerSubkey) t1 (shiftHash sh2 bitsPerSubkey) t2 ary <- A.singletonM st return $ BitmapIndexed bp1 ary | otherwise = do - mary <- A.new 2 $! Leaf h1 (L k1 v1) + mary <- A.new 2 t1 A.write mary idx2 t2 ary <- A.unsafeFreeze mary return $ BitmapIndexed (bp1 .|. bp2) ary where - !bp1@(W# bp1#) = mask h1 s - !bp2@(W# bp2#) = mask h2 s + !bp1@(W# bp1#) = mask' sh1 + !bp2@(W# bp2#) = mask' sh2 idx2 = I# (bp1# `Exts.ltWord#` bp2#) -- This way of computing idx2 saves us a branch compared to the previous approach: -- @@ -986,7 +990,13 @@ two = go -- | otherwise = 0 -- -- See https://github.com/haskell-unordered-containers/unordered-containers/issues/75#issuecomment-1128419337 -{-# INLINE two #-} + + shiftHash :: Hash -> Shift -> Word -- type ShiftedHash + shiftHash h n = h `unsafeShiftR` n + + mask' :: Word -> Bitmap + mask' w = 1 `unsafeShiftL` fromIntegral (w .&. subkeyMask) +{-# INLINE two' #-} -- Really?! -- | \(O(\log n)\) Associate the value with the key in this map. If -- this map previously contained a mapping for the key, the old value From f1a4d3f7c6470cafa5e06f71ae795788af260033 Mon Sep 17 00:00:00 2001 From: Simon Jakobi Date: Thu, 16 Oct 2025 17:26:02 +0200 Subject: [PATCH 04/10] Update docs and remove outdated note Closes #335. --- Data/HashMap/Internal.hs | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/Data/HashMap/Internal.hs b/Data/HashMap/Internal.hs index 32656c24..16c0d9b9 100644 --- a/Data/HashMap/Internal.hs +++ b/Data/HashMap/Internal.hs @@ -955,18 +955,20 @@ unsafeInsert k0 v0 m0 = runST (go h0 k0 v0 0 m0) | otherwise = two s h k x hy t {-# INLINABLE unsafeInsert #-} --- | Create a map from two key-value pairs which hashes don't collide. To --- enhance sharing, the second key-value pair is represented by the hash of its --- key and a singleton HashMap pairing its key with its value. --- --- Note: to avoid silly thunks, this function must be strict in the --- key. See issue #232. We don't need to force the HashMap argument --- because it's already in WHNF (having just been matched) and we --- just put it directly in an array. +-- | Create a map from a key-value pair and a 'Leaf' or 'Collision' node with a +-- different hash. +-- +-- It is the caller's responsibility to ensure that the HashMap argument is in +-- WHNF. two :: Shift -> Hash -> k -> v -> Hash -> HashMap k v -> ST s (HashMap k v) two s h1 k1 v1 = two' s h1 (Leaf h1 (L k1 v1)) {-# INLINE two #-} +-- | Create a map from two 'Leaf' or 'Collision' nodes whose hashes are +-- distinct. +-- +-- It is the caller's responsibility to ensure that both HashMap arguments are +-- in WHNF. two' :: Shift -> Hash -> HashMap k v -> Hash -> HashMap k v -> ST s (HashMap k v) two' s h1 lc1 h2 lc2 = go (shiftHash h1 s) lc1 (shiftHash h2 s) lc2 where From 8a690682e4f6901b1a93aa8eaf402d5901eab46a Mon Sep 17 00:00:00 2001 From: Simon Jakobi Date: Fri, 17 Oct 2025 01:17:56 +0200 Subject: [PATCH 05/10] Replace `unionWithKey.goDifferentHash` with `two'` Closes #468. --- Data/HashMap/Internal.hs | 17 +++++------------ Data/HashMap/Internal/Strict.hs | 16 ++++------------ 2 files changed, 9 insertions(+), 24 deletions(-) diff --git a/Data/HashMap/Internal.hs b/Data/HashMap/Internal.hs index 16c0d9b9..e4404340 100644 --- a/Data/HashMap/Internal.hs +++ b/Data/HashMap/Internal.hs @@ -126,6 +126,7 @@ module Data.HashMap.Internal , nextShift , sparseIndex , two + , two' , unionArrayBy , updateFullArray , updateFullArrayM @@ -1619,16 +1620,16 @@ unionWithKey f = go 0 | h1 == h2 = if k1 == k2 then Leaf h1 (L k1 (f k1 v1 v2)) else collision h1 l1 l2 - | otherwise = goDifferentHash s h1 h2 t1 t2 + | otherwise = runST (two' s h1 t1 h2 t2) go s t1@(Leaf h1 (L k1 v1)) t2@(Collision h2 ls2) | h1 == h2 = Collision h1 (updateOrSnocWithKey (\k a b -> (# f k a b #)) k1 v1 ls2) - | otherwise = goDifferentHash s h1 h2 t1 t2 + | otherwise = runST (two' s h1 t1 h2 t2) go s t1@(Collision h1 ls1) t2@(Leaf h2 (L k2 v2)) | h1 == h2 = Collision h1 (updateOrSnocWithKey (\k a b -> (# f k b a #)) k2 v2 ls1) - | otherwise = goDifferentHash s h1 h2 t1 t2 + | otherwise = runST (two' s h1 t1 h2 t2) go s t1@(Collision h1 ls1) t2@(Collision h2 ls2) | h1 == h2 = Collision h1 (updateOrConcatWithKey (\k a b -> (# f k a b #)) ls1 ls2) - | otherwise = goDifferentHash s h1 h2 t1 t2 + | otherwise = runST (two' s h1 t1 h2 t2) -- branch vs. branch go s (BitmapIndexed b1 ary1) (BitmapIndexed b2 ary2) = let b' = b1 .|. b2 @@ -1681,14 +1682,6 @@ unionWithKey f = go 0 leafHashCode (Leaf h _) = h leafHashCode (Collision h _) = h leafHashCode _ = error "leafHashCode" - - goDifferentHash s h1 h2 t1 t2 - | m1 == m2 = BitmapIndexed m1 (A.singleton $! goDifferentHash (nextShift s) h1 h2 t1 t2) - | m1 < m2 = BitmapIndexed (m1 .|. m2) (A.pair t1 t2) - | otherwise = BitmapIndexed (m1 .|. m2) (A.pair t2 t1) - where - m1 = mask h1 s - m2 = mask h2 s {-# INLINE unionWithKey #-} -- | Strict in the result of @f@. diff --git a/Data/HashMap/Internal/Strict.hs b/Data/HashMap/Internal/Strict.hs index e4454cb0..1c064ab1 100644 --- a/Data/HashMap/Internal/Strict.hs +++ b/Data/HashMap/Internal/Strict.hs @@ -465,16 +465,16 @@ unionWithKey f = go 0 | h1 == h2 = if k1 == k2 then leaf h1 k1 (f k1 v1 v2) else HM.collision h1 l1 l2 - | otherwise = goDifferentHash s h1 h2 t1 t2 + | otherwise = runST (HM.two' s h1 t1 h2 t2) go s t1@(Leaf h1 (L k1 v1)) t2@(Collision h2 ls2) | h1 == h2 = Collision h1 (updateOrSnocWithKey f k1 v1 ls2) - | otherwise = goDifferentHash s h1 h2 t1 t2 + | otherwise = runST (HM.two' s h1 t1 h2 t2) go s t1@(Collision h1 ls1) t2@(Leaf h2 (L k2 v2)) | h1 == h2 = Collision h1 (updateOrSnocWithKey (flip . f) k2 v2 ls1) - | otherwise = goDifferentHash s h1 h2 t1 t2 + | otherwise = runST (HM.two' s h1 t1 h2 t2) go s t1@(Collision h1 ls1) t2@(Collision h2 ls2) | h1 == h2 = Collision h1 (HM.updateOrConcatWithKey (\k a b -> let !v = f k a b in (# v #)) ls1 ls2) - | otherwise = goDifferentHash s h1 h2 t1 t2 + | otherwise = runST (HM.two' s h1 t1 h2 t2) -- branch vs. branch go s (BitmapIndexed b1 ary1) (BitmapIndexed b2 ary2) = let b' = b1 .|. b2 @@ -527,14 +527,6 @@ unionWithKey f = go 0 leafHashCode (Leaf h _) = h leafHashCode (Collision h _) = h leafHashCode _ = error "leafHashCode" - - goDifferentHash s h1 h2 t1 t2 - | m1 == m2 = BitmapIndexed m1 (A.singleton $! goDifferentHash (nextShift s) h1 h2 t1 t2) - | m1 < m2 = BitmapIndexed (m1 .|. m2) (A.pair t1 t2) - | otherwise = BitmapIndexed (m1 .|. m2) (A.pair t2 t1) - where - m1 = mask h1 s - m2 = mask h2 s {-# INLINE unionWithKey #-} ------------------------------------------------------------------------ From b0644f9577de1131acab7ac1429ab980226dc1ca Mon Sep 17 00:00:00 2001 From: Simon Jakobi Date: Sun, 19 Oct 2025 15:59:29 +0200 Subject: [PATCH 06/10] Introduce `ShiftedHash` --- Data/HashMap/Internal.hs | 88 +++++++++++++++++++--------------------- 1 file changed, 41 insertions(+), 47 deletions(-) diff --git a/Data/HashMap/Internal.hs b/Data/HashMap/Internal.hs index e4404340..789488ba 100644 --- a/Data/HashMap/Internal.hs +++ b/Data/HashMap/Internal.hs @@ -884,33 +884,19 @@ insertKeyExists !collPos0 !h0 !k0 x0 !m0 = go collPos0 h0 k0 x0 m0 = Leaf h (L k x) go collPos shiftedHash k x (BitmapIndexed b ary) = let !st = A.index ary i - !st' = go collPos (shiftHash shiftedHash) k x st + !st' = go collPos (nextSH shiftedHash) k x st in BitmapIndexed b (A.update ary i st') - where m = mask' shiftedHash + where m = maskSH shiftedHash i = sparseIndex b m go collPos shiftedHash k x (Full ary) = let !st = A.index ary i - !st' = go collPos (shiftHash shiftedHash) k x st + !st' = go collPos (nextSH shiftedHash) k x st in Full (updateFullArray ary i st') - where i = index' shiftedHash + where i = indexSH shiftedHash go collPos _shiftedHash k x (Collision h v) | collPos >= 0 = Collision h (setAtPosition collPos k x v) | otherwise = Empty -- error "Internal error: go {collPos negative}" go _ _ _ _ Empty = Empty -- error "Internal error: go Empty" - - -- Customized version of 'index' that doesn't require a 'Shift'. - index' :: Hash -> Int - index' w = fromIntegral $ w .&. subkeyMask - {-# INLINE index' #-} - - -- Customized version of 'mask' that doesn't require a 'Shift'. - mask' :: Word -> Bitmap - mask' w = 1 `unsafeShiftL` index' w - {-# INLINE mask' #-} - - shiftHash h = h `unsafeShiftR` bitsPerSubkey - {-# INLINE shiftHash #-} - {-# NOINLINE insertKeyExists #-} -- | Replace the ith Leaf with Leaf k v. @@ -971,11 +957,11 @@ two s h1 k1 v1 = two' s h1 (Leaf h1 (L k1 v1)) -- It is the caller's responsibility to ensure that both HashMap arguments are -- in WHNF. two' :: Shift -> Hash -> HashMap k v -> Hash -> HashMap k v -> ST s (HashMap k v) -two' s h1 lc1 h2 lc2 = go (shiftHash h1 s) lc1 (shiftHash h2 s) lc2 +two' s h1 lc1 h2 lc2 = go (shiftHash s h1) lc1 (shiftHash s h2) lc2 where go !sh1 t1 !sh2 t2 | bp1 == bp2 = do - st <- go (shiftHash sh1 bitsPerSubkey) t1 (shiftHash sh2 bitsPerSubkey) t2 + st <- go (nextSH sh1) t1 (nextSH sh2) t2 ary <- A.singletonM st return $ BitmapIndexed bp1 ary | otherwise = do @@ -984,8 +970,8 @@ two' s h1 lc1 h2 lc2 = go (shiftHash h1 s) lc1 (shiftHash h2 s) lc2 ary <- A.unsafeFreeze mary return $ BitmapIndexed (bp1 .|. bp2) ary where - !bp1@(W# bp1#) = mask' sh1 - !bp2@(W# bp2#) = mask' sh2 + !bp1@(W# bp1#) = maskSH sh1 + !bp2@(W# bp2#) = maskSH sh2 idx2 = I# (bp1# `Exts.ltWord#` bp2#) -- This way of computing idx2 saves us a branch compared to the previous approach: -- @@ -993,12 +979,6 @@ two' s h1 lc1 h2 lc2 = go (shiftHash h1 s) lc1 (shiftHash h2 s) lc2 -- | otherwise = 0 -- -- See https://github.com/haskell-unordered-containers/unordered-containers/issues/75#issuecomment-1128419337 - - shiftHash :: Hash -> Shift -> Word -- type ShiftedHash - shiftHash h n = h `unsafeShiftR` n - - mask' :: Word -> Bitmap - mask' w = 1 `unsafeShiftL` fromIntegral (w .&. subkeyMask) {-# INLINE two' #-} -- Really?! -- | \(O(\log n)\) Associate the value with the key in this map. If @@ -1188,11 +1168,11 @@ delete' h0 k0 m0 = go h0 k0 0 m0 deleteKeyExists :: Int -> Hash -> k -> HashMap k v -> HashMap k v deleteKeyExists !collPos0 !h0 !k0 !m0 = go collPos0 h0 k0 m0 where - go :: Int -> Word -> k -> HashMap k v -> HashMap k v + go :: Int -> ShiftedHash -> k -> HashMap k v -> HashMap k v go !_collPos !_shiftedHash !_k (Leaf _ _) = Empty go collPos shiftedHash k (BitmapIndexed b ary) = let !st = A.index ary i - !st' = go collPos (shiftHash shiftedHash) k st + !st' = go collPos (nextSH shiftedHash) k st in case st' of Empty | A.length ary == 1 -> Empty | A.length ary == 2 -> @@ -1205,18 +1185,18 @@ deleteKeyExists !collPos0 !h0 !k0 !m0 = go collPos0 h0 k0 m0 bIndexed = BitmapIndexed (b .&. complement m) (A.delete ary i) l | isLeafOrCollision l && A.length ary == 1 -> l _ -> BitmapIndexed b (A.update ary i st') - where m = mask' shiftedHash + where m = maskSH shiftedHash i = sparseIndex b m go collPos shiftedHash k (Full ary) = let !st = A.index ary i - !st' = go collPos (shiftHash shiftedHash) k st + !st' = go collPos (nextSH shiftedHash) k st in case st' of Empty -> let ary' = A.delete ary i bm = fullBitmap .&. complement (1 `unsafeShiftL` i) in BitmapIndexed bm ary' _ -> Full (A.update ary i st') - where i = index' shiftedHash + where i = indexSH shiftedHash go collPos _shiftedHash _k (Collision h v) | A.length v == 2 = if collPos == 0 @@ -1224,20 +1204,6 @@ deleteKeyExists !collPos0 !h0 !k0 !m0 = go collPos0 h0 k0 m0 else Leaf h (A.index v 0) | otherwise = Collision h (A.delete v collPos) go !_ !_ !_ Empty = Empty -- error "Internal error: deleteKeyExists empty" - - -- Customized version of 'index' that doesn't require a 'Shift'. - index' :: Hash -> Int - index' w = fromIntegral $ w .&. subkeyMask - {-# INLINE index' #-} - - -- Customized version of 'mask' that doesn't require a 'Shift'. - mask' :: Word -> Bitmap - mask' w = 1 `unsafeShiftL` index' w - {-# INLINE mask' #-} - - shiftHash h = h `unsafeShiftR` bitsPerSubkey - {-# INLINE shiftHash #-} - {-# NOINLINE deleteKeyExists #-} -- | \(O(\log n)\) Adjust the value tied to a given key in this map only @@ -2512,6 +2478,34 @@ nextShift :: Shift -> Shift nextShift s = s + bitsPerSubkey {-# INLINE nextShift #-} +------------------------------------------------------------------------ +-- ShiftedHash + +-- | Sometimes it's more efficient to right-shift the hashes directly instead +-- of keeping track of an additional 'Shift' value. +type ShiftedHash = Hash + +-- | Construct a 'ShiftedHash' from a 'Shift' and a 'Hash'. +shiftHash :: Shift -> Hash -> ShiftedHash +shiftHash s h = h `unsafeShiftR` s +{-# INLINE shiftHash #-} + +-- | Update a 'ShiftedHash' for the next level of the tree. +nextSH :: ShiftedHash -> ShiftedHash +nextSH sh = sh `unsafeShiftR` bitsPerSubkey +{-# INLINE nextSH #-} + +-- | Version of 'index' for use with @'ShiftedHash'es@. +indexSH :: ShiftedHash -> Int +indexSH sh = fromIntegral $ sh .&. subkeyMask +{-# INLINE indexSH #-} + +-- | Version of 'mask' for use with @'ShiftedHash'es@. +maskSH :: ShiftedHash -> Bitmap +maskSH sh = 1 `unsafeShiftL` indexSH sh +{-# INLINE maskSH #-} + + ------------------------------------------------------------------------ -- Pointer equality From 54e008b9a16f6a5c0ee2cb4d6afabd6775f0fc8c Mon Sep 17 00:00:00 2001 From: Simon Jakobi Date: Sun, 19 Oct 2025 18:31:42 +0200 Subject: [PATCH 07/10] Remove comment --- Data/HashMap/Internal.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Data/HashMap/Internal.hs b/Data/HashMap/Internal.hs index 789488ba..dbf1754a 100644 --- a/Data/HashMap/Internal.hs +++ b/Data/HashMap/Internal.hs @@ -979,7 +979,7 @@ two' s h1 lc1 h2 lc2 = go (shiftHash s h1) lc1 (shiftHash s h2) lc2 -- | otherwise = 0 -- -- See https://github.com/haskell-unordered-containers/unordered-containers/issues/75#issuecomment-1128419337 -{-# INLINE two' #-} -- Really?! +{-# INLINE two' #-} -- | \(O(\log n)\) Associate the value with the key in this map. If -- this map previously contained a mapping for the key, the old value From 01c1b30978dfafd94a9ad128e31986eb7287fb72 Mon Sep 17 00:00:00 2001 From: Simon Jakobi Date: Sun, 19 Oct 2025 18:31:51 +0200 Subject: [PATCH 08/10] two: Force the fresh Leaf Just to be safe. --- Data/HashMap/Internal.hs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Data/HashMap/Internal.hs b/Data/HashMap/Internal.hs index dbf1754a..8c2fe5b7 100644 --- a/Data/HashMap/Internal.hs +++ b/Data/HashMap/Internal.hs @@ -948,7 +948,8 @@ unsafeInsert k0 v0 m0 = runST (go h0 k0 v0 0 m0) -- It is the caller's responsibility to ensure that the HashMap argument is in -- WHNF. two :: Shift -> Hash -> k -> v -> Hash -> HashMap k v -> ST s (HashMap k v) -two s h1 k1 v1 = two' s h1 (Leaf h1 (L k1 v1)) +two s h1 k1 v1 = two' s h1 l + where !l = Leaf h1 (L k1 v1) {-# INLINE two #-} -- | Create a map from two 'Leaf' or 'Collision' nodes whose hashes are From f8c22baf82c751e1f2668cbd15f5969726ae3a86 Mon Sep 17 00:00:00 2001 From: Simon Jakobi Date: Sun, 19 Oct 2025 19:46:23 +0200 Subject: [PATCH 09/10] Move two'.go to the top-level --- Data/HashMap/Internal.hs | 47 +++++++++++++++++++++------------------- 1 file changed, 25 insertions(+), 22 deletions(-) diff --git a/Data/HashMap/Internal.hs b/Data/HashMap/Internal.hs index 8c2fe5b7..658c9baa 100644 --- a/Data/HashMap/Internal.hs +++ b/Data/HashMap/Internal.hs @@ -958,30 +958,33 @@ two s h1 k1 v1 = two' s h1 l -- It is the caller's responsibility to ensure that both HashMap arguments are -- in WHNF. two' :: Shift -> Hash -> HashMap k v -> Hash -> HashMap k v -> ST s (HashMap k v) -two' s h1 lc1 h2 lc2 = go (shiftHash s h1) lc1 (shiftHash s h2) lc2 - where - go !sh1 t1 !sh2 t2 - | bp1 == bp2 = do - st <- go (nextSH sh1) t1 (nextSH sh2) t2 - ary <- A.singletonM st - return $ BitmapIndexed bp1 ary - | otherwise = do - mary <- A.new 2 t1 - A.write mary idx2 t2 - ary <- A.unsafeFreeze mary - return $ BitmapIndexed (bp1 .|. bp2) ary - where - !bp1@(W# bp1#) = maskSH sh1 - !bp2@(W# bp2#) = maskSH sh2 - idx2 = I# (bp1# `Exts.ltWord#` bp2#) - -- This way of computing idx2 saves us a branch compared to the previous approach: - -- - -- idx2 | index h1 s < index h2 s = 1 - -- | otherwise = 0 - -- - -- See https://github.com/haskell-unordered-containers/unordered-containers/issues/75#issuecomment-1128419337 +two' s h1 lc1 h2 lc2 = two_go (shiftHash s h1) lc1 (shiftHash s h2) lc2 {-# INLINE two' #-} +-- | This function lives at the top-level so 'two' and `two'` can be inlined +-- without inlining this loop. +two_go :: ShiftedHash -> HashMap k v -> ShiftedHash -> HashMap k v -> ST s (HashMap k v) +two_go !sh1 t1 !sh2 t2 + | bp1 == bp2 = do + st <- two_go (nextSH sh1) t1 (nextSH sh2) t2 + ary <- A.singletonM st + return $ BitmapIndexed bp1 ary + | otherwise = do + mary <- A.new 2 t1 + A.write mary idx2 t2 + ary <- A.unsafeFreeze mary + return $ BitmapIndexed (bp1 .|. bp2) ary + where + !bp1@(W# bp1#) = maskSH sh1 + !bp2@(W# bp2#) = maskSH sh2 + idx2 = I# (bp1# `Exts.ltWord#` bp2#) + -- This way of computing idx2 saves us a branch compared to the previous approach: + -- + -- idx2 | index h1 s < index h2 s = 1 + -- | otherwise = 0 + -- + -- See https://github.com/haskell-unordered-containers/unordered-containers/issues/75#issuecomment-1128419337 + -- | \(O(\log n)\) Associate the value with the key in this map. If -- this map previously contained a mapping for the key, the old value -- is replaced by the result of applying the given function to the new From 2d1d3c2bb02008650e801a660497fed4e4b6430f Mon Sep 17 00:00:00 2001 From: Simon Jakobi Date: Sun, 19 Oct 2025 22:39:19 +0200 Subject: [PATCH 10/10] insertModifying: Use two with Collisions --- Data/HashMap/Internal.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Data/HashMap/Internal.hs b/Data/HashMap/Internal.hs index 658c9baa..7726ae32 100644 --- a/Data/HashMap/Internal.hs +++ b/Data/HashMap/Internal.hs @@ -1044,7 +1044,7 @@ insertModifying x f k0 m0 = go h0 k0 0 m0 in if A.unsafeSameArray v v' then t else Collision h v' - | otherwise = go h k s $ BitmapIndexed (mask hy s) (A.singleton t) + | otherwise = runST (two s h k x hy t) {-# INLINABLE insertModifying #-} -- | Like insertModifying for arrays; used to implement insertModifying