Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Alternative optimisations for insertion #980

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions containers-tests/benchmarks/Map.hs
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,9 @@ main = do
let m = M.fromAscList elems :: M.Map Int Int
m_even = M.fromAscList elems_even :: M.Map Int Int
m_odd = M.fromAscList elems_odd :: M.Map Int Int
evaluate $ rnf [m, m_even, m_odd]
evaluate $ rnf elems_rev
m_sparse = M.filter (\v -> v `mod` 15 == 0) m_even
evaluate $ rnf [m, m_even, m_odd, m_sparse]
evaluate $ rnf [elems_rev, elems_alts]
defaultMain
[ bench "lookup absent" $ whnf (lookup evens) m_odd
, bench "lookup present" $ whnf (lookup evens) m_even
Expand All @@ -35,6 +36,7 @@ main = do
, bench "alterF no rules lookup present" $ whnf (atLookupNoRules evens) m_even
, bench "insert absent" $ whnf (ins elems_even) m_odd
, bench "insert present" $ whnf (ins elems_even) m_even
, bench "insert alternate" $ whnf (ins elems_alts) m_even
, bench "alterF insert absent" $ whnf (atIns elems_even) m_odd
, bench "alterF insert present" $ whnf (atIns elems_even) m_even
, bench "alterF no rules insert absent" $ whnf (atInsNoRules elems_even) m_odd
Expand Down Expand Up @@ -84,6 +86,9 @@ main = do
, bench "mapMaybeWithKey" $ whnf (M.mapMaybeWithKey (const maybeDel)) m
, bench "lookupIndex" $ whnf (lookupIndex keys) m
, bench "union" $ whnf (M.union m_even) m_odd
, bench "union_identical" $ whnf (M.union m_even) m_even
, bench "union_sparse" $ whnf (M.union m_even) m_sparse
, bench "union_into_sparse" $ whnf (M.union m_sparse) m_even
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Two more with m_sparse and m_odd would show if there's a slowdown in insertR.

, bench "difference" $ whnf (M.difference m) m_even
, bench "intersection" $ whnf (M.intersection m) m_even
, bench "split" $ whnf (M.split (bound `div` 2)) m
Expand All @@ -100,6 +105,7 @@ main = do
bound = 2^12
elems = zip keys values
elems_even = zip evens evens
elems_alts = zip evens odds
elems_odd = zip odds odds
elems_rev = reverse elems
keys = [1..bound]
Expand Down
83 changes: 32 additions & 51 deletions containers/src/Data/Map/Internal.hs
Original file line number Diff line number Diff line change
Expand Up @@ -403,7 +403,7 @@ import Utils.Containers.Internal.BitUtil (wordSize)
#endif

#if __GLASGOW_HASKELL__
import GHC.Exts (build, lazy)
import GHC.Exts (build)
import Language.Haskell.TH.Syntax (Lift)
-- See Note [ Template Haskell Dependencies ]
import Language.Haskell.TH ()
Expand Down Expand Up @@ -776,24 +776,24 @@ singleton k x = Bin 1 k x Tip Tip
-- See Note: Type of local 'go' function
-- See Note: Avoiding worker/wrapper
insert :: Ord k => k -> a -> Map k a -> Map k a
insert kx0 = go kx0 kx0
insert kx0 ax0 m0 =
case go kx0 ax0 m0 of (m :*: _) -> m
where
-- Unlike insertR, we only get sharing here
-- when the inserted value is at the same address
-- as the present value. We try anyway; this condition
-- seems particularly likely to occur in 'union'.
go :: Ord k => k -> k -> a -> Map k a -> Map k a
go orig !_ x Tip = singleton (lazy orig) x
go orig !kx x t@(Bin sz ky y l r) =
go :: Ord k => k -> a -> Map k a -> StrictPair (Map k a) Bool
go !kx x Tip = singleton kx x :*: False
go !kx x (Bin sz ky y l r) =
case compare kx ky of
LT | l' `ptrEq` l -> t
| otherwise -> balanceL ky y l' r
where !l' = go orig kx x l
GT | r' `ptrEq` r -> t
| otherwise -> balanceR ky y l r'
where !r' = go orig kx x r
EQ | x `ptrEq` y && (lazy orig `seq` (orig `ptrEq` ky)) -> t
| otherwise -> Bin sz (lazy orig) x l r
LT | found -> Bin sz ky y l' r :*: found
| otherwise -> balanceL ky y l' r :*: found
where !(l' :*: found) = go kx x l
GT | found -> Bin sz ky y l r' :*: found
| otherwise -> balanceR ky y l r' :*: found
where !(r' :*: found) = go kx x r
EQ -> Bin sz kx x l r :*: True
#if __GLASGOW_HASKELL__
{-# INLINABLE insert #-}
#else
Expand All @@ -805,39 +805,20 @@ lazy :: a -> a
lazy a = a
#endif

-- [Note: Avoiding worker/wrapper]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-- 'insert' has to go to great lengths to get pointer equality right and
-- to prevent unnecessary allocation. The trouble is that GHC *really* wants
-- to unbox the key and throw away the boxed one. This is bad for us, because
-- we want to compare the pointer of the box we are given to the one already
-- present if they compare EQ. It's also bad for us because it leads to the
-- key being *reboxed* if it's actually stored in the map. Ugh! So we pass the
-- 'go' function *two copies* of the key we're given. One of them we use for
-- comparisons; the other we keep in our pocket. To prevent worker/wrapper from
-- messing with the copy in our pocket, we sprinkle about calls to the magical
-- function 'lazy'. This is all horrible, but it seems to work okay.


-- Insert a new key and value in the map if it is not already present.
-- Used by `union`.

-- See Note: Type of local 'go' function
-- See Note: Avoiding worker/wrapper
insertR :: Ord k => k -> a -> Map k a -> Map k a
insertR kx0 = go kx0 kx0
insertR k0 a0 m0 = go k0 a0 m0 id
where
go :: Ord k => k -> k -> a -> Map k a -> Map k a
go orig !_ x Tip = singleton (lazy orig) x
go orig !kx x t@(Bin _ ky y l r) =
-- Use an explicit continuation which isn't executed if the
-- key is found.
go !kx x Tip k = k (singleton kx x)
go !kx x (Bin _ ky y l r) k =
case compare kx ky of
LT | l' `ptrEq` l -> t
| otherwise -> balanceL ky y l' r
where !l' = go orig kx x l
GT | r' `ptrEq` r -> t
| otherwise -> balanceR ky y l r'
where !r' = go orig kx x r
EQ -> t
LT -> go kx x l (k . (\l' -> balanceL ky y l' r))
GT -> go kx x r (k . (\r' -> balanceR ky y l r'))
EQ -> m0
#if __GLASGOW_HASKELL__
{-# INLINABLE insertR #-}
#else
Expand All @@ -857,19 +838,19 @@ insertR kx0 = go kx0 kx0
-- Also see the performance note on 'fromListWith'.

insertWith :: Ord k => (a -> a -> a) -> k -> a -> Map k a -> Map k a
insertWith = go
insertWith f k0 a0 m0 =
case go k0 a0 m0 of (m :*: _) -> m
where
-- We have no hope of making pointer equality tricks work
-- here, because lazy insertWith *always* changes the tree,
-- either adding a new entry or replacing an element with a
-- thunk.
go :: Ord k => (a -> a -> a) -> k -> a -> Map k a -> Map k a
go _ !kx x Tip = singleton kx x
go f !kx x (Bin sy ky y l r) =
go !kx x Tip = singleton kx x :*: False
go !kx x (Bin sy ky y l r) =
case compare kx ky of
LT -> balanceL ky y (go f kx x l) r
GT -> balanceR ky y l (go f kx x r)
EQ -> Bin sy kx (f x y) l r
LT | found -> Bin sy ky y l' r :*: found
| otherwise -> balanceL ky y l' r :*: found
where !(l' :*: found) = go kx x l
GT | found -> Bin sy ky y l r' :*: found
| otherwise -> balanceR ky y l r' :*: found
where !(r' :*: found) = go kx x r
EQ -> Bin sy kx (f x y) l r :*: True

#if __GLASGOW_HASKELL__
{-# INLINABLE insertWith #-}
Expand Down