Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize IntSet.Bin #998

Merged
merged 8 commits into from
Jun 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions containers-tests/benchmarks/LookupGE/LookupGE_IntMap.hs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
module LookupGE_IntMap where

import Prelude hiding (null)
import Data.IntSet.Internal.IntTreeCommons
(Key, Prefix(..), nomatch, signBranch, left)
import Data.IntMap.Internal

lookupGE1 :: Key -> IntMap a -> Maybe (Key,a)
Expand Down
1 change: 1 addition & 0 deletions containers-tests/containers-tests.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ library
Data.IntMap.Strict.Internal
Data.IntSet
Data.IntSet.Internal
Data.IntSet.Internal.IntTreeCommons
Data.Map
Data.Map.Internal
Data.Map.Internal.Debug
Expand Down
43 changes: 22 additions & 21 deletions containers-tests/tests/IntMapValidity.hs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ module IntMapValidity

import Data.Bits (finiteBitSize, testBit, xor, (.&.))
import Data.List (intercalate, elemIndex)
import Data.IntSet.Internal.IntTreeCommons (Prefix(..), nomatch)
import Data.IntMap.Internal
import Numeric (showHex)
import Test.Tasty.QuickCheck (Property, counterexample, property, (.&&.))
Expand All @@ -17,7 +18,7 @@ import Test.Tasty.QuickCheck (Property, counterexample, property, (.&&.))
valid :: IntMap a -> Property
valid t =
counterexample "nilNeverChildOfBin" (nilNeverChildOfBin t) .&&.
counterexample "prefixOk" (prefixOk t)
counterexample "prefixesOk" (prefixesOk t)

-- Invariant: Nil is never found as a child of Bin.
nilNeverChildOfBin :: IntMap a -> Bool
Expand All @@ -37,26 +38,26 @@ nilNeverChildOfBin t =
-- * All keys in a Bin start with the Bin's shared prefix.
-- * All keys in the Bin's left child have the Prefix's mask bit unset.
-- * All keys in the Bin's right child have the Prefix's mask bit set.
prefixOk :: IntMap a -> Property
prefixOk t =
case t of
Nil -> property ()
Tip _ _ -> property ()
Bin p l r ->
let px = unPrefix p
m = px .&. (-px)
keysl = keys l
keysr = keys r
debugStr = concat
[ "px=" ++ showIntHex px
, ", keysl=[" ++ intercalate "," (fmap showIntHex keysl) ++ "]"
, ", keysr=[" ++ intercalate "," (fmap showIntHex keysr) ++ "]"
]
in counterexample debugStr $
counterexample "mask bit absent" (px /= 0) .&&.
counterexample "prefix not shared" (all (`hasPrefix` p) (keysl ++ keysr)) .&&.
counterexample "left child, mask found set" (all (\x -> x .&. m == 0) keysl) .&&.
counterexample "right child, mask found unset" (all (\x -> x .&. m /= 0) keysr)
prefixesOk :: IntMap a -> Property
prefixesOk t = case t of
Nil -> property ()
Tip _ _ -> property ()
Bin p l r -> currentOk .&&. prefixesOk l .&&. prefixesOk r
where
px = unPrefix p
m = px .&. (-px)
keysl = keys l
keysr = keys r
debugStr = concat
[ "px=" ++ showIntHex px
, ", keysl=[" ++ intercalate "," (fmap showIntHex keysl) ++ "]"
, ", keysr=[" ++ intercalate "," (fmap showIntHex keysr) ++ "]"
]
currentOk = counterexample debugStr $
counterexample "mask bit absent" (px /= 0) .&&.
counterexample "prefix not shared" (all (`hasPrefix` p) (keysl ++ keysr)) .&&.
counterexample "left child, mask found set" (all (\x -> x .&. m == 0) keysl) .&&.
counterexample "right child, mask found unset" (all (\x -> x .&. m /= 0) keysr)

hasPrefix :: Int -> Prefix -> Bool
hasPrefix i p = not (nomatch i p)
Expand Down
79 changes: 38 additions & 41 deletions containers-tests/tests/IntSetValidity.hs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@
module IntSetValidity (valid) where

import Data.Bits (xor, (.&.))
import Data.IntSet.Internal.IntTreeCommons (Prefix(..), nomatch)
import Data.IntSet.Internal
import Data.List (intercalate)
import Numeric (showHex)
import Test.Tasty.QuickCheck (Property, counterexample, property, (.&&.))
import Utils.Containers.Internal.BitUtil (bitcount)

Expand All @@ -13,9 +16,7 @@ import Utils.Containers.Internal.BitUtil (bitcount)
valid :: IntSet -> Property
valid t =
counterexample "nilNeverChildOfBin" (nilNeverChildOfBin t) .&&.
counterexample "maskPowerOfTwo" (maskPowerOfTwo t) .&&.
counterexample "commonPrefix" (commonPrefix t) .&&.
counterexample "markRespected" (maskRespected t) .&&.
counterexample "prefixesOk" (prefixesOk t) .&&.
counterexample "tipsValid" (tipsValid t)

-- Invariant: Nil is never found as a child of Bin.
Expand All @@ -24,48 +25,41 @@ nilNeverChildOfBin t =
case t of
Nil -> True
Tip _ _ -> True
Bin _ _ l r -> noNilInSet l && noNilInSet r
Bin _ l r -> noNilInSet l && noNilInSet r
where
noNilInSet t' =
case t' of
Nil -> False
Tip _ _ -> True
Bin _ _ l' r' -> noNilInSet l' && noNilInSet r'
Bin _ l' r' -> noNilInSet l' && noNilInSet r'

-- Invariant: The Mask is a power of 2. It is the largest bit position at which
-- two elements of the set differ.
maskPowerOfTwo :: IntSet -> Bool
maskPowerOfTwo t =
case t of
Nil -> True
Tip _ _ -> True
Bin _ m l r ->
bitcount 0 (fromIntegral m) == 1 && maskPowerOfTwo l && maskPowerOfTwo r

-- Invariant: Prefix is the common high-order bits that all elements share to
-- the left of the Mask bit.
commonPrefix :: IntSet -> Bool
commonPrefix t =
case t of
Nil -> True
Tip _ _ -> True
b@(Bin p _ l r) -> all (sharedPrefix p) (elems b) && commonPrefix l && commonPrefix r
where
sharedPrefix :: Prefix -> Int -> Bool
sharedPrefix p a = p == p .&. a
-- Invariants:
-- * All keys in a Bin start with the Bin's shared prefix.
-- * All keys in the Bin's left child have the Prefix's mask bit unset.
-- * All keys in the Bin's right child have the Prefix's mask bit set.
prefixesOk :: IntSet -> Property
prefixesOk t = case t of
Nil -> property ()
Tip _ _ -> property ()
Bin p l r -> currentOk .&&. prefixesOk l .&&. prefixesOk r
where
px = unPrefix p
m = px .&. (-px)
keysl = elems l
keysr = elems r
debugStr = concat
[ "px=" ++ showIntHex px
, ", keysl=[" ++ intercalate "," (fmap showIntHex keysl) ++ "]"
, ", keysr=[" ++ intercalate "," (fmap showIntHex keysr) ++ "]"
]
currentOk = counterexample debugStr $
counterexample "mask bit absent" (px /= 0) .&&.
counterexample "prefix not shared" (all (`hasPrefix` p) (keysl ++ keysr)) .&&.
counterexample "left child, mask found set" (all (\x -> x .&. m == 0) keysl) .&&.
counterexample "right child, mask found unset" (all (\x -> x .&. m /= 0) keysr)

-- Invariant: In Bin prefix mask left right, left consists of the elements that
-- don't have the mask bit set; right is all the elements that do.
maskRespected :: IntSet -> Bool
maskRespected t =
case t of
Nil -> True
Tip _ _ -> True
Bin _ binMask l r ->
all (\x -> zero x binMask) (elems l) &&
all (\x -> not (zero x binMask)) (elems r) &&
maskRespected l &&
maskRespected r
hasPrefix :: Int -> Prefix -> Bool
hasPrefix i p = not (nomatch i p)

-- Invariant: The Prefix is zero for the last 5 (on 32 bit arches) or 6 bits
-- (on 64 bit arches). The values of the set represented by a tip
Expand All @@ -76,14 +70,17 @@ tipsValid :: IntSet -> Bool
tipsValid t =
case t of
Nil -> True
tip@(Tip p b) -> validTipPrefix p
Bin _ _ l r -> tipsValid l && tipsValid r
tip@(Tip p b) -> validTipPrefix p && b /= 0
Bin _ l r -> tipsValid l && tipsValid r

validTipPrefix :: Prefix -> Bool
validTipPrefix :: Int -> Bool
#if WORD_SIZE_IN_BITS==32
-- Last 5 bits of the prefix must be zero for 32 bit arches.
validTipPrefix p = (0x0000001F .&. p) == 0
#else
-- Last 6 bits of the prefix must be zero for 64 bit arches.
validTipPrefix p = (0x000000000000003F .&. p) == 0
#endif

showIntHex :: Int -> String
showIntHex x = "0x" ++ showHex (fromIntegral x :: Word) ""
2 changes: 1 addition & 1 deletion containers-tests/tests/intmap-properties.hs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import Data.IntMap.Internal (traverseMaybeWithKey)
import Data.IntMap.Merge.Lazy
#endif
import Data.IntMap.Internal.Debug (showTree)
import Data.IntMap.Internal (Prefix(..))
import Data.IntSet.Internal.IntTreeCommons (Prefix(..), nomatch)
import IntMapValidity (hasPrefix, hasPrefixSimple, valid)

import Control.Applicative (Applicative(..))
Expand Down
58 changes: 20 additions & 38 deletions containers-tests/tests/intset-properties.hs
Original file line number Diff line number Diff line change
Expand Up @@ -38,17 +38,15 @@ main = defaultMain $ testGroup "intset-properties"
, testProperty "prop_UnionInsert" prop_UnionInsert
, testProperty "prop_UnionAssoc" prop_UnionAssoc
, testProperty "prop_UnionComm" prop_UnionComm
, testProperty "prop_Diff" prop_Diff
, testProperty "prop_Int" prop_Int
, testProperty "prop_union" prop_union
, testProperty "prop_difference" prop_difference
, testProperty "prop_intersection" prop_intersection
, testProperty "prop_Ordered" prop_Ordered
, testProperty "prop_List" prop_List
, testProperty "prop_DescList" prop_DescList
, testProperty "prop_AscDescList" prop_AscDescList
, testProperty "prop_fromList" prop_fromList
, testProperty "prop_fromRange" prop_fromRange
, testProperty "prop_MaskPow2" prop_MaskPow2
, testProperty "prop_Prefix" prop_Prefix
, testProperty "prop_LeftRight" prop_LeftRight
, testProperty "prop_isProperSubsetOf" prop_isProperSubsetOf
, testProperty "prop_isProperSubsetOf2" prop_isProperSubsetOf2
, testProperty "prop_isSubsetOf" prop_isSubsetOf
Expand Down Expand Up @@ -113,9 +111,8 @@ test_split = do
Arbitrary, reasonably balanced trees
--------------------------------------------------------------------}
instance Arbitrary IntSet where
arbitrary = do{ xs <- arbitrary
; return (fromList xs)
}
arbitrary = fromList <$> oneof [arbitrary, fmap (fmap getLarge) arbitrary]
shrink = fmap fromList . shrink . toAscList

{--------------------------------------------------------------------
Valid IntMaps
Expand Down Expand Up @@ -232,19 +229,26 @@ prop_UnionComm :: IntSet -> IntSet -> Bool
prop_UnionComm t1 t2
= (union t1 t2 == union t2 t1)

prop_Diff :: [Int] -> [Int] -> Property
prop_Diff xs ys =
case difference (fromList xs) (fromList ys) of
prop_union :: IntSet -> IntSet -> Property
prop_union xs ys =
case union xs ys of
t ->
valid t .&&.
toAscList t === List.sort ((List.\\) (nub xs) (nub ys))
toAscList t === List.nub (List.sort (toAscList xs ++ toAscList ys))

prop_Int :: [Int] -> [Int] -> Property
prop_Int xs ys =
case intersection (fromList xs) (fromList ys) of
prop_difference :: IntSet -> IntSet -> Property
prop_difference xs ys =
case difference xs ys of
t ->
valid t .&&.
toAscList t === List.sort (nub ((List.intersect) (xs) (ys)))
toAscList t === (toAscList xs List.\\ toAscList ys)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Data.List.Ordered.minus would be faster than \\.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you mean from data-ordlist? Surely this is not worth a new dependency?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, from that package. I imagine we can copy what we like, though I haven't checked its license to be sure. My only real concern about the dependency is that the package doesn't seem to be actively maintained, so it might run into annoying issues with base changes.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since the tests are quite fast (as mentioned the other comment), this seems like something that shouldn't cause too much worry.
Besides, a new function/dependency is another chance to introduce a bug. Data.List is a little more trustworthy in that regard, I'd say.


prop_intersection :: IntSet -> IntSet -> Property
prop_intersection xs ys =
case intersection xs ys of
t ->
valid t .&&.
toAscList t === (toAscList xs `List.intersect` toAscList ys)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Similarly, Data.List.Ordered.isect should be faster than List.intersect.


prop_disjoint :: IntSet -> IntSet -> Bool
prop_disjoint a b = a `disjoint` b == null (a `intersection` b)
Expand Down Expand Up @@ -284,28 +288,6 @@ prop_fromRange = forAll (scale (*100) arbitrary) go
go (l,h) = valid t .&&. t === fromAscList [l..h]
where t = fromRange (l,h)

{--------------------------------------------------------------------
Bin invariants
--------------------------------------------------------------------}
powersOf2 :: IntSet
powersOf2 = fromList [2^i | i <- [0..63]]

-- Check the invariant that the mask is a power of 2.
prop_MaskPow2 :: IntSet -> Bool
prop_MaskPow2 (Bin _ msk left right) = member msk powersOf2 && prop_MaskPow2 left && prop_MaskPow2 right
prop_MaskPow2 _ = True

-- Check that the prefix satisfies its invariant.
prop_Prefix :: IntSet -> Bool
prop_Prefix s@(Bin prefix msk left right) = all (\elem -> match elem prefix msk) (toList s) && prop_Prefix left && prop_Prefix right
prop_Prefix _ = True

-- Check that the left elements don't have the mask bit set, and the right
-- ones do.
prop_LeftRight :: IntSet -> Bool
prop_LeftRight (Bin _ msk left right) = and [x .&. msk == 0 | x <- toList left] && and [x .&. msk == msk | x <- toList right]
prop_LeftRight _ = True

{--------------------------------------------------------------------
IntSet operations are like Set operations
--------------------------------------------------------------------}
Expand Down
1 change: 1 addition & 0 deletions containers/containers.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ Library
Data.IntMap.Merge.Lazy
Data.IntMap.Merge.Strict
Data.IntSet.Internal
Data.IntSet.Internal.IntTreeCommons
Data.IntSet
Data.Map
Data.Map.Lazy
Expand Down
Loading
Loading