From a1466a51f1a752c685c36dcfd1df7f06e92112a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?V=C3=ADctor=20L=C3=B3pez=20Juan?= Date: Sun, 1 Jan 2017 22:12:43 +0100 Subject: [PATCH 1/2] Add function to check if two `IntSet`s are `disjoint`. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This function is equivalent to computing the intersection and checking if it is empty. However, it is more efficient because the intersection set does not need to be built in memory, and the computation can be short-circuited as soon as two non-disjoint `Tip`s are found. Benchmark intset-benchmarks: RUNNING... [...] benchmarking disjoint:false time 149.2 ns (147.7 ns .. 150.6 ns) 0.999 R² (0.998 R² .. 1.000 R²) mean 147.0 ns (145.4 ns .. 148.6 ns) std dev 5.281 ns (4.234 ns .. 6.800 ns) variance introduced by outliers: 54% (severely inflated) benchmarking disjoint:true time 2.500 μs (2.468 μs .. 2.533 μs) 0.999 R² (0.999 R² .. 0.999 R²) mean 2.451 μs (2.425 μs .. 2.477 μs) std dev 81.58 ns (69.22 ns .. 98.12 ns) variance introduced by outliers: 44% (moderately inflated) benchmarking null.intersection:false time 4.077 μs (4.038 μs .. 4.122 μs) 0.999 R² (0.998 R² .. 1.000 R²) mean 4.026 μs (3.983 μs .. 4.090 μs) std dev 170.3 ns (121.3 ns .. 264.2 ns) variance introduced by outliers: 54% (severely inflated) benchmarking null.intersection:true time 2.527 μs (2.468 μs .. 2.610 μs) 0.996 R² (0.993 R² .. 0.999 R²) mean 2.490 μs (2.459 μs .. 2.535 μs) std dev 122.4 ns (85.89 ns .. 
180.2 ns) variance introduced by outliers: 63% (severely inflated) --- Data/IntSet.hs | 1 + Data/IntSet/Internal.hs | 49 ++++++++++++++++++++++++++++++++++++++ benchmarks/IntSet.hs | 4 ++++ tests/intset-properties.hs | 7 +++++- 4 files changed, 60 insertions(+), 1 deletion(-) diff --git a/Data/IntSet.hs b/Data/IntSet.hs index 7100523e2..f47dccf1e 100644 --- a/Data/IntSet.hs +++ b/Data/IntSet.hs @@ -74,6 +74,7 @@ module Data.IntSet ( , lookupGE , isSubsetOf , isProperSubsetOf + , disjoint -- * Construction , empty diff --git a/Data/IntSet/Internal.hs b/Data/IntSet/Internal.hs index ea1124e89..3772bcd8d 100644 --- a/Data/IntSet/Internal.hs +++ b/Data/IntSet/Internal.hs @@ -118,6 +118,7 @@ module Data.IntSet.Internal ( , lookupGE , isSubsetOf , isProperSubsetOf + , disjoint -- * Construction , empty @@ -659,6 +660,54 @@ isSubsetOf (Tip _ _) Nil = False isSubsetOf Nil _ = True +{-------------------------------------------------------------------- + Disjoint +--------------------------------------------------------------------} +-- | /O(n+m)/. Check whether two sets are disjoint (i.e. their intersection +-- is empty). 
+-- +-- > disjoint (fromList [2,4,6]) (fromList [1,3]) == True +-- > disjoint (fromList [2,4,6,8]) (fromList [2,3,5,7]) == False +-- > disjoint (fromList [1,2]) (fromList [1,2,3,4]) == False +-- > disjoint (fromList []) (fromList []) == True +-- +-- @since 0.5.11 +disjoint :: IntSet -> IntSet -> Bool +disjoint t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2) + | shorter m1 m2 = disjoint1 + | shorter m2 m1 = disjoint2 + | p1 == p2 = disjoint l1 l2 && disjoint r1 r2 + | otherwise = True + where + disjoint1 | nomatch p2 p1 m1 = True + | zero p2 m1 = disjoint l1 t2 + | otherwise = disjoint r1 t2 + + disjoint2 | nomatch p1 p2 m2 = True + | zero p1 m2 = disjoint t1 l2 + | otherwise = disjoint t1 r2 + +disjoint t1@(Bin _ _ _ _) (Tip kx2 bm2) = disjointBM t1 + where disjointBM (Bin p1 m1 l1 r1) | nomatch kx2 p1 m1 = True + | zero kx2 m1 = disjointBM l1 + | otherwise = disjointBM r1 + disjointBM (Tip kx1 bm1) | kx1 == kx2 = (bm1 .&. bm2) == 0 + | otherwise = True + disjointBM Nil = True + +disjoint (Bin _ _ _ _) Nil = True + +disjoint (Tip kx1 bm1) t2 = disjointBM t2 + where disjointBM (Bin p2 m2 l2 r2) | nomatch kx1 p2 m2 = True + | zero kx1 m2 = disjointBM l2 + | otherwise = disjointBM r2 + disjointBM (Tip kx2 bm2) | kx1 == kx2 = (bm1 .&. bm2) == 0 + | otherwise = True + disjointBM Nil = True + +disjoint Nil _ = True + + {-------------------------------------------------------------------- Filter --------------------------------------------------------------------} diff --git a/benchmarks/IntSet.hs b/benchmarks/IntSet.hs index 7288e2e6e..95d99d5e9 100644 --- a/benchmarks/IntSet.hs +++ b/benchmarks/IntSet.hs @@ -32,6 +32,10 @@ main = do , bench "fromList" $ whnf S.fromList elems , bench "fromAscList" $ whnf S.fromAscList elems , bench "fromDistinctAscList" $ whnf S.fromDistinctAscList elems + , bench "disjoint:false" $ whnf (S.disjoint s) s_even + , bench "disjoint:true" $ whnf (S.disjoint s_odd) s_even + , bench "null.intersection:false" $ whnf (S.null. 
S.intersection s) s_even + , bench "null.intersection:true" $ whnf (S.null. S.intersection s_odd) s_even ] where elems = [1..2^12] diff --git a/tests/intset-properties.hs b/tests/intset-properties.hs index 56283c0d2..eecb9eb97 100644 --- a/tests/intset-properties.hs +++ b/tests/intset-properties.hs @@ -54,6 +54,7 @@ main = defaultMain [ testCase "lookupLT" test_lookupLT , testProperty "prop_isProperSubsetOf2" prop_isProperSubsetOf2 , testProperty "prop_isSubsetOf" prop_isSubsetOf , testProperty "prop_isSubsetOf2" prop_isSubsetOf2 + , testProperty "prop_disjoint" prop_disjoint , testProperty "prop_size" prop_size , testProperty "prop_findMax" prop_findMax , testProperty "prop_findMin" prop_findMin @@ -202,7 +203,7 @@ prop_MemberFromList xs t = fromList abs_xs {-------------------------------------------------------------------- - Union + Union, Difference and Intersection --------------------------------------------------------------------} prop_UnionInsert :: Int -> IntSet -> Property prop_UnionInsert x t = @@ -233,6 +234,9 @@ prop_Int xs ys = valid t .&&. toAscList t === List.sort (nub ((List.intersect) (xs) (ys))) +prop_disjoint :: IntSet -> IntSet -> Bool +prop_disjoint a b = a `disjoint` b == null (a `intersection` b) + {-------------------------------------------------------------------- Lists --------------------------------------------------------------------} @@ -402,3 +406,4 @@ prop_bitcount a w = bitcount_orig a w == bitcount_new a w go a x = go (a + 1) (x .&. (x-1)) bitcount_new a x = a + popCount x #endif + From 2599afc046b7be70656cf76973111e0cc58e5f85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?V=C3=ADctor=20L=C3=B3pez=20Juan?= Date: Fri, 19 Jan 2018 22:19:00 +0100 Subject: [PATCH 2/2] Add `disjoint` for Data.Set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is added mostly for consistency with `IntSet.disjoint`. Performance also improves compared to the corresponding `(null.).intersection` implementation. 
Benchmark set-benchmarks: RUNNING... [...] benchmarking disjoint:false time 69.64 ns (68.87 ns .. 70.69 ns) 0.998 R² (0.997 R² .. 0.999 R²) mean 69.39 ns (68.31 ns .. 70.86 ns) std dev 3.819 ns (2.979 ns .. 4.857 ns) variance introduced by outliers: 75% (severely inflated) benchmarking disjoint:true time 611.5 μs (606.6 μs .. 618.0 μs) 0.999 R² (0.997 R² .. 1.000 R²) mean 620.2 μs (608.3 μs .. 664.9 μs) std dev 68.26 μs (14.09 μs .. 140.9 μs) variance introduced by outliers: 79% (severely inflated) benchmarking null.intersection:false time 234.9 μs (227.3 μs .. 241.2 μs) 0.995 R² (0.993 R² .. 0.998 R²) mean 218.8 μs (214.7 μs .. 224.3 μs) std dev 14.66 μs (11.51 μs .. 18.56 μs) variance introduced by outliers: 62% (severely inflated) benchmarking null.intersection:true time 732.3 μs (701.0 μs .. 767.7 μs) 0.989 R² (0.983 R² .. 0.996 R²) mean 726.3 μs (711.1 μs .. 742.9 μs) std dev 50.46 μs (38.81 μs .. 63.17 μs) variance introduced by outliers: 58% (severely inflated) --- Data/Set.hs | 1 + Data/Set/Internal.hs | 22 ++++++++++++++++++++++ benchmarks/Set.hs | 4 ++++ tests/set-properties.hs | 4 ++++ 4 files changed, 31 insertions(+) diff --git a/Data/Set.hs b/Data/Set.hs index f330a1eb6..c0396b0e7 100644 --- a/Data/Set.hs +++ b/Data/Set.hs @@ -74,6 +74,7 @@ module Data.Set ( , lookupGE , isSubsetOf , isProperSubsetOf + , disjoint -- * Construction , empty diff --git a/Data/Set/Internal.hs b/Data/Set/Internal.hs index c000392a6..816d9c0c2 100644 --- a/Data/Set/Internal.hs +++ b/Data/Set/Internal.hs @@ -141,6 +141,7 @@ module Data.Set.Internal ( , lookupGE , isSubsetOf , isProperSubsetOf + , disjoint -- * Construction , empty @@ -622,6 +623,27 @@ isSubsetOfX (Bin _ x l r) t {-# INLINABLE isSubsetOfX #-} #endif +{-------------------------------------------------------------------- + Disjoint +--------------------------------------------------------------------} +-- | /O(n+m)/. Check whether two sets are disjoint (i.e. their intersection +-- is empty). 
+-- +-- > disjoint (fromList [2,4,6]) (fromList [1,3]) == True +-- > disjoint (fromList [2,4,6,8]) (fromList [2,3,5,7]) == False +-- > disjoint (fromList [1,2]) (fromList [1,2,3,4]) == False +-- > disjoint (fromList []) (fromList []) == True +-- +-- @since 0.5.11 + +disjoint :: Ord a => Set a -> Set a -> Bool +disjoint Tip _ = True +disjoint _ Tip = True +disjoint (Bin _ x l r) t + -- Analogous implementation to `isSubsetOfX` + = not found && disjoint l lt && disjoint r gt + where + (lt,found,gt) = splitMember x t {-------------------------------------------------------------------- Minimal, Maximal diff --git a/benchmarks/Set.hs b/benchmarks/Set.hs index 67ef83572..d0086b95d 100644 --- a/benchmarks/Set.hs +++ b/benchmarks/Set.hs @@ -33,6 +33,10 @@ main = do , bench "fromList-desc" $ whnf S.fromList (reverse elems) , bench "fromAscList" $ whnf S.fromAscList elems , bench "fromDistinctAscList" $ whnf S.fromDistinctAscList elems + , bench "disjoint:false" $ whnf (S.disjoint s) s_even + , bench "disjoint:true" $ whnf (S.disjoint s_odd) s_even + , bench "null.intersection:false" $ whnf (S.null. S.intersection s) s_even + , bench "null.intersection:true" $ whnf (S.null. 
S.intersection s_odd) s_even ] where elems = [1..2^12] diff --git a/tests/set-properties.hs b/tests/set-properties.hs index 46ae416cc..e235c0add 100644 --- a/tests/set-properties.hs +++ b/tests/set-properties.hs @@ -67,6 +67,7 @@ main = defaultMain [ testCase "lookupLT" test_lookupLT , testProperty "prop_isProperSubsetOf2" prop_isProperSubsetOf2 , testProperty "prop_isSubsetOf" prop_isSubsetOf , testProperty "prop_isSubsetOf2" prop_isSubsetOf2 + , testProperty "prop_disjoint" prop_disjoint , testProperty "prop_size" prop_size , testProperty "prop_lookupMax" prop_lookupMax , testProperty "prop_lookupMin" prop_lookupMin @@ -426,6 +427,9 @@ prop_Int :: [Int] -> [Int] -> Bool prop_Int xs ys = toAscList (intersection (fromList xs) (fromList ys)) == List.sort (nub ((List.intersect) (xs) (ys))) +prop_disjoint :: Set Int -> Set Int -> Bool +prop_disjoint a b = a `disjoint` b == null (a `intersection` b) + {-------------------------------------------------------------------- Lists --------------------------------------------------------------------}