Skip to content

Commit

Permalink
Add NonEmpty variants of inits and tails (#557)
Browse files Browse the repository at this point in the history
* Add basic benchmarks for inits/tails

* Add NonEmpty variants of inits and tails

  The lazy versions use new implementations:
  - Lazy tails got about 10% faster with ghc-9.2. (A happy accident!)
  - Lazy inits got much faster:
    - For the first few chunks it is about 1.5x faster, due to better list fusion.
    - When there are many chunks it is about 4x faster.

* Formatting and comments, as suggested in review

* Add link to a relevant CLC issue about NonEmpty

  - haskell/core-libraries-committee#107
  • Loading branch information
clyring authored Nov 28, 2022
1 parent 6455e29 commit d4933c6
Show file tree
Hide file tree
Showing 6 changed files with 102 additions and 20 deletions.
46 changes: 40 additions & 6 deletions Data/ByteString.hs
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,8 @@ module Data.ByteString (
groupBy,
inits,
tails,
initsNE,
tailsNE,
stripPrefix,
stripSuffix,

Expand Down Expand Up @@ -235,6 +237,8 @@ import Data.ByteString.Lazy.Internal (fromStrict, toStrict)
import Data.ByteString.Unsafe

import qualified Data.List as List
import qualified Data.List.NonEmpty as NE
import Data.List.NonEmpty (NonEmpty(..))

import Data.Word (Word8)

Expand Down Expand Up @@ -427,7 +431,7 @@ last ps@(BS x l)
unsafeWithForeignPtr x $ \p -> peekByteOff p (l-1)
{-# INLINE last #-}

-- | /O(1)/ Return all the elements of a 'ByteString' except the last one.
-- | /O(1)/ Returns all the elements of a 'ByteString' except the last one.
-- An exception will be thrown in the case of an empty ByteString.
--
-- This is a partial function, consider using 'unsnoc' instead.
Expand Down Expand Up @@ -1686,17 +1690,47 @@ unzip ls = (pack (P.map fst ls), pack (P.map snd ls))
-- ---------------------------------------------------------------------
-- Special lists

-- | /O(n)/ Return all initial segments of the given 'ByteString', shortest first.
-- | /O(n)/ Returns all initial segments of the given 'ByteString', shortest first.
inits :: ByteString -> [ByteString]
inits (BS x l) = [BS x n | n <- [0..l]]
-- see Note [Avoid NonEmpty combinators]
inits bs = NE.toList $! initsNE bs

-- | /O(n)/ Return all final segments of the given 'ByteString', longest first.
-- | /O(n)/ Returns all initial segments of the given 'ByteString', shortest first.
--
-- @since 0.11.4.0
initsNE :: ByteString -> NonEmpty ByteString
-- see Note [Avoid NonEmpty combinators]
initsNE (BS x len) = empty :| [BS x n | n <- [1..len]]

-- | /O(n)/ Returns all final segments of the given 'ByteString', longest first.
tails :: ByteString -> [ByteString]
tails p | null p = [empty]
| otherwise = p : tails (unsafeTail p)
-- see Note [Avoid NonEmpty combinators]
tails bs = NE.toList $! tailsNE bs

-- | /O(n)/ Returns all final segments of the given 'ByteString', longest first.
--
-- @since 0.11.4.0
tailsNE :: ByteString -> NonEmpty ByteString
-- see Note [Avoid NonEmpty combinators]
tailsNE p | null p = empty :| []
| otherwise = p :| tails (unsafeTail p)

-- less efficent spacewise: tails (BS x l) = [BS (plusForeignPtr x n) (l-n) | n <- [0..l]]

{-
Note [Avoid NonEmpty combinators]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
As of base-4.17, most of the NonEmpty API is surprisingly lazy.
Using it without forcing the arguments yourself is just begging GHC
to make your code waste time allocating useless selector thunks.
This may change in the future. See also this CLC issue:
https://github.com/haskell/core-libraries-committee/issues/107
But until then, "refactor" with care!
-}



-- ---------------------------------------------------------------------
-- ** Ordered 'ByteString's

Expand Down
4 changes: 3 additions & 1 deletion Data/ByteString/Char8.hs
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,8 @@ module Data.ByteString.Char8 (
groupBy,
inits,
tails,
initsNE,
tailsNE,
strip,
stripPrefix,
stripSuffix,
Expand Down Expand Up @@ -261,7 +263,7 @@ import qualified Data.ByteString.Unsafe as B

-- Listy functions transparently exported
import Data.ByteString (null,length,tail,init,append
,inits,tails,reverse,transpose
,inits,tails,initsNE,tailsNE,reverse,transpose
,concat,take,takeEnd,drop,dropEnd,splitAt
,intercalate,sort,isPrefixOf,isSuffixOf
,isInfixOf,stripPrefix,stripSuffix
Expand Down
46 changes: 35 additions & 11 deletions Data/ByteString/Lazy.hs
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,8 @@ module Data.ByteString.Lazy (
groupBy,
inits,
tails,
initsNE,
tailsNE,
stripPrefix,
stripSuffix,

Expand Down Expand Up @@ -228,6 +230,8 @@ import Prelude hiding
,getContents,getLine,putStr,putStrLn ,zip,zipWith,unzip,notElem)

import qualified Data.List as List
import qualified Data.List.NonEmpty as NE
import Data.List.NonEmpty (NonEmpty(..))
import qualified Data.Bifunctor as BF
import qualified Data.ByteString as P (ByteString) -- type name only
import qualified Data.ByteString as S -- S for strict (hmm...)
Expand Down Expand Up @@ -384,7 +388,7 @@ last (Chunk c0 cs0) = go c0 cs0
go _ (Chunk c cs) = go c cs
-- XXX Don't inline this. Something breaks with 6.8.2 (haven't investigated yet)

-- | /O(n\/c)/ Return all the elements of a 'ByteString' except the last one.
-- | /O(n\/c)/ Returns all the elements of a 'ByteString' except the last one.
--
-- This is a partial function, consider using 'unsnoc' instead.
init :: HasCallStack => ByteString -> ByteString
Expand Down Expand Up @@ -1433,19 +1437,39 @@ unzip ls = (pack (List.map fst ls), pack (List.map snd ls))
-- ---------------------------------------------------------------------
-- Special lists

-- | /O(n)/ Return all initial segments of the given 'ByteString', shortest first.
-- | Returns all initial segments of the given 'ByteString', shortest first.
inits :: ByteString -> [ByteString]
inits = (Empty :) . inits'
where inits' Empty = []
inits' (Chunk c cs) = List.map (`Chunk` Empty) (List.drop 1 (S.inits c))
++ List.map (Chunk c) (inits' cs)
-- see Note [Avoid NonEmpty combinators] in Data.ByteString
inits bs = NE.toList $! initsNE bs

-- | /O(n)/ Return all final segments of the given 'ByteString', longest first.
-- | Returns all initial segments of the given 'ByteString', shortest first.
--
-- @since 0.11.4.0
initsNE :: ByteString -> NonEmpty ByteString
-- see Note [Avoid NonEmpty combinators] in Data.ByteString
initsNE = (Empty :|) . inits' id
where
inits' :: (ByteString -> ByteString) -> ByteString -> [ByteString]
-- inits' f bs === map f (tail (inits bs))
inits' _ Empty = []
inits' f (Chunk c@(S.BS x len) cs)
= [f (S.BS x n `Chunk` Empty) | n <- [1..len]]
++ inits' (f . Chunk c) cs

-- | /O(n)/ Returns all final segments of the given 'ByteString', longest first.
tails :: ByteString -> [ByteString]
tails Empty = [Empty]
tails cs@(Chunk c cs')
| S.length c == 1 = cs : tails cs'
| otherwise = cs : tails (Chunk (S.unsafeTail c) cs')
-- see Note [Avoid NonEmpty combinators] in Data.ByteString
tails bs = NE.toList $! tailsNE bs

-- | /O(n)/ Returns all final segments of the given 'ByteString', longest first.
--
-- @since 0.11.4.0
tailsNE :: ByteString -> NonEmpty ByteString
-- see Note [Avoid NonEmpty combinators] in Data.ByteString
tailsNE bs = case uncons bs of
Nothing -> Empty :| []
Just (_, tl) -> bs :| tails tl


-- ---------------------------------------------------------------------
-- Low level constructors
Expand Down
4 changes: 3 additions & 1 deletion Data/ByteString/Lazy/Char8.hs
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,8 @@ module Data.ByteString.Lazy.Char8 (
groupBy,
inits,
tails,
initsNE,
tailsNE,
stripPrefix,
stripSuffix,

Expand Down Expand Up @@ -233,7 +235,7 @@ import Data.ByteString.Lazy
(fromChunks, toChunks
,empty,null,length,tail,init,append,reverse,transpose,cycle
,concat,take,takeEnd,drop,dropEnd,splitAt,intercalate
,isPrefixOf,isSuffixOf,group,inits,tails,copy
,isPrefixOf,isSuffixOf,group,inits,tails,initsNE,tailsNE,copy
,stripPrefix,stripSuffix
,hGetContents, hGet, hPut, getContents
,hGetNonBlocking, hPutNonBlocking
Expand Down
15 changes: 15 additions & 0 deletions bench/BenchAll.hs
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,12 @@ lazyByteStringData :: L.ByteString
lazyByteStringData = case S.splitAt (nRepl `div` 2) byteStringData of
(bs1, bs2) -> L.fromChunks [bs1, bs2]

{-# NOINLINE smallChunksData #-}
smallChunksData :: L.ByteString
smallChunksData
= L.fromChunks [S.take sz (S.drop n byteStringData)
| let sz = 48, n <- [0, sz .. S.length byteStringData]]

{-# NOINLINE byteStringChunksData #-}
byteStringChunksData :: [S.ByteString]
byteStringChunksData = map (S.pack . replicate (4 ) . fromIntegral) intData
Expand Down Expand Up @@ -404,6 +410,15 @@ main = do
, bench "balancedSlow" $ partitionLazy (\x -> hashWord8 x < w 128)
]
]
, bgroup "inits"
[ bench "strict" $ nf S.inits byteStringData
, bench "lazy" $ nf L.inits lazyByteStringData
, bench "lazy (small chunks)" $ nf L.inits smallChunksData
]
, bgroup "tails"
[ bench "strict" $ nf S.tails byteStringData
, bench "lazy" $ nf L.tails lazyByteStringData
]
, bgroup "sort" $ map (\s -> bench (S8.unpack s) $ nf S.sort s) sortInputs
, bgroup "stimes" $ let st = stimes :: Int -> S.ByteString -> S.ByteString
in
Expand Down
7 changes: 6 additions & 1 deletion tests/Properties/ByteString.hs
Original file line number Diff line number Diff line change
Expand Up @@ -59,18 +59,19 @@ import qualified Data.ByteString.Lazy.Internal as B (invariant)
#define BYTESTRING_TYPE B.ByteString
#endif

import Prelude hiding (head, tail)
import Data.Int
import Numeric.Natural (Natural)

import Text.Read

#endif

import Prelude hiding (head, tail)
import Control.Arrow
import Data.Char
import Data.Foldable
import qualified Data.List as List
import qualified Data.List.NonEmpty as NE
import Data.Semigroup
import Data.String
import Data.Tuple
Expand Down Expand Up @@ -231,6 +232,10 @@ tests =
\x -> map B.unpack (B.inits x) === List.inits (B.unpack x)
, testProperty "tails" $
\x -> map B.unpack (B.tails x) === List.tails (B.unpack x)
, testProperty "initsNE" $
\x -> NE.map B.unpack (B.initsNE x) === NE.inits (B.unpack x)
, testProperty "tailsNE" $
\x -> NE.map B.unpack (B.tailsNE x) === NE.tails (B.unpack x)
#endif
, testProperty "all" $
\f x -> B.all f x === all f (B.unpack x)
Expand Down

0 comments on commit d4933c6

Please sign in to comment.