Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add NonEmpty variants of inits and tails #557

Merged
merged 5 commits into from
Nov 28, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 40 additions & 6 deletions Data/ByteString.hs
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,8 @@ module Data.ByteString (
groupBy,
inits,
tails,
initsNE,
tailsNE,
stripPrefix,
stripSuffix,

Expand Down Expand Up @@ -235,6 +237,8 @@ import Data.ByteString.Lazy.Internal (fromStrict, toStrict)
import Data.ByteString.Unsafe

import qualified Data.List as List
import qualified Data.List.NonEmpty as NE
import Data.List.NonEmpty (NonEmpty(..))

import Data.Word (Word8)

Expand Down Expand Up @@ -427,7 +431,7 @@ last ps@(BS x l)
unsafeWithForeignPtr x $ \p -> peekByteOff p (l-1)
{-# INLINE last #-}

-- | /O(1)/ Return all the elements of a 'ByteString' except the last one.
-- | /O(1)/ Returns all the elements of a 'ByteString' except the last one.
-- An exception will be thrown in the case of an empty ByteString.
--
-- This is a partial function, consider using 'unsnoc' instead.
Expand Down Expand Up @@ -1686,17 +1690,47 @@ unzip ls = (pack (P.map fst ls), pack (P.map snd ls))
-- ---------------------------------------------------------------------
-- Special lists

-- | /O(n)/ Return all initial segments of the given 'ByteString', shortest first.
-- | /O(n)/ Returns all initial segments of the given 'ByteString', shortest first.
inits :: ByteString -> [ByteString]
inits (BS x l) = [BS x n | n <- [0..l]]
-- see Note [Avoid NonEmpty combinators]
inits bs = NE.toList $! initsNE bs

-- | /O(n)/ Return all final segments of the given 'ByteString', longest first.
-- | /O(n)/ Returns all initial segments of the given 'ByteString', shortest first.
--
-- @since 0.11.4.0
initsNE :: ByteString -> NonEmpty ByteString
-- see Note [Avoid NonEmpty combinators]
initsNE (BS x len) = empty :| [BS x n | n <- [1..len]]

-- | /O(n)/ Returns all final segments of the given 'ByteString', longest first.
tails :: ByteString -> [ByteString]
tails p | null p = [empty]
| otherwise = p : tails (unsafeTail p)
-- see Note [Avoid NonEmpty combinators]
tails bs = NE.toList $! tailsNE bs

-- | /O(n)/ Returns all final segments of the given 'ByteString', longest first.
--
-- @since 0.11.4.0
tailsNE :: ByteString -> NonEmpty ByteString
-- see Note [Avoid NonEmpty combinators]
tailsNE p | null p = empty :| []
| otherwise = p :| tails (unsafeTail p)

-- less efficent spacewise: tails (BS x l) = [BS (plusForeignPtr x n) (l-n) | n <- [0..l]]

{-
Note [Avoid NonEmpty combinators]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

As of base-4.17, most of the NonEmpty API is surprisingly lazy.
clyring marked this conversation as resolved.
Show resolved Hide resolved
Using it without forcing the arguments yourself is just begging GHC
to make your code waste time allocating useless selector thunks.
This may change in the future. See also this CLC issue:
https://github.com/haskell/core-libraries-committee/issues/107
But until then, "refactor" with care!
-}



-- ---------------------------------------------------------------------
-- ** Ordered 'ByteString's

Expand Down
4 changes: 3 additions & 1 deletion Data/ByteString/Char8.hs
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,8 @@ module Data.ByteString.Char8 (
groupBy,
inits,
tails,
initsNE,
tailsNE,
strip,
stripPrefix,
stripSuffix,
Expand Down Expand Up @@ -261,7 +263,7 @@ import qualified Data.ByteString.Unsafe as B

-- Listy functions transparently exported
import Data.ByteString (null,length,tail,init,append
,inits,tails,reverse,transpose
,inits,tails,initsNE,tailsNE,reverse,transpose
,concat,take,takeEnd,drop,dropEnd,splitAt
,intercalate,sort,isPrefixOf,isSuffixOf
,isInfixOf,stripPrefix,stripSuffix
Expand Down
46 changes: 35 additions & 11 deletions Data/ByteString/Lazy.hs
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,8 @@ module Data.ByteString.Lazy (
groupBy,
inits,
tails,
initsNE,
tailsNE,
stripPrefix,
stripSuffix,

Expand Down Expand Up @@ -228,6 +230,8 @@ import Prelude hiding
,getContents,getLine,putStr,putStrLn ,zip,zipWith,unzip,notElem)

import qualified Data.List as List
import qualified Data.List.NonEmpty as NE
import Data.List.NonEmpty (NonEmpty(..))
import qualified Data.Bifunctor as BF
import qualified Data.ByteString as P (ByteString) -- type name only
import qualified Data.ByteString as S -- S for strict (hmm...)
Expand Down Expand Up @@ -380,7 +384,7 @@ last (Chunk c0 cs0) = go c0 cs0
go _ (Chunk c cs) = go c cs
-- XXX Don't inline this. Something breaks with 6.8.2 (haven't investigated yet)

-- | /O(n\/c)/ Return all the elements of a 'ByteString' except the last one.
-- | /O(n\/c)/ Returns all the elements of a 'ByteString' except the last one.
--
-- This is a partial function, consider using 'unsnoc' instead.
init :: HasCallStack => ByteString -> ByteString
Expand Down Expand Up @@ -1429,19 +1433,39 @@ unzip ls = (pack (List.map fst ls), pack (List.map snd ls))
-- ---------------------------------------------------------------------
-- Special lists

-- | /O(n)/ Return all initial segments of the given 'ByteString', shortest first.
-- | Returns all initial segments of the given 'ByteString', shortest first.
inits :: ByteString -> [ByteString]
inits = (Empty :) . inits'
where inits' Empty = []
inits' (Chunk c cs) = List.map (`Chunk` Empty) (List.drop 1 (S.inits c))
++ List.map (Chunk c) (inits' cs)
-- see Note [Avoid NonEmpty combinators] in Data.ByteString
inits bs = NE.toList $! initsNE bs

-- | /O(n)/ Return all final segments of the given 'ByteString', longest first.
-- | Returns all initial segments of the given 'ByteString', shortest first.
--
-- @since 0.11.4.0
initsNE :: ByteString -> NonEmpty ByteString
-- see Note [Avoid NonEmpty combinators] in Data.ByteString
initsNE = (Empty :|) . inits' id
where
inits' :: (ByteString -> ByteString) -> ByteString -> [ByteString]
-- inits' f bs === map f (tail (inits bs))
inits' _ Empty = []
inits' f (Chunk c@(S.BS x len) cs)
= [f (S.BS x n `Chunk` Empty) | n <- [1..len]]
++ inits' (f . Chunk c) cs

-- | /O(n)/ Returns all final segments of the given 'ByteString', longest first.
tails :: ByteString -> [ByteString]
tails Empty = [Empty]
tails cs@(Chunk c cs')
| S.length c == 1 = cs : tails cs'
| otherwise = cs : tails (Chunk (S.unsafeTail c) cs')
-- see Note [Avoid NonEmpty combinators] in Data.ByteString
tails bs = NE.toList $! tailsNE bs

-- | /O(n)/ Returns all final segments of the given 'ByteString', longest first.
--
-- @since 0.11.4.0
tailsNE :: ByteString -> NonEmpty ByteString
-- see Note [Avoid NonEmpty combinators] in Data.ByteString
tailsNE bs = case uncons bs of
Nothing -> Empty :| []
Just (_, tl) -> bs :| tails tl
clyring marked this conversation as resolved.
Show resolved Hide resolved


-- ---------------------------------------------------------------------
-- Low level constructors
Expand Down
4 changes: 3 additions & 1 deletion Data/ByteString/Lazy/Char8.hs
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,8 @@ module Data.ByteString.Lazy.Char8 (
groupBy,
inits,
tails,
initsNE,
tailsNE,
stripPrefix,
stripSuffix,

Expand Down Expand Up @@ -233,7 +235,7 @@ import Data.ByteString.Lazy
(fromChunks, toChunks
,empty,null,length,tail,init,append,reverse,transpose,cycle
,concat,take,takeEnd,drop,dropEnd,splitAt,intercalate
,isPrefixOf,isSuffixOf,group,inits,tails,copy
,isPrefixOf,isSuffixOf,group,inits,tails,initsNE,tailsNE,copy
,stripPrefix,stripSuffix
,hGetContents, hGet, hPut, getContents
,hGetNonBlocking, hPutNonBlocking
Expand Down
15 changes: 15 additions & 0 deletions bench/BenchAll.hs
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,12 @@ lazyByteStringData :: L.ByteString
lazyByteStringData = case S.splitAt (nRepl `div` 2) byteStringData of
(bs1, bs2) -> L.fromChunks [bs1, bs2]

{-# NOINLINE smallChunksData #-}
smallChunksData :: L.ByteString
smallChunksData
= L.fromChunks [S.take sz (S.drop n byteStringData)
| let sz = 48, n <- [0, sz .. S.length byteStringData]]

{-# NOINLINE byteStringChunksData #-}
byteStringChunksData :: [S.ByteString]
byteStringChunksData = map (S.pack . replicate (4 ) . fromIntegral) intData
Expand Down Expand Up @@ -404,6 +410,15 @@ main = do
, bench "balancedSlow" $ partitionLazy (\x -> hashWord8 x < w 128)
]
]
, bgroup "inits"
[ bench "strict" $ nf S.inits byteStringData
, bench "lazy" $ nf L.inits lazyByteStringData
, bench "lazy (small chunks)" $ nf L.inits smallChunksData
]
, bgroup "tails"
[ bench "strict" $ nf S.tails byteStringData
, bench "lazy" $ nf L.tails lazyByteStringData
]
, bgroup "sort" $ map (\s -> bench (S8.unpack s) $ nf S.sort s) sortInputs
, bgroup "stimes" $ let st = stimes :: Int -> S.ByteString -> S.ByteString
in
Expand Down
7 changes: 6 additions & 1 deletion tests/Properties/ByteString.hs
Original file line number Diff line number Diff line change
Expand Up @@ -59,18 +59,19 @@ import qualified Data.ByteString.Lazy.Internal as B (invariant)
#define BYTESTRING_TYPE B.ByteString
#endif

import Prelude hiding (head, tail)
import Data.Int
import Numeric.Natural (Natural)

import Text.Read

#endif

import Prelude hiding (head, tail)
import Control.Arrow
import Data.Char
import Data.Foldable
import qualified Data.List as List
import qualified Data.List.NonEmpty as NE
import Data.Semigroup
import Data.String
import Data.Tuple
Expand Down Expand Up @@ -231,6 +232,10 @@ tests =
\x -> map B.unpack (B.inits x) === List.inits (B.unpack x)
, testProperty "tails" $
\x -> map B.unpack (B.tails x) === List.tails (B.unpack x)
, testProperty "initsNE" $
\x -> NE.map B.unpack (B.initsNE x) === NE.inits (B.unpack x)
, testProperty "tailsNE" $
\x -> NE.map B.unpack (B.tailsNE x) === NE.tails (B.unpack x)
#endif
, testProperty "all" $
\f x -> B.all f x === all f (B.unpack x)
Expand Down