Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add lazy dropEnd and friends #395

Merged
merged 17 commits into from
Jul 28, 2021
Merged
153 changes: 153 additions & 0 deletions Data/ByteString/Lazy.hs
Original file line number Diff line number Diff line change
Expand Up @@ -127,12 +127,18 @@ module Data.ByteString.Lazy (

-- ** Breaking strings
take,
takeEnd,
drop,
dropEnd,
splitAt,
takeWhile,
takeWhileEnd,
dropWhile,
dropWhileEnd,
span,
spanEnd,
break,
breakEnd,
group,
groupBy,
inits,
Expand Down Expand Up @@ -220,10 +226,12 @@ import Prelude hiding
,getContents,getLine,putStr,putStrLn ,zip,zipWith,unzip,notElem)

import qualified Data.List as L -- L for list/lazy
import qualified Data.Bifunctor as BF
import qualified Data.ByteString as P (ByteString) -- type name only
import qualified Data.ByteString as S -- S for strict (hmm...)
import qualified Data.ByteString.Internal as S
import qualified Data.ByteString.Unsafe as S
import qualified Data.ByteString.Lazy.Internal.Deque as D
import Data.ByteString.Lazy.Internal

import Control.Monad (mplus)
Expand Down Expand Up @@ -684,6 +692,28 @@ take i cs0 = take' i cs0
then Chunk (S.take (fromIntegral n) c) Empty
else Chunk c (take' (n - fromIntegral (S.length c)) cs)

-- | /O(c)/ @'takeEnd' n xs@ returns the last @n@ bytes of @xs@, or all of
-- @xs@ when it holds fewer than @n@ bytes. It is equivalent to
-- @'drop' ('length' xs - n) xs@.
--
-- >>> takeEnd 3 "abcdefg"
-- "efg"
-- >>> takeEnd 0 "abcdefg"
-- ""
-- >>> takeEnd 4 "abc"
-- "abc"
--
-- @since 0.11.2.0
takeEnd :: Int64 -> ByteString -> ByteString
takeEnd i cs0
  | i <= 0    = Empty
  | otherwise = snd (foldrChunks keep (i, Empty) cs0)
  where
    -- Fold from the last chunk towards the first, carrying the number of
    -- bytes still wanted together with the suffix assembled so far.
    keep c acc@(wanted, suffix)
      | wanted == 0   = acc
      | wanted > size = (wanted - size, Chunk c suffix)
      | otherwise     = (0, Chunk (S.takeEnd (fromIntegral wanted) c) suffix)
      where
        size = fromIntegral (S.length c)

-- | /O(n\/c)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@
-- elements, or @[]@ if @n > 'length' xs@.
drop :: Int64 -> ByteString -> ByteString
Expand All @@ -696,6 +726,52 @@ drop i cs0 = drop' i cs0
then Chunk (S.drop (fromIntegral n) c) cs
else drop' (n - fromIntegral (S.length c)) cs

-- | /O(n)/ @'dropEnd' n xs@ is equivalent to @'take' ('length' xs - n) xs@.
-- Drops @n@ elements from end of bytestring.
--
-- >>> dropEnd 3 "abcdefg"
-- "abcd"
-- >>> dropEnd 0 "abcdefg"
-- "abcdefg"
-- >>> dropEnd 4 "abc"
-- ""
--
-- @since 0.11.2.0
dropEnd :: Int64 -> ByteString -> ByteString
dropEnd i p | i <= 0 = p
dropEnd i p = go D.empty p
3kyro marked this conversation as resolved.
Show resolved Hide resolved
where go :: D.Deque -> ByteString -> ByteString
go deque (Chunk c cs)
| D.elemLength deque < i = go (D.snoc c deque) cs
| otherwise =
let (output, deque') = getOutput [] (D.snoc c deque)
in L.foldl (flip chunk) (go deque' cs) output
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's the reason for using a lazy foldl here instead of foldl'?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The lazy fold here is necessary so that we don't force `go deque cs`. However, this has now been changed (see the review comment above).

go deque Empty = dropElements deque (fromIntegral i)

len c = fromIntegral (S.length c)

-- get all `S.ByteString` from the front of the accumulating deque
-- for which we know they won't be dropped
getOutput :: [S.ByteString] -> D.Deque -> ([S.ByteString], D.Deque)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of operating on lists of strict bytestrings, it seems convenient to simply use the lazy bytestring type. Is there a reason not to do that here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, that seems nicer. The only problem is that there is no lazy left fold for chunks, so we need to first reverse the chunks and then fold from the right. It might even be better to pay for the extra traversal than to have the space leaked by the foldl?

getOutput out deque = case D.popFront deque of
Nothing -> (out, deque)
Just (x, deque') | D.elemLength deque' >= i -> getOutput (x:out) deque'
| otherwise -> (out, deque)

-- drop n elements from the rear of the accumulating `deque`
sjakobi marked this conversation as resolved.
Show resolved Hide resolved
dropElements :: D.Deque -> Int -> ByteString
sjakobi marked this conversation as resolved.
Show resolved Hide resolved
dropElements deque n = case D.popRear deque of
Nothing -> Empty
Just (x, deque') | len x <= n -> dropElements deque' (n - len x)
| otherwise ->
fromDeque (D.snoc (S.dropEnd n x) deque')

-- build a lazy ByteString from an accumulating `deque`
fromDeque :: D.Deque -> ByteString
fromDeque deque =
L.foldr chunk Empty (D.front deque) `append`
L.foldl' (flip chunk) Empty (D.rear deque)

-- | /O(n\/c)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@.
splitAt :: Int64 -> ByteString -> (ByteString, ByteString)
splitAt i cs0 | i <= 0 = (Empty, cs0)
Expand All @@ -722,6 +798,23 @@ takeWhile f = takeWhile'
n | n < S.length c -> Chunk (S.take n c) Empty
| otherwise -> Chunk c (takeWhile' cs)

-- | Returns the longest (possibly empty) suffix of elements
-- satisfying the predicate.
--
-- @'takeWhileEnd' p@ is equivalent to @'reverse' . 'takeWhile' p . 'reverse'@.
Comment on lines +806 to +809
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

An example would be nice to have here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've used Chunk (pack [1,2]) (Chunk (pack [3,4,6]) Empty) as an example of a lazy bytestring. I hope it's not too verbose, but a textual representation of a bytestring is always tricky.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think that would be helpful for users who will mostly not be aware of ByteString's internal constructors. If you want to represent the bytes as numbers, you can use the OverloadedLists syntax, e.g. [1,2,3,4,6].

--
-- @since 0.11.2.0
takeWhileEnd :: (Word8 -> Bool) -> ByteString -> ByteString
takeWhileEnd f = snd . foldrChunks step (True, Empty)
  where
    -- Fold from the last chunk towards the first. The flag records whether
    -- every byte seen so far (i.e. everything to the right) satisfied @f@.
    step c acc@(stillMatching, suffix)
      | not stillMatching              = acc
      | S.length kept == S.length c    = (True, Chunk c suffix)
      | otherwise                      = (False, fromStrict kept `append` suffix)
      where
        kept = S.takeWhileEnd f c

-- | Similar to 'P.dropWhile',
-- drops the longest (possibly empty) prefix of elements
-- satisfying the predicate and returns the remainder.
Expand All @@ -733,6 +826,25 @@ dropWhile f = dropWhile'
n | n < S.length c -> Chunk (S.drop n c) cs
| otherwise -> dropWhile' cs

-- | Similar to 'P.dropWhileEnd',
-- drops the longest (possibly empty) suffix of elements
-- satisfying the predicate and returns the remainder.
--
-- @'dropWhileEnd' p@ is equivalent to @'reverse' . 'dropWhile' p . 'reverse'@.
3kyro marked this conversation as resolved.
Show resolved Hide resolved
--
-- @since 0.11.2.0
dropWhileEnd :: (Word8 -> Bool) -> ByteString -> ByteString
dropWhileEnd f = go []
  where
    -- @pending@ holds, most recent first, the chunks whose final byte
    -- satisfies @f@ and which may therefore still be (partly) dropped.
    go pending Empty = trimEnd pending
    go pending (Chunk c cs)
      | f (S.last c) = go (c : pending) cs
        -- The fold is deliberately lazy so that @go [] cs@ is not forced
        -- before the pending chunks are emitted.
      | otherwise    = L.foldl (flip Chunk) (go [] cs) (c : pending)

    -- Strip the matching suffix from the pending chunks, last chunk first,
    -- and rebuild the remaining chunks in their original order.
    trimEnd [] = Empty
    trimEnd (x : xs)
      | S.null x' = trimEnd xs
      | otherwise = L.foldl' (flip Chunk) Empty (x' : xs)
      where
        x' = S.dropWhileEnd f x

-- | Similar to 'P.break',
-- returns the longest (possibly empty) prefix of elements which __do not__
-- satisfy the predicate and the remainder of the string.
Expand All @@ -750,6 +862,28 @@ break f = break'
| otherwise -> let (cs', cs'') = break' cs
in (Chunk c cs', cs'')


-- | Splits the string just after its last element satisfying the predicate:
-- the second component is the longest (possibly empty) suffix of elements
-- which __do not__ satisfy the predicate, and the first component is the
-- remainder of the string.
--
-- 'breakEnd' @p@ is equivalent to @'spanEnd' (not . p)@ and to
-- @('dropWhileEnd' (not . p) &&& 'takeWhileEnd' (not . p))@.
--
-- @since 0.11.2.0
breakEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
breakEnd f = go []
  where
    -- @pending@ holds, most recent first, the chunks seen since the last
    -- chunk whose final byte satisfied @f@; they may belong to the suffix.
    go pending Empty = splitPending pending
    go pending (Chunk c cs)
        -- The final byte of @c@ satisfies @f@, so @c@ and everything pending
        -- belong to the first component. The fold is deliberately lazy so
        -- that @go [] cs@ is not forced before those chunks are emitted.
      | f (S.last c) = L.foldl consPrefix (go [] cs) (c : pending)
      | otherwise    = go (c : pending) cs

    -- Put a strict chunk in front of the first component of the result.
    consPrefix parts c = BF.first (Chunk c) parts

    -- Split the pending chunks, examining the last chunk first.
    splitPending [] = (Empty, Empty)
    splitPending (x : xs) =
      case S.breakEnd f x of
        (x', x'')
            -- No byte of @x@ satisfies @f@: all of it is part of the suffix.
          | S.null x' ->
              let (y, y') = splitPending xs
              in (y, y' `append` fromStrict x)
          | otherwise ->
              L.foldl' consPrefix (fromStrict x', fromStrict x'') xs


--
-- TODO
--
Expand Down Expand Up @@ -799,6 +933,25 @@ spanByte c (LPS ps) = case (spanByte' ps) of (a,b) -> (LPS a, LPS b)
span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
span p = break (not . p)

-- | Splits the string into the longest (possibly empty) suffix of elements
-- satisfying the predicate and the remainder of the string. The result is
-- ordered as @(remainder, suffix)@.
--
-- 'spanEnd' @p@ is equivalent to @'breakEnd' (not . p)@ and to
-- @('dropWhileEnd' p &&& 'takeWhileEnd' p)@.
--
-- We have
--
-- > spanEnd (not . isSpace) "x y z" == ("x y ", "z")
--
-- and
--
-- > spanEnd (not . isSpace) ps
-- >    ==
-- > let (x, y) = span (not . isSpace) (reverse ps) in (reverse y, reverse x)
--
-- @since 0.11.2.0
spanEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
spanEnd p = breakEnd (not . p)

-- | /O(n)/ Splits a 'ByteString' into components delimited by
-- separators, where the predicate returns True for a separator element.
-- The resulting components do not contain the separators. Two adjacent
Expand Down
54 changes: 53 additions & 1 deletion Data/ByteString/Lazy/Char8.hs
Original file line number Diff line number Diff line change
Expand Up @@ -105,12 +105,18 @@ module Data.ByteString.Lazy.Char8 (

-- ** Breaking strings
take,
takeEnd,
drop,
dropEnd,
splitAt,
takeWhile,
takeWhileEnd,
dropWhile,
dropWhileEnd,
span,
spanEnd,
break,
breakEnd,
group,
groupBy,
inits,
Expand Down Expand Up @@ -203,7 +209,7 @@ module Data.ByteString.Lazy.Char8 (
import Data.ByteString.Lazy
(fromChunks, toChunks
,empty,null,length,tail,init,append,reverse,transpose,cycle
,concat,take,drop,splitAt,intercalate
,concat,take,takeEnd,drop,dropEnd,splitAt,intercalate
,isPrefixOf,isSuffixOf,group,inits,tails,copy
,stripPrefix,stripSuffix
,hGetContents, hGet, hPut, getContents
Expand Down Expand Up @@ -456,22 +462,68 @@ takeWhile :: (Char -> Bool) -> ByteString -> ByteString
takeWhile f = L.takeWhile (f . w2c)
{-# INLINE takeWhile #-}

-- | Returns the longest (possibly empty) suffix of the 'ByteString' whose
-- elements all satisfy the predicate.
--
-- @'takeWhileEnd' p@ is equivalent to @'reverse' . 'takeWhile' p . 'reverse'@.
--
-- @since 0.11.2.0
takeWhileEnd :: (Char -> Bool) -> ByteString -> ByteString
takeWhileEnd p = L.takeWhileEnd (\w -> p (w2c w))
{-# INLINE takeWhileEnd #-}

-- | 'dropWhile' @p xs@ returns what is left of @xs@ once the prefix
-- selected by 'takeWhile' @p xs@ has been removed.
dropWhile :: (Char -> Bool) -> ByteString -> ByteString
dropWhile p = L.dropWhile (\w -> p (w2c w))
{-# INLINE dropWhile #-}

-- | Similar to 'P.dropWhileEnd': removes the longest (possibly empty)
-- suffix of elements satisfying the predicate and returns what remains.
--
-- @'dropWhileEnd' p@ is equivalent to @'reverse' . 'dropWhile' p . 'reverse'@.
--
-- @since 0.11.2.0
dropWhileEnd :: (Char -> Bool) -> ByteString -> ByteString
dropWhileEnd p = L.dropWhileEnd (\w -> p (w2c w))
{-# INLINE dropWhileEnd #-}

-- | 'break' @p@ behaves like @'span' ('not' . p)@: it splits at the first
-- element satisfying @p@.
break :: (Char -> Bool) -> ByteString -> (ByteString, ByteString)
break p = L.break (\w -> p (w2c w))
{-# INLINE break #-}

-- | 'breakEnd' behaves like 'break', but works from the end of the
-- 'ByteString'.
--
-- > breakEnd p == spanEnd (not . p)
--
-- @since 0.11.2.0
breakEnd :: (Char -> Bool) -> ByteString -> (ByteString, ByteString)
breakEnd p = L.breakEnd (\w -> p (w2c w))
{-# INLINE breakEnd #-}

-- | 'span' @p xs@ splits the ByteString in two. The result is the same as
-- @('takeWhile' p xs, 'dropWhile' p xs)@.
span :: (Char -> Bool) -> ByteString -> (ByteString, ByteString)
span p = L.span (\w -> p (w2c w))
{-# INLINE span #-}

-- | 'spanEnd' behaves like 'span', but works from the end of the
-- 'ByteString'. For example,
--
-- > spanEnd (not . isSpace) "x y z" == ("x y ", "z")
--
-- and, in general,
--
-- > spanEnd (not . isSpace) ps
-- >    ==
-- > let (x, y) = span (not . isSpace) (reverse ps) in (reverse y, reverse x)
--
-- @since 0.11.2.0
spanEnd :: (Char -> Bool) -> ByteString -> (ByteString, ByteString)
spanEnd p = L.spanEnd (\w -> p (w2c w))
{-# INLINE spanEnd #-}

{-
-- | 'breakChar' breaks its ByteString argument at the first occurrence
-- of the specified Char. It is more efficient than 'break' as it is
Expand Down
62 changes: 62 additions & 0 deletions Data/ByteString/Lazy/Internal/Deque.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
module Data.ByteString.Lazy.Internal.Deque (
3kyro marked this conversation as resolved.
Show resolved Hide resolved
Deque (..),
empty,
null,
cons,
snoc,
popFront,
popRear,
) where

import qualified Data.ByteString as S
import Data.Int (Int64)
import Prelude hiding (head, length, null)

-- A `S.ByteString` Deque used as an accumulator for lazy
-- Bytestring operations.
--
-- `front` lists chunks starting from the front end of the deque, while
-- `rear` lists chunks starting from the rear end, so both ends can be
-- extended by a plain list cons.
data Deque = Deque
  { front :: [S.ByteString]
  , rear :: [S.ByteString]
  , -- | Accumulated length of deque's elements. Kept strict so that
    -- repeated 'cons' / 'snoc' calls cannot pile up a chain of
    -- unevaluated additions.
    elemLength :: !Int64
  }

-- The `Deque` holding no chunks at all; its accumulated length is 0
empty :: Deque
empty = Deque {front = [], rear = [], elemLength = 0}

-- Does the `Deque` have an accumulated length of zero?
-- Only the stored length is inspected, never the chunk lists.
-- O(1)
null :: Deque -> Bool
null (Deque _ _ total) = total == 0

-- Push a `S.ByteString` onto the front of the `Deque`,
-- adding its length to the accumulated total
-- O(1)
cons :: S.ByteString -> Deque -> Deque
cons x dq =
  dq
    { front = x : front dq
    , elemLength = elemLength dq + len x
    }

-- Push a `S.ByteString` onto the rear of the `Deque`,
-- adding its length to the accumulated total
-- O(1)
snoc :: S.ByteString -> Deque -> Deque
snoc x dq =
  dq
    { rear = x : rear dq
    , elemLength = elemLength dq + len x
    }

-- Length of a strict chunk, widened to the `Int64` used for deque totals
len :: S.ByteString -> Int64
len = fromIntegral . S.length

-- Remove the `S.ByteString` at the front of the `Deque`.
-- Returns it together with the updated Deque, or Nothing if the Deque
-- holds no chunks. When the front list is exhausted, the rear list is
-- reversed and becomes the new front list.
-- O(1) , occasionally O(n)
popFront :: Deque -> Maybe (S.ByteString, Deque)
popFront (Deque (x : xs) rs acc) = Just (x, Deque xs rs (acc - len x))
popFront (Deque [] rs acc) =
  case reverse rs of
    x : xs -> Just (x, Deque xs [] (acc - len x))
    []     -> Nothing

-- Remove the `S.ByteString` at the rear of the `Deque`.
-- Returns it together with the updated Deque, or Nothing if the Deque
-- holds no chunks. When the rear list is exhausted, the front list is
-- reversed and becomes the new rear list.
-- O(1) , occasionally O(n)
popRear :: Deque -> Maybe (S.ByteString, Deque)
popRear (Deque fs (x : xs) acc) = Just (x, Deque fs xs (acc - len x))
popRear (Deque fs [] acc) =
  case reverse fs of
    x : xs -> Just (x, Deque [] xs (acc - len x))
    []     -> Nothing
1 change: 1 addition & 0 deletions bytestring.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ library
Data.ByteString.Builder.Internal
Data.ByteString.Builder.Prim.Internal
other-modules: Data.ByteString.Builder.ASCII
Data.ByteString.Lazy.Internal.Deque
3kyro marked this conversation as resolved.
Show resolved Hide resolved
Data.ByteString.Builder.Prim.ASCII
Data.ByteString.Builder.Prim.Binary
Data.ByteString.Builder.Prim.Internal.Base16
Expand Down
Loading