Skip to content

Commit

Permalink
Add lazy dropEnd and friends (#395)
Browse files Browse the repository at this point in the history
* Add lazy dropEnd and friends

Fixes #306.
Utilizes an eager strategy as described in the original issue. Chunks
are eagerly evaluated but the overall structure and pointers are kept
intact.

* Fix `since` version number

* Use `foldrChunks` instead of `length` based checks

* Add review changes

- Pass Int64 to accumulators
- Clean up `breakEnd`
- Implement lazy version of `dropWhileEnd`
- Add dropWhileEnd lazy test

* Fix `breakEnd` and `spanEnd` lazyness

* Make `dropEnd` lazier

* Formatting

* Fix lazy `dropEnd`

* Add `Deque` module

`dropEnd` now uses `Deque` for handling the accumulated bytestrigs

* Normalize function names

* Return `Maybe (S.ByteString, Deque)` from pops

Plus all other review suggestions

* Add review changes

* Rename `dropElements` to `dropEndBytes`

* Add examples + style fixes

* Update Data/ByteString/Lazy/Internal/Deque.hs

Co-authored-by: Simon Jakobi <simon.jakobi@gmail.com>

* Replace `elemLength` with `byteLength`

* Upadate examples

Co-authored-by: Simon Jakobi <simon.jakobi@gmail.com>
  • Loading branch information
2 people authored and Bodigrim committed Aug 1, 2021
1 parent 3a390c7 commit ebd7089
Show file tree
Hide file tree
Showing 5 changed files with 296 additions and 6 deletions.
166 changes: 166 additions & 0 deletions Data/ByteString/Lazy.hs
Original file line number Diff line number Diff line change
Expand Up @@ -127,12 +127,18 @@ module Data.ByteString.Lazy (

-- ** Breaking strings
take,
takeEnd,
drop,
dropEnd,
splitAt,
takeWhile,
takeWhileEnd,
dropWhile,
dropWhileEnd,
span,
spanEnd,
break,
breakEnd,
group,
groupBy,
inits,
Expand Down Expand Up @@ -220,10 +226,12 @@ import Prelude hiding
,getContents,getLine,putStr,putStrLn ,zip,zipWith,unzip,notElem)

import qualified Data.List as L -- L for list/lazy
import qualified Data.Bifunctor as BF
import qualified Data.ByteString as P (ByteString) -- type name only
import qualified Data.ByteString as S -- S for strict (hmm...)
import qualified Data.ByteString.Internal as S
import qualified Data.ByteString.Unsafe as S
import qualified Data.ByteString.Lazy.Internal.Deque as D
import Data.ByteString.Lazy.Internal

import Control.Monad (mplus)
Expand Down Expand Up @@ -684,6 +692,28 @@ take i cs0 = take' i cs0
then Chunk (S.take (fromIntegral n) c) Empty
else Chunk c (take' (n - fromIntegral (S.length c)) cs)

-- | /O(c)/ @'takeEnd' n xs@ is equivalent to @'drop' ('length' xs - n) xs@.
-- Takes @n@ elements from end of bytestring.
--
-- >>> takeEnd 3 "abcdefg"
-- "efg"
-- >>> takeEnd 0 "abcdefg"
-- ""
-- >>> takeEnd 4 "abc"
-- "abc"
--
-- @since 0.11.2.0
takeEnd :: Int64 -> ByteString -> ByteString
takeEnd i _ | i <= 0 = Empty
takeEnd i cs0 = takeEnd' i cs0
where takeEnd' 0 _ = Empty
takeEnd' n cs =
snd $ foldrChunks takeTuple (n,Empty) cs
takeTuple _ (0, cs) = (0, cs)
takeTuple c (n, cs)
| n > fromIntegral (S.length c) = (n - fromIntegral (S.length c), Chunk c cs)
| otherwise = (0, Chunk (S.takeEnd (fromIntegral n) c) cs)

-- | /O(n\/c)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@
-- elements, or @[]@ if @n > 'length' xs@.
drop :: Int64 -> ByteString -> ByteString
Expand All @@ -696,6 +726,57 @@ drop i cs0 = drop' i cs0
then Chunk (S.drop (fromIntegral n) c) cs
else drop' (n - fromIntegral (S.length c)) cs

-- | /O(n)/ @'dropEnd' n xs@ is equivalent to @'take' ('length' xs - n) xs@.
-- Drops @n@ elements from end of bytestring.
--
-- >>> dropEnd 3 "abcdefg"
-- "abcd"
-- >>> dropEnd 0 "abcdefg"
-- "abcdefg"
-- >>> dropEnd 4 "abc"
-- ""
--
-- @since 0.11.2.0
dropEnd :: Int64 -> ByteString -> ByteString
dropEnd i p | i <= 0 = p
dropEnd i p = go D.empty p
where go :: D.Deque -> ByteString -> ByteString
go deque (Chunk c cs)
| D.byteLength deque < i = go (D.snoc c deque) cs
| otherwise =
let (output, deque') = getOutput empty (D.snoc c deque)
in foldrChunks Chunk (go deque' cs) output
go deque Empty = fromDeque $ dropEndBytes deque i

len c = fromIntegral (S.length c)

-- get a `ByteString` from all the front chunks of the accumulating deque
-- for which we know they won't be dropped
getOutput :: ByteString -> D.Deque -> (ByteString, D.Deque)
getOutput out deque = case D.popFront deque of
Nothing -> (reverseChunks out, deque)
Just (x, deque') | D.byteLength deque' >= i ->
getOutput (Chunk x out) deque'
_ -> (reverseChunks out, deque)

-- reverse a `ByteString`s chunks, keeping all internal `S.ByteString`s
-- unchanged
reverseChunks = foldlChunks (flip Chunk) empty

-- drop n elements from the rear of the accumulating `deque`
dropEndBytes :: D.Deque -> Int64 -> D.Deque
dropEndBytes deque n = case D.popRear deque of
Nothing -> deque
Just (deque', x) | len x <= n -> dropEndBytes deque' (n - len x)
| otherwise ->
D.snoc (S.dropEnd (fromIntegral n) x) deque'

-- build a lazy ByteString from an accumulating `deque`
fromDeque :: D.Deque -> ByteString
fromDeque deque =
L.foldr chunk Empty (D.front deque) `append`
L.foldl' (flip chunk) Empty (D.rear deque)

-- | /O(n\/c)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@.
splitAt :: Int64 -> ByteString -> (ByteString, ByteString)
splitAt i cs0 | i <= 0 = (Empty, cs0)
Expand All @@ -722,6 +803,27 @@ takeWhile f = takeWhile'
n | n < S.length c -> Chunk (S.take n c) Empty
| otherwise -> Chunk c (takeWhile' cs)

-- | Returns the longest (possibly empty) suffix of elements
-- satisfying the predicate.
--
-- @'takeWhileEnd' p@ is equivalent to @'reverse' . 'takeWhile' p . 'reverse'@.
--
-- >>> {-# LANGUAGE OverloadedLists #-)
-- >>> takeWhileEnd even [1,2,3,4,6]
-- [4,6]
--
-- @since 0.11.2.0
takeWhileEnd :: (Word8 -> Bool) -> ByteString -> ByteString
takeWhileEnd f = takeWhileEnd'
where takeWhileEnd' Empty = Empty
takeWhileEnd' cs =
snd $ foldrChunks takeTuple (True,Empty) cs
takeTuple _ (False, bs) = (False,bs)
takeTuple c (True,bs) =
case S.takeWhileEnd f c of
c' | S.length c' == S.length c -> (True, Chunk c bs)
| otherwise -> (False, fromStrict c' `append` bs)

-- | Similar to 'P.dropWhile',
-- drops the longest (possibly empty) prefix of elements
-- satisfying the predicate and returns the remainder.
Expand All @@ -733,6 +835,29 @@ dropWhile f = dropWhile'
n | n < S.length c -> Chunk (S.drop n c) cs
| otherwise -> dropWhile' cs

-- | Similar to 'P.dropWhileEnd',
-- drops the longest (possibly empty) suffix of elements
-- satisfying the predicate and returns the remainder.
--
-- @'dropWhileEnd' p@ is equivalent to @'reverse' . 'dropWhile' p . 'reverse'@.
--
-- >>> {-# LANGUAGE OverloadedLists #-)
-- >>> dropWhileEnd even [1,2,3,4,6]
-- [1,2,3]
--
-- @since 0.11.2.0
dropWhileEnd :: (Word8 -> Bool) -> ByteString -> ByteString
dropWhileEnd f = go []
where go acc (Chunk c cs)
| f (S.last c) = go (c : acc) cs
| otherwise = L.foldl (flip Chunk) (go [] cs) (c : acc)
go acc Empty = dropEndBytes acc
dropEndBytes [] = Empty
dropEndBytes (x : xs) =
case S.dropWhileEnd f x of
x' | S.null x' -> dropEndBytes xs
| otherwise -> L.foldl' (flip Chunk) Empty (x' : xs)

-- | Similar to 'P.break',
-- returns the longest (possibly empty) prefix of elements which __do not__
-- satisfy the predicate and the remainder of the string.
Expand All @@ -750,6 +875,28 @@ break f = break'
| otherwise -> let (cs', cs'') = break' cs
in (Chunk c cs', cs'')


-- | Returns the longest (possibly empty) suffix of elements which __do not__
-- satisfy the predicate and the remainder of the string.
--
-- 'breakEnd' @p@ is equivalent to @'spanEnd' (not . p)@ and to @('takeWhileEnd' (not . p) &&& 'dropWhileEnd' (not . p))@.
--
-- @since 0.11.2.0
breakEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
breakEnd f = go []
where go acc (Chunk c cs)
| f (S.last c) = L.foldl (flip $ BF.first . Chunk) (go [] cs) (c : acc)
| otherwise = go (c : acc) cs
go acc Empty = dropEndBytes acc
dropEndBytes [] = (Empty, Empty)
dropEndBytes (x : xs) =
case S.breakEnd f x of
(x', x'') | S.null x' -> let (y, y') = dropEndBytes xs
in (y, y' `append` fromStrict x)
| otherwise ->
L.foldl' (flip $ BF.first . Chunk) (fromStrict x', fromStrict x'') xs


--
-- TODO
--
Expand Down Expand Up @@ -799,6 +946,25 @@ spanByte c (LPS ps) = case (spanByte' ps) of (a,b) -> (LPS a, LPS b)
span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
span p = break (not . p)

-- | Returns the longest (possibly empty) suffix of elements
-- satisfying the predicate and the remainder of the string.
--
-- 'spanEnd' @p@ is equivalent to @'breakEnd' (not . p)@ and to @('takeWhileEnd' p &&& 'dropWhileEnd' p)@.
--
-- We have
--
-- > spanEnd (not . isSpace) "x y z" == ("x y ", "z")
--
-- and
--
-- > spanEnd (not . isSpace) ps
-- > ==
-- > let (x, y) = span (not . isSpace) (reverse ps) in (reverse y, reverse x)
--
-- @since 0.11.2.0
spanEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
spanEnd p = breakEnd (not . p)

-- | /O(n)/ Splits a 'ByteString' into components delimited by
-- separators, where the predicate returns True for a separator element.
-- The resulting components do not contain the separators. Two adjacent
Expand Down
54 changes: 53 additions & 1 deletion Data/ByteString/Lazy/Char8.hs
Original file line number Diff line number Diff line change
Expand Up @@ -105,12 +105,18 @@ module Data.ByteString.Lazy.Char8 (

-- ** Breaking strings
take,
takeEnd,
drop,
dropEnd,
splitAt,
takeWhile,
takeWhileEnd,
dropWhile,
dropWhileEnd,
span,
spanEnd,
break,
breakEnd,
group,
groupBy,
inits,
Expand Down Expand Up @@ -203,7 +209,7 @@ module Data.ByteString.Lazy.Char8 (
import Data.ByteString.Lazy
(fromChunks, toChunks
,empty,null,length,tail,init,append,reverse,transpose,cycle
,concat,take,drop,splitAt,intercalate
,concat,take,takeEnd,drop,dropEnd,splitAt,intercalate
,isPrefixOf,isSuffixOf,group,inits,tails,copy
,stripPrefix,stripSuffix
,hGetContents, hGet, hPut, getContents
Expand Down Expand Up @@ -449,22 +455,68 @@ takeWhile :: (Char -> Bool) -> ByteString -> ByteString
takeWhile f = L.takeWhile (f . w2c)
{-# INLINE takeWhile #-}

-- | Returns the longest (possibly empty) suffix of elements
-- satisfying the predicate.
--
-- @'takeWhileEnd' p@ is equivalent to @'reverse' . 'takeWhile' p . 'reverse'@.
--
-- @since 0.11.2.0
takeWhileEnd :: (Char -> Bool) -> ByteString -> ByteString
takeWhileEnd f = L.takeWhileEnd (f . w2c)
{-# INLINE takeWhileEnd #-}

-- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@.
dropWhile :: (Char -> Bool) -> ByteString -> ByteString
dropWhile f = L.dropWhile (f . w2c)
{-# INLINE dropWhile #-}

-- | Similar to 'P.dropWhileEnd',
-- drops the longest (possibly empty) suffix of elements
-- satisfying the predicate and returns the remainder.
--
-- @'dropWhileEnd' p@ is equivalent to @'reverse' . 'dropWhile' p . 'reverse'@.
--
-- @since 0.11.2.0
dropWhileEnd :: (Char -> Bool) -> ByteString -> ByteString
dropWhileEnd f = L.dropWhileEnd (f . w2c)
{-# INLINE dropWhileEnd #-}

-- | 'break' @p@ is equivalent to @'span' ('not' . p)@.
break :: (Char -> Bool) -> ByteString -> (ByteString, ByteString)
break f = L.break (f . w2c)
{-# INLINE break #-}

-- | 'breakEnd' behaves like 'break' but from the end of the 'ByteString'
--
-- breakEnd p == spanEnd (not.p)
--
-- @since 0.11.2.0
breakEnd :: (Char -> Bool) -> ByteString -> (ByteString, ByteString)
breakEnd f = L.breakEnd (f . w2c)
{-# INLINE breakEnd #-}

-- | 'span' @p xs@ breaks the ByteString into two segments. It is
-- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@
span :: (Char -> Bool) -> ByteString -> (ByteString, ByteString)
span f = L.span (f . w2c)
{-# INLINE span #-}

-- | 'spanEnd' behaves like 'span' but from the end of the 'ByteString'.
-- We have
--
-- > spanEnd (not.isSpace) "x y z" == ("x y ","z")
--
-- and
--
-- > spanEnd (not . isSpace) ps
-- > ==
-- > let (x,y) = span (not.isSpace) (reverse ps) in (reverse y, reverse x)
--
-- @since 0.11.2.0
spanEnd :: (Char -> Bool) -> ByteString -> (ByteString, ByteString)
spanEnd f = L.spanEnd (f . w2c)
{-# INLINE spanEnd #-}

{-
-- | 'breakChar' breaks its ByteString argument at the first occurence
-- of the specified Char. It is more efficient than 'break' as it is
Expand Down
66 changes: 66 additions & 0 deletions Data/ByteString/Lazy/Internal/Deque.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
{- |
A Deque used for accumulating `S.ByteString`s in the lazy
version of `dropEnd`.
-}
module Data.ByteString.Lazy.Internal.Deque (
Deque (..),
empty,
null,
cons,
snoc,
popFront,
popRear,
) where

import qualified Data.ByteString as S
import Data.Int (Int64)
import Prelude hiding (head, length, null)

-- A `S.ByteString` Deque used as an accumulator for lazy
-- Bytestring operations
data Deque = Deque
{ front :: [S.ByteString]
, rear :: [S.ByteString]
, -- | Total length in bytes
byteLength :: !Int64
}

-- An empty Deque
empty :: Deque
empty = Deque [] [] 0

-- Is the `Deque` empty?
-- O(1)
null :: Deque -> Bool
null deque = byteLength deque == 0

-- Add a `S.ByteString` to the front of the `Deque`
-- O(1)
cons :: S.ByteString -> Deque -> Deque
cons x (Deque fs rs acc) = Deque (x : fs) rs (acc + len x)

-- Add a `S.ByteString` to the rear of the `Deque`
-- O(1)
snoc :: S.ByteString -> Deque -> Deque
snoc x (Deque fs rs acc) = Deque fs (x : rs) (acc + len x)

len :: S.ByteString -> Int64
len x = fromIntegral $ S.length x

-- Pop a `S.ByteString` from the front of the `Deque`
-- Returns the bytestring and the updated Deque, or Nothing if the Deque is empty
-- O(1) , occasionally O(n)
popFront :: Deque -> Maybe (S.ByteString, Deque)
popFront (Deque [] rs acc) = case reverse rs of
[] -> Nothing
x : xs -> Just (x, Deque xs [] (acc - len x))
popFront (Deque (x : xs) rs acc) = Just (x, Deque xs rs (acc - len x))

-- Pop a `S.ByteString` from the rear of the `Deque`
-- Returns the bytestring and the updated Deque, or Nothing if the Deque is empty
-- O(1) , occasionally O(n)
popRear :: Deque -> Maybe (Deque, S.ByteString)
popRear (Deque fs [] acc) = case reverse fs of
[] -> Nothing
x : xs -> Just (Deque [] xs (acc - len x), x)
popRear (Deque fs (x : xs) acc) = Just (Deque fs xs (acc - len x), x)
Loading

0 comments on commit ebd7089

Please sign in to comment.