haskell · Bodigrim · Jul 28, 2021 · May 26, 2021 · May 26, 2021 · May 27, 2021
diff --git a/Data/ByteString/Lazy.hs b/Data/ByteString/Lazy.hs
@@ -127,12 +127,18 @@ module Data.ByteString.Lazy (
 
         -- ** Breaking strings
         take,
+        takeEnd,
         drop,
+        dropEnd,
         splitAt,
         takeWhile,
+        takeWhileEnd,
         dropWhile,
+        dropWhileEnd,
         span,
+        spanEnd,
         break,
+        breakEnd,
         group,
         groupBy,
         inits,
@@ -220,10 +226,12 @@ import Prelude hiding
     ,getContents,getLine,putStr,putStrLn ,zip,zipWith,unzip,notElem)
 
 import qualified Data.List              as L  -- L for list/lazy
+import qualified Data.Bifunctor         as BF
 import qualified Data.ByteString        as P  (ByteString) -- type name only
 import qualified Data.ByteString        as S  -- S for strict (hmm...)
 import qualified Data.ByteString.Internal as S
 import qualified Data.ByteString.Unsafe as S
+import qualified Data.ByteString.Lazy.Internal.Deque as D
 import Data.ByteString.Lazy.Internal
 
 import Control.Monad            (mplus)
@@ -684,6 +692,28 @@ take i cs0         = take' i cs0
             then Chunk (S.take (fromIntegral n) c) Empty
             else Chunk c (take' (n - fromIntegral (S.length c)) cs)
 
+-- | /O(c)/ @'takeEnd' n xs@ returns the last @n@ bytes of @xs@, i.e. @'drop' ('length' xs - n) xs@.
+--
+-- >>> takeEnd 3 "abcdefg"
+-- "efg"
+-- >>> takeEnd 0 "abcdefg"
+-- ""
+-- >>> takeEnd 4 "abc"
+-- "abc"
+--
+-- @since 0.11.2.0
+takeEnd :: Int64 -> ByteString -> ByteString
+takeEnd i cs0
+    | i <= 0    = Empty
+    | otherwise = snd (foldrChunks step (i, Empty) cs0)
+  where
+    -- accumulator: (bytes still wanted, suffix collected so far)
+    step c (want, acc)
+        | want == 0   = (0, acc)
+        | want > size = (want - size, Chunk c acc)
+        | otherwise   = (0, Chunk (S.takeEnd (fromIntegral want) c) acc)
+      where size = fromIntegral (S.length c)
+
 -- | /O(n\/c)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@
 -- elements, or @[]@ if @n > 'length' xs@.
 drop  :: Int64 -> ByteString -> ByteString
@@ -696,6 +726,52 @@ drop i cs0 = drop' i cs0
             then Chunk (S.drop (fromIntegral n) c) cs
             else drop' (n - fromIntegral (S.length c)) cs
 
+-- | /O(n)/ @'dropEnd' n xs@ drops the last @n@ bytes of @xs@, i.e. @'take' ('length' xs - n) xs@.
+--
+-- >>> dropEnd 3 "abcdefg"
+-- "abcd"
+-- >>> dropEnd 0 "abcdefg"
+-- "abcdefg"
+-- >>> dropEnd 4 "abc"
+-- ""
+--
+-- @since 0.11.2.0
+dropEnd :: Int64 -> ByteString -> ByteString
+dropEnd i p | i <= 0 = p
+dropEnd i p = go D.empty p
+  where go :: D.Deque -> ByteString -> ByteString
+        go deque (Chunk c cs)
+            | D.elemLength deque < i = go (D.snoc c deque) cs
+            | otherwise              =
+                  let (output, deque') = getOutput [] (D.snoc c deque)
+                    -- lazy 'L.foldl' on purpose: the recursive tail must stay unevaluated
+                    in L.foldl (flip chunk) (go deque' cs) output
+        go deque Empty               = dropElements deque i
+
+        len c = fromIntegral (S.length c) :: Int64
+
+        -- get all `S.ByteString` from the front of the accumulating deque
+        -- for which we know they won't be dropped
+        getOutput :: [S.ByteString] -> D.Deque -> ([S.ByteString], D.Deque)
+        getOutput out deque = case D.popFront deque of
+            Nothing                       -> (out, deque)
+            Just (x, deque') | D.elemLength deque' >= i -> getOutput (x:out) deque'
+                             | otherwise  -> (out, deque)
+
+        -- drop n bytes from the rear of the `deque`; n stays 'Int64' so it is never truncated
+        dropElements :: D.Deque -> Int64 -> ByteString
+        dropElements deque n = case D.popRear deque of
+            Nothing                       -> Empty
+            Just (x, deque') | len x <= n -> dropElements deque' (n - len x)
+                             | otherwise  -> -- here n < len x, so n fits in an 'Int'
+                                fromDeque (D.snoc (S.dropEnd (fromIntegral n) x) deque')
+
+        -- build a lazy ByteString from an accumulating `deque`
+        fromDeque :: D.Deque -> ByteString
+        fromDeque deque =
+            L.foldr chunk Empty (D.front deque) `append`
+            L.foldl' (flip chunk) Empty (D.rear deque)
+
 -- | /O(n\/c)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@.
 splitAt :: Int64 -> ByteString -> (ByteString, ByteString)
 splitAt i cs0 | i <= 0 = (Empty, cs0)
@@ -722,6 +798,23 @@ takeWhile f = takeWhile'
             n | n < S.length c -> Chunk (S.take n c) Empty
               | otherwise      -> Chunk c (takeWhile' cs)
 
+-- | Longest (possibly empty) suffix of the string whose elements all
+-- satisfy the predicate.
+--
+-- @'takeWhileEnd' p@ is equivalent to @'reverse' . 'takeWhile' p . 'reverse'@.
+--
+-- @since 0.11.2.0
+takeWhileEnd :: (Word8 -> Bool) -> ByteString -> ByteString
+takeWhileEnd f = snd . foldrChunks step (True, Empty)
+  where
+    -- the flag stays 'True' while every chunk seen so far (coming from
+    -- the end) consisted solely of matching bytes
+    step c (growing, acc)
+        | not growing                 = (False, acc)
+        | S.length kept == S.length c = (True, Chunk c acc)
+        | otherwise                   = (False, fromStrict kept `append` acc)
+      where kept = S.takeWhileEnd f c
+
 -- | Similar to 'P.dropWhile',
 -- drops the longest (possibly empty) prefix of elements
 -- satisfying the predicate and returns the remainder.
@@ -733,6 +826,25 @@ dropWhile f = dropWhile'
             n | n < S.length c -> Chunk (S.drop n c) cs
               | otherwise      -> dropWhile' cs
 
+-- | Lazy counterpart of 'S.dropWhileEnd': removes the longest (possibly
+-- empty) suffix of elements satisfying the predicate and returns what is left.
+-- Chunks whose last byte matches are held back until their fate is known.
+--
+-- @'dropWhileEnd' p@ is equivalent to @'reverse' . 'dropWhile' p . 'reverse'@.
+--
+-- @since 0.11.2.0
+dropWhileEnd :: (Word8 -> Bool) -> ByteString -> ByteString
+dropWhileEnd f = scan []
+  where
+    scan held Empty = trim held
+    scan held (Chunk c cs)
+        | f (S.last c) = scan (c : held) cs
+        | otherwise    = L.foldl (flip Chunk) (scan [] cs) (c : held)
+    trim []       = Empty
+    trim (x : xs) = case S.dropWhileEnd f x of
+        x' | S.null x' -> trim xs
+           | otherwise -> L.foldl' (flip Chunk) Empty (x' : xs)
+
 -- | Similar to 'P.break',
 -- returns the longest (possibly empty) prefix of elements which __do not__
 -- satisfy the predicate and the remainder of the string.
@@ -750,6 +862,28 @@ break f = break'
               | otherwise      -> let (cs', cs'') = break' cs
                                    in (Chunk c cs', cs'')
 
+
+-- | Splits off the longest (possibly empty) suffix of elements which __do not__
+-- satisfy the predicate; the result is @(remainder, suffix)@, in that order.
+--
+-- 'breakEnd' @p@ is equivalent to @'spanEnd' (not . p)@ and to @('dropWhileEnd' (not . p) &&& 'takeWhileEnd' (not . p))@.
+--
+-- @since 0.11.2.0
+breakEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
+breakEnd f = scan []
+  where
+    -- put a strict chunk in front of the first component of the pair
+    consFst c = BF.first (Chunk c)
+    scan held Empty = split held
+    scan held (Chunk c cs)
+        | f (S.last c) = L.foldl (flip consFst) (scan [] cs) (c : held)
+        | otherwise    = scan (c : held) cs
+    split []       = (Empty, Empty)
+    split (x : xs) = case S.breakEnd f x of
+        (x', x'') | S.null x' -> let (y, y') = split xs in (y, y' `append` fromStrict x)
+                  | otherwise -> L.foldl' (flip consFst) (fromStrict x', fromStrict x'') xs
+
+
 --
 -- TODO
 --
@@ -799,6 +933,25 @@ spanByte c (LPS ps) = case (spanByte' ps) of (a,b) -> (LPS a, LPS b)
 span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 span p = break (not . p)
 
+-- | Splits off the longest (possibly empty) suffix of elements satisfying
+-- the predicate; the result is @(remainder, suffix)@, in that order.
+--
+-- 'spanEnd' @p@ is equivalent to @'breakEnd' (not . p)@ and to @('dropWhileEnd' p &&& 'takeWhileEnd' p)@.
+--
+-- We have
+--
+-- > spanEnd (not . isSpace) "x y z" == ("x y ", "z")
+--
+-- and
+--
+-- > spanEnd (not . isSpace) ps
+-- >    ==
+-- > let (x, y) = span (not . isSpace) (reverse ps) in (reverse y, reverse x)
+--
+-- @since 0.11.2.0
+spanEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
+spanEnd p = breakEnd (\w -> not (p w))
+
 -- | /O(n)/ Splits a 'ByteString' into components delimited by
 -- separators, where the predicate returns True for a separator element.
 -- The resulting components do not contain the separators.  Two adjacent

diff --git a/Data/ByteString/Lazy/Char8.hs b/Data/ByteString/Lazy/Char8.hs
@@ -105,12 +105,18 @@ module Data.ByteString.Lazy.Char8 (
 
         -- ** Breaking strings
         take,
+        takeEnd,
         drop,
+        dropEnd,
         splitAt,
         takeWhile,
+        takeWhileEnd,
         dropWhile,
+        dropWhileEnd,
         span,
+        spanEnd,
         break,
+        breakEnd,
         group,
         groupBy,
         inits,
@@ -203,7 +209,7 @@ module Data.ByteString.Lazy.Char8 (
 import Data.ByteString.Lazy
         (fromChunks, toChunks
         ,empty,null,length,tail,init,append,reverse,transpose,cycle
-        ,concat,take,drop,splitAt,intercalate
+        ,concat,take,takeEnd,drop,dropEnd,splitAt,intercalate
         ,isPrefixOf,isSuffixOf,group,inits,tails,copy
         ,stripPrefix,stripSuffix
         ,hGetContents, hGet, hPut, getContents
@@ -456,22 +462,68 @@ takeWhile :: (Char -> Bool) -> ByteString -> ByteString
 takeWhile f = L.takeWhile (f . w2c)
 {-# INLINE takeWhile #-}
 
+-- | 'Char' variant of 'L.takeWhileEnd': the longest (possibly empty) suffix
+-- of characters satisfying the predicate.
+--
+-- @'takeWhileEnd' p@ is equivalent to @'reverse' . 'takeWhile' p . 'reverse'@.
+--
+-- @since 0.11.2.0
+takeWhileEnd :: (Char -> Bool) -> ByteString -> ByteString
+takeWhileEnd f = L.takeWhileEnd (\w -> f (w2c w))
+{-# INLINE takeWhileEnd #-}
+
 -- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@.
 dropWhile :: (Char -> Bool) -> ByteString -> ByteString
 dropWhile f = L.dropWhile (f . w2c)
 {-# INLINE dropWhile #-}
 
+-- | 'Char' variant of 'L.dropWhileEnd':
+-- drops the longest (possibly empty) suffix of characters
+-- satisfying the predicate and returns the remainder.
+--
+-- @'dropWhileEnd' p@ is equivalent to @'reverse' . 'dropWhile' p . 'reverse'@.
+--
+-- @since 0.11.2.0
+dropWhileEnd :: (Char -> Bool) -> ByteString -> ByteString
+dropWhileEnd f = L.dropWhileEnd (\w -> f (w2c w))
+{-# INLINE dropWhileEnd #-}
+
 -- | 'break' @p@ is equivalent to @'span' ('not' . p)@.
 break :: (Char -> Bool) -> ByteString -> (ByteString, ByteString)
 break f = L.break (f . w2c)
 {-# INLINE break #-}
 
+-- | 'breakEnd' behaves like 'break' but scans from the end of the 'ByteString'.
+--
+-- > breakEnd p == spanEnd (not . p)
+--
+-- @since 0.11.2.0
+breakEnd :: (Char -> Bool) -> ByteString -> (ByteString, ByteString)
+breakEnd f = L.breakEnd (\w -> f (w2c w))
+{-# INLINE breakEnd #-}
+
 -- | 'span' @p xs@ breaks the ByteString into two segments. It is
 -- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@
 span :: (Char -> Bool) -> ByteString -> (ByteString, ByteString)
 span f = L.span (f . w2c)
 {-# INLINE span #-}
 
+-- | 'spanEnd' behaves like 'span' but scans from the end of the 'ByteString'.
+-- We have
+--
+-- > spanEnd (not . isSpace) "x y z" == ("x y ", "z")
+--
+-- and
+--
+-- > spanEnd (not . isSpace) ps
+-- >    ==
+-- > let (x, y) = span (not . isSpace) (reverse ps) in (reverse y, reverse x)
+--
+-- @since 0.11.2.0
+spanEnd :: (Char -> Bool) -> ByteString -> (ByteString, ByteString)
+spanEnd f = L.spanEnd (\w -> f (w2c w))
+{-# INLINE spanEnd #-}
+
 {-
 -- | 'breakChar' breaks its ByteString argument at the first occurence
 -- of the specified Char. It is more efficient than 'break' as it is

diff --git a/Data/ByteString/Lazy/Internal/Deque.hs b/Data/ByteString/Lazy/Internal/Deque.hs
@@ -0,0 +1,62 @@
+module Data.ByteString.Lazy.Internal.Deque (
+    Deque (..),
+    empty,
+    null,
+    cons,
+    snoc,
+    popFront,
+    popRear,
+) where
+
+import qualified Data.ByteString as S
+import Data.Int (Int64)
+import Prelude hiding (head, length, null)
+
+-- A `S.ByteString` Deque used as an accumulator for lazy ByteString operations.
+-- `front` holds chunks in order; `rear` holds them newest-first (see `snoc`).
+data Deque = Deque
+    { front :: [S.ByteString]
+    , rear :: [S.ByteString]
+    , -- | Accumulated byte length of all chunks; strict so no thunk chain builds up
+      elemLength :: !Int64
+    }
+
+-- A `Deque` holding no chunks, with an accumulated length of zero
+empty :: Deque
+empty = Deque { front = [], rear = [], elemLength = 0 }
+
+-- Is the `Deque` empty? Decided by the accumulated length being zero,
+-- so the chunk lists themselves are not inspected. O(1)
+null :: Deque -> Bool
+null = (== 0) . elemLength
+
+-- Push a `S.ByteString` onto the front of the `Deque`,
+-- adding its length to the accumulated total. O(1)
+cons :: S.ByteString -> Deque -> Deque
+cons x deque = deque { front = x : front deque, elemLength = elemLength deque + len x }
+
+-- Push a `S.ByteString` onto the rear of the `Deque` (the `rear` list is kept
+-- newest-first), adding its length to the accumulated total. O(1)
+snoc :: S.ByteString -> Deque -> Deque
+snoc x deque = deque { rear = x : rear deque, elemLength = elemLength deque + len x }
+
+len :: S.ByteString -> Int64  -- length of a strict chunk, widened to `Int64`
+len = fromIntegral . S.length
+
+-- Remove the first `S.ByteString` of the `Deque`.
+-- Gives the chunk and the remaining Deque, or Nothing when no chunk is left.
+-- O(1) , occasionally O(n): an empty front is refilled by reversing the rear
+popFront :: Deque -> Maybe (S.ByteString, Deque)
+popFront (Deque (x : xs) rs acc) = Just (x, Deque xs rs (acc - len x))
+popFront (Deque [] rs acc)
+    | x : xs <- reverse rs = Just (x, Deque xs [] (acc - len x))
+    | otherwise            = Nothing
+
+-- Remove the last `S.ByteString` of the `Deque`.
+-- Gives the chunk and the remaining Deque, or Nothing when no chunk is left.
+-- O(1) , occasionally O(n): an empty rear is refilled by reversing the front
+popRear :: Deque -> Maybe (S.ByteString, Deque)
+popRear (Deque fs (x : xs) acc) = Just (x, Deque fs xs (acc - len x))
+popRear (Deque fs [] acc)
+    | x : xs <- reverse fs = Just (x, Deque [] xs (acc - len x))
+    | otherwise            = Nothing
diff --git a/bytestring.cabal b/bytestring.cabal
@@ -90,6 +90,7 @@ library
                      Data.ByteString.Builder.Internal
                      Data.ByteString.Builder.Prim.Internal
   other-modules:     Data.ByteString.Builder.ASCII
+                     Data.ByteString.Lazy.Internal.Deque
                      Data.ByteString.Builder.Prim.ASCII
                      Data.ByteString.Builder.Prim.Binary
                      Data.ByteString.Builder.Prim.Internal.Base16