haskell · Bodigrim · Jul 28, 2021 · May 26, 2021 · May 26, 2021 · May 27, 2021
diff --git a/Data/ByteString/Lazy.hs b/Data/ByteString/Lazy.hs
@@ -127,12 +127,18 @@ module Data.ByteString.Lazy (
 
         -- ** Breaking strings
         take,
+        takeEnd,
         drop,
+        dropEnd,
         splitAt,
         takeWhile,
+        takeWhileEnd,
         dropWhile,
+        dropWhileEnd,
         span,
+        spanEnd,
         break,
+        breakEnd,
         group,
         groupBy,
         inits,
@@ -220,13 +226,15 @@ import Prelude hiding
     ,getContents,getLine,putStr,putStrLn ,zip,zipWith,unzip,notElem)
 
 import qualified Data.List              as L  -- L for list/lazy
+import qualified Data.Bifunctor         as BF
 import qualified Data.ByteString        as P  (ByteString) -- type name only
 import qualified Data.ByteString        as S  -- S for strict (hmm...)
 import qualified Data.ByteString.Internal as S
 import qualified Data.ByteString.Unsafe as S
 import Data.ByteString.Lazy.Internal
 
 import Control.Monad            (mplus)
+import Data.Maybe               (listToMaybe)
 import Data.Word                (Word8)
 import Data.Int                 (Int64)
 import System.IO                (Handle,openBinaryFile,stdin,stdout,withBinaryFile,IOMode(..)
@@ -684,6 +692,28 @@ take i cs0         = take' i cs0
             then Chunk (S.take (fromIntegral n) c) Empty
             else Chunk c (take' (n - fromIntegral (S.length c)) cs)
 
+-- | /O(c)/ @'takeEnd' n xs@ returns the last @n@ elements of @xs@, or all of
+-- @xs@ when it holds fewer than @n@ elements. It is equivalent to
+-- @'drop' ('length' xs - n) xs@.
+--
+-- >>> takeEnd 3 "abcdefg"
+-- "efg"
+-- >>> takeEnd 0 "abcdefg"
+-- ""
+-- >>> takeEnd 4 "abc"
+-- "abc"
+--
+-- @since 0.11.2.0
+takeEnd :: Int64 -> ByteString -> ByteString
+takeEnd i cs0
+    | i <= 0    = Empty
+    | otherwise = snd (foldrChunks step (i, Empty) cs0)
+  where
+    -- The fold carries the number of bytes still wanted together with the
+    -- suffix assembled so far; once nothing more is wanted, the remaining
+    -- (earlier) chunks are skipped.
+    step c (wanted, suffix)
+        | wanted == 0   = (0, suffix)
+        | wanted > cLen = (wanted - cLen, Chunk c suffix)
+        | otherwise     = (0, Chunk (S.takeEnd (fromIntegral wanted) c) suffix)
+      where cLen = fromIntegral (S.length c)
+
 -- | /O(n\/c)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@
 -- elements, or @[]@ if @n > 'length' xs@.
 drop  :: Int64 -> ByteString -> ByteString
@@ -696,6 +726,43 @@ drop i cs0 = drop' i cs0
             then Chunk (S.drop (fromIntegral n) c) cs
             else drop' (n - fromIntegral (S.length c)) cs
 
+-- | /O(n)/ @'dropEnd' n xs@ is equivalent to @'take' ('length' xs - n) xs@.
+-- Drops @n@ elements from end of bytestring.
+--
+-- The result is produced incrementally: a chunk is emitted once at least
+-- @n@ bytes following it have been read from the input.
+--
+-- >>> dropEnd 3 "abcdefg"
+-- "abcd"
+-- >>> dropEnd 0 "abcdefg"
+-- "abcdefg"
+-- >>> dropEnd 4 "abc"
+-- ""
+--
+-- @since 0.11.2.0
+dropEnd :: Int64 -> ByteString -> ByteString
+dropEnd i p | i <= 0 = p
+dropEnd i p = go [] [] 0 p
+  where
+    -- Pending chunks are kept in a two-list queue: @front@ is oldest-first,
+    -- @back@ is newest-first, and @held@ is their combined length in bytes.
+    go front back held (Chunk c cs)
+        -- Fewer than @i@ bytes are buffered even with @c@, so no chunk can
+        -- be released yet.
+        | total < i = go front (c : back) total cs
+        | otherwise =
+            let (out, front', back', kept) = release [] front (c : back) total
+             -- 'L.foldl' (not 'L.foldl'') so that the recursive call stays
+             -- unevaluated until the consumer asks for it.
+             in L.foldl (flip chunk) (go front' back' kept cs) out
+      where total = held + len c
+    -- End of input: the queue holds the tail of the string, newest chunk
+    -- first once both lists are joined; cut @i@ bytes off its end.
+    go front back _ Empty = dropChunks (back ++ L.reverse front) i
+
+    len :: S.ByteString -> Int64
+    len c = fromIntegral (S.length c)
+
+    -- Pop chunks off the front of the queue while at least @i@ bytes would
+    -- remain buffered behind them. @out@ collects the released chunks,
+    -- most recently released first.
+    release out [] [] held = (out, [], [], held)
+    release out [] back held = release out (L.reverse back) [] held
+    release out front@(x : xs) back held
+        | held - len x >= i = release (x : out) xs back (held - len x)
+        | otherwise         = (out, front, back, held)
+
+    -- Remove @n@ bytes from a newest-first list of chunks and rebuild the
+    -- lazy bytestring in original order. The count stays 'Int64' until it is
+    -- known to be smaller than a single chunk, so it is never truncated.
+    dropChunks [] _ = Empty
+    dropChunks (c : cs) n
+        | len c <= n = dropChunks cs (n - len c)
+        | otherwise  = L.foldl' (flip chunk) Empty (S.dropEnd (fromIntegral n) c : cs)
+
 -- | /O(n\/c)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@.
 splitAt :: Int64 -> ByteString -> (ByteString, ByteString)
 splitAt i cs0 | i <= 0 = (Empty, cs0)
@@ -722,6 +789,23 @@ takeWhile f = takeWhile'
             n | n < S.length c -> Chunk (S.take n c) Empty
               | otherwise      -> Chunk c (takeWhile' cs)
 
+-- | 'takeWhileEnd' @p xs@ yields the longest (possibly empty) suffix of @xs@
+-- whose elements all satisfy @p@.
+--
+-- @'takeWhileEnd' p@ is equivalent to @'reverse' . 'takeWhile' p . 'reverse'@.
+--
+-- @since 0.11.2.0
+takeWhileEnd :: (Word8 -> Bool) -> ByteString -> ByteString
+takeWhileEnd f = snd . foldrChunks step (True, Empty)
+  where
+    -- The flag records whether every byte to the right has satisfied @f@;
+    -- once it is 'False' the suffix is final and earlier chunks are ignored.
+    step _ done@(False, _) = done
+    step c (True, suffix)
+        | S.length kept == S.length c = (True, Chunk c suffix)
+        | otherwise                   = (False, fromStrict kept `append` suffix)
+      where kept = S.takeWhileEnd f c
+
 -- | Similar to 'P.dropWhile',
 -- drops the longest (possibly empty) prefix of elements
 -- satisfying the predicate and returns the remainder.
@@ -733,6 +817,25 @@ dropWhile f = dropWhile'
             n | n < S.length c -> Chunk (S.drop n c) cs
               | otherwise      -> dropWhile' cs
 
+-- | Similar to 'P.dropWhileEnd':
+-- removes the longest (possibly empty) suffix of elements
+-- satisfying the predicate and returns what is left.
+--
+-- @'dropWhileEnd' p@ is equivalent to @'reverse' . 'dropWhile' p . 'reverse'@.
+--
+-- @since 0.11.2.0
+dropWhileEnd :: (Word8 -> Bool) -> ByteString -> ByteString
+dropWhileEnd f = go []
+  where
+    -- @pending@ holds, newest first, the chunks seen since the last chunk
+    -- whose final byte failed @f@; only these may still have to be trimmed.
+    go pending Empty = trimEnd pending
+    go pending (Chunk c cs)
+        | f (S.last c) = go (c : pending) cs
+        -- @c@ ends with a byte that is kept, so @c@ and everything pending
+        -- are part of the result. 'L.foldl' (not 'L.foldl'') leaves the
+        -- recursive call unevaluated.
+        | otherwise    = L.foldl (flip Chunk) (go [] cs) (c : pending)
+
+    -- Trim the trailing run from the newest pending chunk, moving on to
+    -- older chunks whenever a chunk is consumed entirely.
+    trimEnd [] = Empty
+    trimEnd (newest : older)
+        | S.null trimmed = trimEnd older
+        | otherwise      = L.foldl' (flip Chunk) Empty (trimmed : older)
+      where trimmed = S.dropWhileEnd f newest
+
 -- | Similar to 'P.break',
 -- returns the longest (possibly empty) prefix of elements which __do not__
 -- satisfy the predicate and the remainder of the string.
@@ -750,6 +853,28 @@ break f = break'
               | otherwise      -> let (cs', cs'') = break' cs
                                    in (Chunk c cs', cs'')
 
+
+-- | Returns the longest (possibly empty) suffix of elements which __do not__
+-- satisfy the predicate and the remainder of the string. The remainder is
+-- the first component of the result and the suffix is the second.
+--
+-- 'breakEnd' @p@ is equivalent to @'spanEnd' (not . p)@ and to
+-- @('dropWhileEnd' (not . p) &&& 'takeWhileEnd' (not . p))@.
+--
+-- @since 0.11.2.0
+breakEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
+breakEnd f = go []
+  where
+    -- @acc@ holds, newest first, the chunks seen since the last chunk whose
+    -- final byte satisfied @f@; only these can still belong to the suffix.
+    go acc (Chunk c cs)
+        -- @c@ ends with a matching byte, so @c@ and every pending chunk
+        -- belong to the first component. 'L.foldl' (not 'L.foldl'') leaves
+        -- the recursive call unevaluated.
+        | f (S.last c) = L.foldl (flip $ BF.first . Chunk) (go [] cs) (c : acc)
+        | otherwise    = go (c : acc) cs
+    go acc Empty = split Empty acc
+
+    -- Walk the pending chunks from newest to oldest. @sfx@ is the suffix
+    -- assembled so far; prepending to it keeps the walk linear in the
+    -- number of pending chunks.
+    split sfx [] = (Empty, sfx)
+    split sfx (x : xs) =
+        case S.breakEnd f x of
+            (x', x'')
+                -- No byte of @x@ matches: the whole chunk joins the suffix.
+                | S.null x' -> split (chunk x sfx) xs
+                -- The last match lies in @x@: everything older is prefix.
+                | otherwise ->
+                    L.foldl' (flip $ BF.first . Chunk) (fromStrict x', chunk x'' sfx) xs
+
+
 --
 -- TODO
 --
@@ -799,6 +924,25 @@ spanByte c (LPS ps) = case (spanByte' ps) of (a,b) -> (LPS a, LPS b)
 span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 span p = break (not . p)
 
+-- | Returns the longest (possibly empty) suffix of elements
+-- satisfying the predicate and the remainder of the string.
+-- The remainder is the first component of the result and the suffix is
+-- the second.
+--
+-- 'spanEnd' @p@ is equivalent to @'breakEnd' (not . p)@ and to
+-- @('dropWhileEnd' p &&& 'takeWhileEnd' p)@.
+--
+-- We have
+--
+-- > spanEnd (not . isSpace) "x y z" == ("x y ", "z")
+--
+-- and
+--
+-- > spanEnd (not . isSpace) ps
+-- >    ==
+-- > let (x, y) = span (not . isSpace) (reverse ps) in (reverse y, reverse x)
+--
+-- @since 0.11.2.0
+spanEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
+spanEnd p = breakEnd (not . p)
+
 -- | /O(n)/ Splits a 'ByteString' into components delimited by
 -- separators, where the predicate returns True for a separator element.
 -- The resulting components do not contain the separators.  Two adjacent

diff --git a/Data/ByteString/Lazy/Char8.hs b/Data/ByteString/Lazy/Char8.hs
@@ -105,12 +105,18 @@ module Data.ByteString.Lazy.Char8 (
 
         -- ** Breaking strings
         take,
+        takeEnd,
         drop,
+        dropEnd,
         splitAt,
         takeWhile,
+        takeWhileEnd,
         dropWhile,
+        dropWhileEnd,
         span,
+        spanEnd,
         break,
+        breakEnd,
         group,
         groupBy,
         inits,
@@ -203,7 +209,7 @@ module Data.ByteString.Lazy.Char8 (
 import Data.ByteString.Lazy
         (fromChunks, toChunks
         ,empty,null,length,tail,init,append,reverse,transpose,cycle
-        ,concat,take,drop,splitAt,intercalate
+        ,concat,take,takeEnd,drop,dropEnd,splitAt,intercalate
         ,isPrefixOf,isSuffixOf,group,inits,tails,copy
         ,stripPrefix,stripSuffix
         ,hGetContents, hGet, hPut, getContents
@@ -456,22 +462,68 @@ takeWhile :: (Char -> Bool) -> ByteString -> ByteString
 takeWhile f = L.takeWhile (f . w2c)
 {-# INLINE takeWhile #-}
 
+-- | 'takeWhileEnd' @p xs@ yields the longest (possibly empty) suffix of @xs@
+-- whose elements all satisfy @p@.
+--
+-- @'takeWhileEnd' p@ is equivalent to @'reverse' . 'takeWhile' p . 'reverse'@.
+--
+-- @since 0.11.2.0
+takeWhileEnd :: (Char -> Bool) -> ByteString -> ByteString
+takeWhileEnd p = L.takeWhileEnd (\w -> p (w2c w))
+{-# INLINE takeWhileEnd #-}
+
 -- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@.
 dropWhile :: (Char -> Bool) -> ByteString -> ByteString
 dropWhile f = L.dropWhile (f . w2c)
 {-# INLINE dropWhile #-}
 
+-- | Similar to 'P.dropWhileEnd':
+-- removes the longest (possibly empty) suffix of elements
+-- satisfying the predicate and returns what is left.
+--
+-- @'dropWhileEnd' p@ is equivalent to @'reverse' . 'dropWhile' p . 'reverse'@.
+--
+-- @since 0.11.2.0
+dropWhileEnd :: (Char -> Bool) -> ByteString -> ByteString
+dropWhileEnd p = L.dropWhileEnd (\w -> p (w2c w))
+{-# INLINE dropWhileEnd #-}
+
 -- | 'break' @p@ is equivalent to @'span' ('not' . p)@.
 break :: (Char -> Bool) -> ByteString -> (ByteString, ByteString)
 break f = L.break (f . w2c)
 {-# INLINE break #-}
 
+-- | 'breakEnd' behaves like 'break', but works from the end of the
+-- 'ByteString'.
+--
+-- > breakEnd p == spanEnd (not . p)
+--
+-- @since 0.11.2.0
+breakEnd :: (Char -> Bool) -> ByteString -> (ByteString, ByteString)
+breakEnd p = L.breakEnd (\w -> p (w2c w))
+{-# INLINE breakEnd #-}
+
 -- | 'span' @p xs@ breaks the ByteString into two segments. It is
 -- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@
 span :: (Char -> Bool) -> ByteString -> (ByteString, ByteString)
 span f = L.span (f . w2c)
 {-# INLINE span #-}
 
+-- | 'spanEnd' behaves like 'span', but works from the end of the
+-- 'ByteString'. For example,
+--
+-- > spanEnd (not . isSpace) "x y z" == ("x y ", "z")
+--
+-- and in general
+--
+-- > spanEnd (not . isSpace) ps
+-- >    ==
+-- > let (x, y) = span (not . isSpace) (reverse ps) in (reverse y, reverse x)
+--
+-- @since 0.11.2.0
+spanEnd :: (Char -> Bool) -> ByteString -> (ByteString, ByteString)
+spanEnd p = L.spanEnd (\w -> p (w2c w))
+{-# INLINE spanEnd #-}
+
 {-
 -- | 'breakChar' breaks its ByteString argument at the first occurence
 -- of the specified Char. It is more efficient than 'break' as it is

diff --git a/tests/Properties/ByteString.hs b/tests/Properties/ByteString.hs
@@ -177,11 +177,12 @@ tests =
     \(toElem -> c) x -> (B.unpack *** B.unpack) (B.break (/= c) x) === break (/= c) (B.unpack x)
   , testProperty "break span" $
     \f x -> B.break f x === B.span (not . f) x
-#ifndef BYTESTRING_LAZY
   , testProperty "breakEnd" $
     \f x -> B.breakEnd f x === swap ((B.reverse *** B.reverse) (B.break f (B.reverse x)))
   , testProperty "breakEnd" $
     \f x -> B.breakEnd f x === B.spanEnd (not . f) x
+
+#ifndef BYTESTRING_LAZY
   , testProperty "break breakSubstring" $
     \(toElem -> c) x -> B.break (== c) x === B.breakSubstring (B.singleton c) x
   , testProperty "breakSubstring" $
@@ -248,7 +249,6 @@ tests =
     \x -> B.unpack (B.takeWhile isSpace x) === takeWhile isSpace (B.unpack x)
 #endif
 
-#ifndef BYTESTRING_LAZY
   , testProperty "dropEnd" $
     \n x -> B.dropEnd n x === B.take (B.length x - n) x
   , testProperty "dropWhileEnd" $
@@ -257,7 +257,6 @@ tests =
     \n x -> B.takeEnd n x === B.drop (B.length x - n) x
   , testProperty "takeWhileEnd" $
     \f x -> B.takeWhileEnd f x === B.reverse (B.takeWhile f (B.reverse x))
-#endif
 
 #ifdef BYTESTRING_LAZY
   , testProperty "invariant" $
@@ -278,6 +277,14 @@ tests =
     \x (toElem -> c) -> B.compareLength (B.snoc x c <> undefined) (B.length x) === GT
   , testProperty "compareLength 5" $
     \x n -> B.compareLength x n === compare (B.length x) n
+  , testProperty "dropEnd lazy" $
+    \(toElem -> c) -> B.take 1 (B.dropEnd 1 (B.singleton c <> B.singleton c <> B.singleton c <> undefined)) === B.singleton c
+  , testProperty "dropWhileEnd lazy" $
+    \(toElem -> c) -> B.take 1 (B.dropWhileEnd (const False) (B.singleton c <> undefined)) === B.singleton c
+  , testProperty "breakEnd lazy" $
+    \(toElem -> c) -> B.take 1 (fst $ B.breakEnd (const True) (B.singleton c <> undefined)) === B.singleton c
+  , testProperty "spanEnd lazy" $
+    \(toElem -> c) -> B.take 1 (fst $ B.spanEnd (const False) (B.singleton c <> undefined)) === B.singleton c
 #endif
 
   , testProperty "length" $
@@ -350,10 +357,8 @@ tests =
     \(toElem -> c) x -> (B.unpack *** B.unpack) (B.span (== c) x) === span (== c) (B.unpack x)
   , testProperty "span /=" $
     \(toElem -> c) x -> (B.unpack *** B.unpack) (B.span (/= c) x) === span (/= c) (B.unpack x)
-#ifndef BYTESTRING_LAZY
   , testProperty "spanEnd" $
     \f x -> B.spanEnd f x === swap ((B.reverse *** B.reverse) (B.span f (B.reverse x)))
-#endif
   , testProperty "split" $
     \(toElem -> c) x -> map B.unpack (B.split c x) === split c (B.unpack x)
   , testProperty "split empty" $