-
Notifications
You must be signed in to change notification settings - Fork 139
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add lazy dropEnd and friends #395
Changes from 8 commits
d2c8c32
9421dcd
891f82f
c0607a3
ad00a22
a198e80
cd822ae
1ae4ab7
b688a25
413d3f1
c48d764
909e02b
4240db9
eedf0a2
7dc6214
6911faf
af2f32e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -127,12 +127,18 @@ module Data.ByteString.Lazy ( | |
|
||
-- ** Breaking strings | ||
take, | ||
takeEnd, | ||
drop, | ||
dropEnd, | ||
splitAt, | ||
takeWhile, | ||
takeWhileEnd, | ||
dropWhile, | ||
dropWhileEnd, | ||
span, | ||
spanEnd, | ||
break, | ||
breakEnd, | ||
group, | ||
groupBy, | ||
inits, | ||
|
@@ -220,13 +226,15 @@ import Prelude hiding | |
,getContents,getLine,putStr,putStrLn ,zip,zipWith,unzip,notElem) | ||
|
||
import qualified Data.List as L -- L for list/lazy | ||
import qualified Data.Bifunctor as BF | ||
import qualified Data.ByteString as P (ByteString) -- type name only | ||
import qualified Data.ByteString as S -- S for strict (hmm...) | ||
import qualified Data.ByteString.Internal as S | ||
import qualified Data.ByteString.Unsafe as S | ||
import Data.ByteString.Lazy.Internal | ||
|
||
import Control.Monad (mplus) | ||
import Data.Maybe (listToMaybe) | ||
import Data.Word (Word8) | ||
import Data.Int (Int64) | ||
import System.IO (Handle,openBinaryFile,stdin,stdout,withBinaryFile,IOMode(..) | ||
|
@@ -684,6 +692,28 @@ take i cs0 = take' i cs0 | |
then Chunk (S.take (fromIntegral n) c) Empty | ||
else Chunk c (take' (n - fromIntegral (S.length c)) cs) | ||
|
||
-- | /O(c)/ @'takeEnd' n xs@ is equivalent to @'drop' ('length' xs - n) xs@. | ||
-- Takes @n@ elements from end of bytestring. | ||
-- | ||
-- >>> takeEnd 3 "abcdefg" | ||
-- "efg" | ||
-- >>> takeEnd 0 "abcdefg" | ||
-- "" | ||
-- >>> takeEnd 4 "abc" | ||
-- "abc" | ||
-- | ||
-- @since 0.11.2.0 | ||
takeEnd :: Int64 -> ByteString -> ByteString | ||
takeEnd i _ | i <= 0 = Empty | ||
takeEnd i cs0 = takeEnd' i cs0 | ||
where takeEnd' 0 _ = Empty | ||
takeEnd' n cs = | ||
snd $ foldrChunks takeTuple (n,Empty) cs | ||
takeTuple _ (0, cs) = (0, cs) | ||
takeTuple c (n, cs) | ||
| n > fromIntegral (S.length c) = (n - fromIntegral (S.length c), Chunk c cs) | ||
| otherwise = (0, Chunk (S.takeEnd (fromIntegral n) c) cs) | ||
|
||
-- | /O(n\/c)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@ | ||
-- elements, or @[]@ if @n > 'length' xs@. | ||
drop :: Int64 -> ByteString -> ByteString | ||
|
@@ -696,6 +726,43 @@ drop i cs0 = drop' i cs0 | |
then Chunk (S.drop (fromIntegral n) c) cs | ||
else drop' (n - fromIntegral (S.length c)) cs | ||
|
||
-- | /O(n)/ @'dropEnd' n xs@ is equivalent to @'take' ('length' xs - n) xs@. | ||
-- Drops @n@ elements from end of bytestring. | ||
-- | ||
-- >>> dropEnd 3 "abcdefg" | ||
-- "abcd" | ||
-- >>> dropEnd 0 "abcdefg" | ||
-- "abcdefg" | ||
-- >>> dropEnd 4 "abc" | ||
-- "" | ||
-- | ||
-- @since 0.11.2.0 | ||
dropEnd :: Int64 -> ByteString -> ByteString | ||
dropEnd i p | i <= 0 = p | ||
dropEnd i p = go [] [] 0 0 p | ||
where go hss tss acc h (Chunk c cs) | ||
| h >= acc - i = go hss (c : tss) (acc + len c) h cs | ||
| otherwise = | ||
let (output, hss', tss', acc') = getOutput [] hss (c : tss) ( acc + len c) | ||
in L.foldl (flip chunk) (go hss' tss' acc' (hLen hss') cs) output | ||
go hss tss _ _ Empty = dropChunks (tss ++ L.reverse hss) (fromIntegral i) | ||
|
||
len c = fromIntegral (S.length c) | ||
hLen cs = maybe 0 len (listToMaybe cs) | ||
|
||
getOutput out [] [] acc = (out, [], [], acc) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Again, it would be nice to document meaning of arguments and return values. What I can suggest is creating There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I believe now with the use of the Deque, arguments are quite easy to follow. I'd happily add comments though if necessary |
||
getOutput out [] bss acc = getOutput out (L.reverse bss) [] acc | ||
getOutput out (x:xs) bss acc = | ||
if len x <= acc - i - len x | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Looks suspicious, there are There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, its redundant |
||
then getOutput (x:out) xs bss (acc - len x) | ||
else (out, x:xs, bss, acc) | ||
|
||
dropChunks [] _ = Empty | ||
dropChunks (c : cs) n = | ||
case S.length c of | ||
l | l <= fromIntegral n -> dropChunks cs (fromIntegral n - l) | ||
| otherwise -> L.foldl' (flip chunk) Empty (S.dropEnd (fromIntegral n) c : cs) | ||
|
||
-- | /O(n\/c)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@. | ||
splitAt :: Int64 -> ByteString -> (ByteString, ByteString) | ||
splitAt i cs0 | i <= 0 = (Empty, cs0) | ||
|
@@ -722,6 +789,23 @@ takeWhile f = takeWhile' | |
n | n < S.length c -> Chunk (S.take n c) Empty | ||
| otherwise -> Chunk c (takeWhile' cs) | ||
|
||
-- | Returns the longest (possibly empty) suffix of elements | ||
-- satisfying the predicate. | ||
-- | ||
-- @'takeWhileEnd' p@ is equivalent to @'reverse' . 'takeWhile' p . 'reverse'@. | ||
Comment on lines
+806
to
+809
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. An example would be nice to have here. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I've used There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think that would be helpful for users who will mostly not be aware of |
||
-- | ||
-- @since 0.11.2.0 | ||
takeWhileEnd :: (Word8 -> Bool) -> ByteString -> ByteString | ||
takeWhileEnd f = takeWhileEnd' | ||
where takeWhileEnd' Empty = Empty | ||
takeWhileEnd' cs = | ||
snd $ foldrChunks takeTuple (True,Empty) cs | ||
takeTuple _ (False, bs) = (False,bs) | ||
takeTuple c (True,bs) = | ||
case S.takeWhileEnd f c of | ||
c' | S.length c' == S.length c -> (True, Chunk c bs) | ||
| otherwise -> (False, fromStrict c' `append` bs) | ||
|
||
-- | Similar to 'P.dropWhile', | ||
-- drops the longest (possibly empty) prefix of elements | ||
-- satisfying the predicate and returns the remainder. | ||
|
@@ -733,6 +817,25 @@ dropWhile f = dropWhile' | |
n | n < S.length c -> Chunk (S.drop n c) cs | ||
| otherwise -> dropWhile' cs | ||
|
||
-- | Similar to 'P.dropWhileEnd', | ||
-- drops the longest (possibly empty) suffix of elements | ||
-- satisfying the predicate and returns the remainder. | ||
-- | ||
-- @'dropWhileEnd' p@ is equivalent to @'reverse' . 'dropWhile' p . 'reverse'@. | ||
3kyro marked this conversation as resolved.
Show resolved
Hide resolved
|
||
-- | ||
-- @since 0.11.2.0 | ||
dropWhileEnd :: (Word8 -> Bool) -> ByteString -> ByteString | ||
dropWhileEnd f = go [] | ||
where go acc (Chunk c cs) | ||
| f (S.last c) = go (c : acc) cs | ||
| otherwise = L.foldl (flip Chunk) (go [] cs) (c : acc) | ||
go acc Empty = dropAcc acc | ||
dropAcc [] = Empty | ||
dropAcc (x : xs) = | ||
case S.dropWhileEnd f x of | ||
x' | S.null x' -> dropAcc xs | ||
| otherwise -> L.foldl' (flip Chunk) Empty (x' : xs) | ||
|
||
-- | Similar to 'P.break', | ||
-- returns the longest (possibly empty) prefix of elements which __do not__ | ||
-- satisfy the predicate and the remainder of the string. | ||
|
@@ -750,6 +853,28 @@ break f = break' | |
| otherwise -> let (cs', cs'') = break' cs | ||
in (Chunk c cs', cs'') | ||
|
||
|
||
-- | Returns the longest (possibly empty) suffix of elements which __do not__ | ||
-- satisfy the predicate and the remainder of the string. | ||
-- | ||
-- 'breakEnd' @p@ is equivalent to @'spanEnd' (not . p)@ and to @('takeWhileEnd' (not . p) &&& 'dropWhileEnd' (not . p))@. | ||
-- | ||
-- @since 0.11.2.0 | ||
breakEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) | ||
breakEnd f = go [] | ||
where go acc (Chunk c cs) | ||
| f (S.last c) = L.foldl (flip $ BF.first . Chunk) (go [] cs) (c : acc) | ||
| otherwise = go (c : acc) cs | ||
go acc Empty = dropAcc acc | ||
dropAcc [] = (Empty, Empty) | ||
dropAcc (x : xs) = | ||
case S.breakEnd f x of | ||
(x', x'') | S.null x' -> let (y, y') = dropAcc xs | ||
in (y, y' `append` fromStrict x) | ||
| otherwise -> | ||
L.foldl' (flip $ BF.first . Chunk) (fromStrict x', fromStrict x'') xs | ||
|
||
|
||
-- | ||
-- TODO | ||
-- | ||
|
@@ -799,6 +924,25 @@ spanByte c (LPS ps) = case (spanByte' ps) of (a,b) -> (LPS a, LPS b) | |
span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) | ||
span p = break (not . p) | ||
|
||
-- | Returns the longest (possibly empty) suffix of elements | ||
-- satisfying the predicate and the remainder of the string. | ||
-- | ||
-- 'spanEnd' @p@ is equivalent to @'breakEnd' (not . p)@ and to @('takeWhileEnd' p &&& 'dropWhileEnd' p)@. | ||
-- | ||
-- We have | ||
-- | ||
-- > spanEnd (not . isSpace) "x y z" == ("x y ", "z") | ||
-- | ||
-- and | ||
-- | ||
-- > spanEnd (not . isSpace) ps | ||
-- > == | ||
-- > let (x, y) = span (not . isSpace) (reverse ps) in (reverse y, reverse x) | ||
-- | ||
-- @since 0.11.2.0 | ||
spanEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) | ||
spanEnd p = breakEnd (not . p) | ||
|
||
-- | /O(n)/ Splits a 'ByteString' into components delimited by | ||
-- separators, where the predicate returns True for a separator element. | ||
-- The resulting components do not contain the separators. Two adjacent | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could you please add a comment, explaining meaning of arguments? AFAIU
hss
andtss
model a bidirectional queue (aka deque),acc
tracks total length ofhss
+tss
, andh
tracks the length ofhead hss
.Computing length of
head hss
is O(1) operation, so I think we can simplify a bit by not passingh
argument.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I have removed the need for the
head
ofhss
as its length is already included in the accumulated length. Furthermore, in a Deque, calculating the length of itshead
might trigger a re-balancing which I think would not be ideal (e.g. it would always re balance on the second chunk). This seems a bit cleaner, although we might triggergetOutput
a bit more often than before.