From 93da14e5e79ea08d9602390e29a1bc118dfb1430 Mon Sep 17 00:00:00 2001 From: adithyaov Date: Tue, 14 Apr 2020 04:01:28 +0530 Subject: [PATCH 01/30] Modify the current Fold type to handle terminating Folds This commit also includes: - Changing all other modules accordingly. - Adding additional helper functions to make life easy - Using strict types as fold's intermediat result - Respecting line length of 80 --- src/Streamly/Internal/Data/Array.hs | 22 +- .../Data/Array/Prim/MutTypesInclude.hs | 49 +- .../Data/Array/Prim/Pinned/Mut/Types.hs | 12 +- .../Internal/Data/Array/Prim/TypesInclude.hs | 26 +- .../Data/Array/Storable/Foreign/Mut/Types.hs | 43 +- src/Streamly/Internal/Data/Fold.hs | 425 +++++++++++------- src/Streamly/Internal/Data/Fold/Types.hs | 220 ++++++--- src/Streamly/Internal/Data/Parser/ParserD.hs | 82 ++-- .../Internal/Data/Parser/ParserD/Types.hs | 31 +- src/Streamly/Internal/Data/Sink.hs | 4 +- src/Streamly/Internal/Data/SmallArray.hs | 13 +- src/Streamly/Internal/Data/Stream/IsStream.hs | 16 +- src/Streamly/Internal/Data/Stream/Prelude.hs | 15 +- src/Streamly/Internal/Data/Stream/StreamD.hs | 203 +++++---- .../Internal/Data/Stream/StreamD/Type.hs | 10 +- src/Streamly/Internal/Data/Stream/StreamK.hs | 17 + src/Streamly/Internal/Data/Unfold.hs | 9 +- src/Streamly/Internal/FileSystem/File.hs | 2 +- src/Streamly/Internal/Network/Inet/TCP.hs | 9 +- .../Test/Internal/Data/Parser/ParserD.hs | 102 +++-- test/Streamly/Test/Prelude.hs | 7 +- 21 files changed, 809 insertions(+), 508 deletions(-) diff --git a/src/Streamly/Internal/Data/Array.hs b/src/Streamly/Internal/Data/Array.hs index c34de24454..5439adef79 100644 --- a/src/Streamly/Internal/Data/Array.hs +++ b/src/Streamly/Internal/Data/Array.hs @@ -55,12 +55,14 @@ import Data.Functor.Identity (runIdentity) import Data.Primitive.Array hiding (fromList, fromListN) import qualified GHC.Exts as Exts +import Streamly.Internal.Data.Tuple.Strict (Tuple'(..), Tuple3'(..)) import Streamly.Internal.Data.Unfold.Types (Unfold(..)) import Streamly.Internal.Data.Fold.Types (Fold(..)) import Streamly.Internal.Data.Stream.StreamK.Type (IsStream) import Streamly.Internal.Data.Stream.Serial (SerialT) import qualified Streamly.Internal.Data.Stream.StreamD as D +import qualified Streamly.Internal.Data.Fold.Types as FL {-# NOINLINE bottomElement #-} bottomElement :: a @@ -109,13 +111,13 @@ writeN limit = Fold step initial extract where initial = do marr <- liftIO $ newArray limit bottomElement - return (marr, 0) - step (marr, i) x - | i == limit = return (marr, i) + return (Tuple' marr 0) + step (Tuple' marr i) x + | i == limit = fmap FL.Done $ liftIO $ freezeArray marr 0 i | otherwise = do liftIO $ writeArray marr i x - return (marr, i + 1) - extract (marr, len) = liftIO $ freezeArray marr 0 len + FL.partialM $ Tuple' marr (i + 1) + extract (Tuple' marr len) = liftIO $ freezeArray marr 0 len {-# INLINE_NORMAL write #-} write :: MonadIO m => Fold m a (Array a) @@ -123,18 +125,18 @@ write = Fold step initial extract where initial = do marr <- liftIO $ newArray 0 bottomElement - return (marr, 0, 0) - step (marr, i, capacity) x + return (Tuple3' marr 0 0) + step (Tuple3' marr i capacity) x | i == capacity = let newCapacity = max (capacity * 2) 1 in do newMarr <- liftIO $ newArray newCapacity bottomElement liftIO $ copyMutableArray newMarr 0 marr 0 i liftIO $ writeArray newMarr i x - return (newMarr, i + 1, newCapacity) + FL.partialM $ Tuple3' newMarr (i + 1) newCapacity | otherwise = do liftIO $ writeArray marr i x - return (marr, i + 1, capacity) - extract (marr, len, _) = liftIO $ freezeArray marr 0 len + FL.partialM $ Tuple3' marr (i + 1) capacity + extract (Tuple3' marr len _) = liftIO $ freezeArray marr 0 len {-# INLINE_NORMAL fromStreamDN #-} fromStreamDN :: MonadIO m => Int -> D.Stream m a -> m (Array a) diff --git a/src/Streamly/Internal/Data/Array/Prim/MutTypesInclude.hs b/src/Streamly/Internal/Data/Array/Prim/MutTypesInclude.hs index ee309ac5f6..ad1ac482d8 100644 --- a/src/Streamly/Internal/Data/Array/Prim/MutTypesInclude.hs +++ b/src/Streamly/Internal/Data/Array/Prim/MutTypesInclude.hs @@ -11,7 +11,8 @@ import Streamly.Internal.Data.Fold.Types (Fold(..)) import Streamly.Internal.Data.SVar (adaptState) import Streamly.Internal.Data.Tuple.Strict (Tuple'(..), Tuple3'(..)) -import qualified Streamly.Internal.Data.Stream.StreamD.Type as D +import qualified Streamly.Internal.Data.Fold as FL +import qualified Streamly.Internal.Data.Stream.StreamD as D import GHC.Exts import Control.Monad.Primitive @@ -135,7 +136,7 @@ shrinkArray (Array arr#) (I# n#) = -- /Internal/ {-# INLINE_NORMAL write #-} write :: (MonadIO m, Prim a) => Fold m a (Array a) -write = Fold step initial extract +write = FL.mkAccumM step initial extract where @@ -169,13 +170,13 @@ writeN limit = Fold step initial extract marr <- newArray limit return $ Tuple' marr 0 - step (Tuple' marr i) x - | i == limit = return $ Tuple' marr i + extract (Tuple' marr len) = shrinkArray marr len >> return marr + + step s@(Tuple' marr i) x + | i == limit = FL.Done <$> extract s | otherwise = do unsafeWriteIndex marr i x - return $ Tuple' marr (i + 1) - - extract (Tuple' marr len) = shrinkArray marr len >> return marr + FL.partialM $ Tuple' marr (i + 1) -- Use Tuple' instead? data ArrayUnsafe a = ArrayUnsafe @@ -191,7 +192,7 @@ data ArrayUnsafe a = ArrayUnsafe -- /Internal/ {-# INLINE_NORMAL writeNUnsafe #-} writeNUnsafe :: (MonadIO m, Prim a) => Int -> Fold m a (Array a) -writeNUnsafe n = Fold step initial extract +writeNUnsafe n = FL.mkAccumM step initial extract where @@ -212,13 +213,9 @@ fromStreamDN limit str = do shrinkArray marr n return marr -{-# INLINE runFold #-} -runFold :: (Monad m) => Fold m a b -> D.Stream m a -> m b -runFold (Fold step begin done) = D.foldlMx' step begin done - {-# INLINE fromStreamD #-} fromStreamD :: (MonadIO m, Prim a) => D.Stream m a -> m (Array a) -fromStreamD str = runFold write str +fromStreamD str = D.runFold write str {-# INLINABLE fromListNM #-} fromListNM :: (MonadIO m, Prim a) => Int -> [a] -> m (Array a) @@ -373,17 +370,22 @@ lpackArraysChunksOf n (Fold step1 initial1 extract1) = extract (Tuple' Nothing r1) = extract1 r1 extract (Tuple' (Just buf) r1) = do r <- step1 r1 buf - extract1 r + case r of + FL.Partial rr -> extract1 rr + FL.Done _ -> return () step (Tuple' Nothing r1) arr = do len <- byteLength arr if len >= n then do r <- step1 r1 arr - extract1 r - r1' <- initial1 - return (Tuple' Nothing r1') - else return (Tuple' (Just arr) r1) + case r of + FL.Done _ -> FL.doneM () + FL.Partial s -> do + extract1 s + r1' <- initial1 + FL.partialM $ Tuple' Nothing r1' + else FL.partialM $ Tuple' (Just arr) r1 step (Tuple' (Just buf) r1) arr = do blen <- byteLength buf @@ -394,7 +396,10 @@ lpackArraysChunksOf n (Fold step1 initial1 extract1) = if len >= n then do r <- step1 r1 buf' - extract1 r - r1' <- initial1 - return (Tuple' Nothing r1') - else return (Tuple' (Just buf') r1) + case r of + FL.Done _ -> FL.doneM () + FL.Partial s -> do + extract1 s + r1' <- initial1 + FL.partialM $ Tuple' Nothing r1' + else FL.partialM $ Tuple' (Just buf') r1 diff --git a/src/Streamly/Internal/Data/Array/Prim/Pinned/Mut/Types.hs b/src/Streamly/Internal/Data/Array/Prim/Pinned/Mut/Types.hs index 4c64faa0ea..cbaab959b4 100644 --- a/src/Streamly/Internal/Data/Array/Prim/Pinned/Mut/Types.hs +++ b/src/Streamly/Internal/Data/Array/Prim/Pinned/Mut/Types.hs @@ -143,15 +143,15 @@ writeNAligned align limit = Fold step initial extract initial = do marr <- newAlignedArray limit align - return (marr, 0) + return $ Tuple' marr 0 - step (marr, i) x - | i == limit = return (marr, i) + extract (Tuple' marr len) = shrinkArray marr len >> return marr + + step s@(Tuple' marr i) x + | i == limit = FL.Done <$> extract s | otherwise = do unsafeWriteIndex marr i x - return (marr, i + 1) - - extract (marr, len) = shrinkArray marr len >> return marr + FL.partialM $ Tuple' marr (i + 1) ------------------------------------------------------------------------------- -- Mutation with pointers diff --git a/src/Streamly/Internal/Data/Array/Prim/TypesInclude.hs b/src/Streamly/Internal/Data/Array/Prim/TypesInclude.hs index b68afd7ae5..17a9348a27 100644 --- a/src/Streamly/Internal/Data/Array/Prim/TypesInclude.hs +++ b/src/Streamly/Internal/Data/Array/Prim/TypesInclude.hs @@ -610,20 +610,25 @@ lpackArraysChunksOf n (Fold step1 initial1 extract1) = extract (Tuple3' (Just' buf) boff r1) = do nArr <- unsafeFreeze buf r <- step1 r1 (slice nArr 0 boff) - extract1 r + case r of + FL.Partial rr -> extract1 rr + FL.Done _ -> return () step (Tuple3' Nothing' _ r1) arr = if length arr >= nElem then do r <- step1 r1 arr - extract1 r - r1' <- initial1 - return (Tuple3' Nothing' 0 r1') + case r of + FL.Done _ -> FL.doneM () + FL.Partial s -> do + extract1 s + r1' <- initial1 + FL.partialM $ Tuple3' Nothing' 0 r1' else do buf <- MA.newArray nElem noff <- spliceInto buf 0 arr - return (Tuple3' (Just' buf) noff r1) + FL.partialM $ Tuple3' (Just' buf) noff r1 step (Tuple3' (Just' buf) boff r1) arr = do noff <- spliceInto buf boff arr @@ -632,10 +637,13 @@ lpackArraysChunksOf n (Fold step1 initial1 extract1) = then do nArr <- unsafeFreeze buf r <- step1 r1 (slice nArr 0 noff) - extract1 r - r1' <- initial1 - return (Tuple3' Nothing' 0 r1') - else return (Tuple3' (Just' buf) noff r1) + case r of + FL.Done _ -> FL.doneM () + FL.Partial s -> do + extract1 s + r1' <- initial1 + FL.partialM $ Tuple3' Nothing' 0 r1' + else FL.partialM $ Tuple3' (Just' buf) noff r1 data SplitState s arr = Initial s diff --git a/src/Streamly/Internal/Data/Array/Storable/Foreign/Mut/Types.hs b/src/Streamly/Internal/Data/Array/Storable/Foreign/Mut/Types.hs index 8555008ce2..401cbc1cd2 100644 --- a/src/Streamly/Internal/Data/Array/Storable/Foreign/Mut/Types.hs +++ b/src/Streamly/Internal/Data/Array/Storable/Foreign/Mut/Types.hs @@ -122,6 +122,7 @@ import qualified Data.Foldable as F import qualified Streamly.Internal.Foreign.Malloc as Malloc import qualified Streamly.Internal.Data.Stream.StreamD.Type as D import qualified Streamly.Internal.Data.Stream.StreamK as K +import qualified Streamly.Internal.Data.Fold as FL import qualified GHC.Exts as Exts import Prelude hiding (length, foldr, read, unlines, splitAt) @@ -675,17 +676,22 @@ lpackArraysChunksOf n (Fold step1 initial1 extract1) = extract (Tuple' Nothing r1) = extract1 r1 extract (Tuple' (Just buf) r1) = do r <- step1 r1 buf - extract1 r + case r of + FL.Partial rr -> extract1 rr + FL.Done _ -> return () - step (Tuple' Nothing r1) arr = do + step (Tuple' Nothing r1) arr = let len = byteLength arr - if len >= n - then do - r <- step1 r1 arr - extract1 r - r1' <- initial1 - return (Tuple' Nothing r1') - else return (Tuple' (Just arr) r1) + in if len >= n + then do + r <- step1 r1 arr + case r of + FL.Done _ -> FL.doneM () + FL.Partial s -> do + extract1 s + r1' <- initial1 + FL.partialM $ Tuple' Nothing r1' + else FL.partialM $ Tuple' (Just arr) r1 step (Tuple' (Just buf) r1) arr = do let len = byteLength buf + byteLength arr @@ -697,10 +703,13 @@ lpackArraysChunksOf n (Fold step1 initial1 extract1) = if len >= n then do r <- step1 r1 buf'' - extract1 r - r1' <- initial1 - return (Tuple' Nothing r1') - else return (Tuple' (Just buf'') r1) + case r of + FL.Done _ -> FL.doneM () + FL.Partial s -> do + extract1 s + r1' <- initial1 + FL.partialM $ Tuple' Nothing r1' + else FL.partialM $ Tuple' (Just buf'') r1 #if !defined(mingw32_HOST_OS) data GatherState s arr @@ -1077,10 +1086,10 @@ writeNAllocWith alloc n = Fold step initial extract where initial = liftIO $ alloc (max n 0) - step arr@(Array _ end bound) _ | end == bound = return arr + step arr@(Array _ end bound) _ | end == bound = FL.doneM arr step (Array start end bound) x = do liftIO $ poke end x - return $ Array start (end `plusPtr` sizeOf (undefined :: a)) bound + FL.partialM $ Array start (end `plusPtr` sizeOf (undefined :: a)) bound -- XXX note that shirkToFit does not maintain alignment, in case we are -- using aligned allocation. extract = return -- liftIO . shrinkToFit @@ -1139,7 +1148,7 @@ writeNUnsafe n = Fold step initial extract return $ ArrayUnsafe start end step (ArrayUnsafe start end) x = do liftIO $ poke end x - return $ ArrayUnsafe start (end `plusPtr` sizeOf (undefined :: a)) + FL.partialM $ ArrayUnsafe start (end `plusPtr` sizeOf (undefined :: a)) extract (ArrayUnsafe start end) = return $ Array start end end -- liftIO . shrinkToFit -- XXX The realloc based implementation needs to make one extra copy if we use @@ -1158,7 +1167,7 @@ writeNUnsafe n = Fold step initial extract toArrayMinChunk :: forall m a. (MonadIO m, Storable a) => Int -> Int -> Fold m a (Array a) -- toArrayMinChunk n = FL.mapM spliceArrays $ toArraysOf n -toArrayMinChunk alignSize elemCount = Fold step initial extract +toArrayMinChunk alignSize elemCount = FL.mkAccumM step initial extract where diff --git a/src/Streamly/Internal/Data/Fold.hs b/src/Streamly/Internal/Data/Fold.hs index 5146cde7ce..d70ab8f9dd 100644 --- a/src/Streamly/Internal/Data/Fold.hs +++ b/src/Streamly/Internal/Data/Fold.hs @@ -7,15 +7,18 @@ -- Stability : experimental -- Portability : GHC --- Also see the "Streamly.Internal.Data.Sink" module that provides specialized left folds --- that discard the outputs. +-- Also see the "Streamly.Internal.Data.Sink" module that provides specialized +-- left folds that discard the outputs. -- -- IMPORTANT: keep the signatures consistent with the folds in Streamly.Prelude module Streamly.Internal.Data.Fold ( -- * Fold Type - Fold (..) + Step (..) + , Fold (..) + , partialM + , doneM , hoist , generally @@ -24,10 +27,14 @@ module Streamly.Internal.Data.Fold -- , init -- * Fold Creation Utilities - , mkPure - , mkPureId + , mkAccum + , mkAccum_ + , mkAccumM + , mkAccumM_ , mkFold - , mkFoldId + , mkFold_ + , mkFoldM + , mkFoldM_ -- ** Full Folds , drain @@ -203,7 +210,7 @@ module Streamly.Internal.Data.Fold ) where -import Control.Monad (join, void) +import Control.Monad (void, join) import Control.Monad.IO.Class (MonadIO(..)) import Data.Functor.Identity (Identity(..)) import Data.Int (Int64) @@ -213,9 +220,9 @@ import Data.Semigroup (Semigroup((<>))) #endif import Streamly.Internal.Data.Pipe.Types (Pipe (..), PipeState(..)) import Streamly.Internal.Data.Tuple.Strict (Tuple'(..), Tuple3'(..)) -import Streamly.Internal.Data.Either.Strict (Either'(..)) import Streamly.Internal.Data.Maybe.Strict (Maybe'(..), toMaybe) +import qualified Streamly.Internal.Data.Fold.Types as FL import qualified Streamly.Internal.Data.Pipe.Types as Pipe import qualified Data.Map.Strict as Map import qualified Prelude @@ -234,14 +241,51 @@ import Streamly.Internal.Data.Fold.Types -- Smart constructors ------------------------------------------------------------------------------ +-- | Make a non terminating fold using a pure step function, a pure initial +-- state and a pure state extraction function. +-- +-- /Internal/ +-- +{-# INLINE mkAccum #-} +mkAccum :: Monad m => (s -> a -> s) -> s -> (s -> b) -> Fold m a b +mkAccum step initial extract = + Fold (\s a -> partialM $ step s a) (return initial) (return . extract) + +-- | Make a non terminating fold using a pure step function and a pure initial +-- state. The final state extracted is identical to the intermediate state. +-- +-- /Internal/ +-- +{-# INLINE mkAccum_ #-} +mkAccum_ :: Monad m => (b -> a -> b) -> b -> Fold m a b +mkAccum_ step initial = mkAccum step initial id + +-- | Make a non terminating fold with an effectful step function and initial +-- state, and a state extraction function. +-- +-- /Internal/ +-- +{-# INLINE mkAccumM #-} +mkAccumM :: Functor m => (s -> a -> m s) -> m s -> (s -> m b) -> Fold m a b +mkAccumM step = Fold (\s a -> Partial <$> step s a) + +-- | Make a non terminating fold with an effectful step function and initial +-- state. The final state extracted is identical to the intermediate state. +-- +-- /Internal/ +-- +{-# INLINE mkAccumM_ #-} +mkAccumM_ :: Monad m => (b -> a -> m b) -> m b -> Fold m a b +mkAccumM_ step initial = mkAccumM step initial return + -- | Make a fold using a pure step function, a pure initial state and -- a pure state extraction function. -- -- /Internal/ -- -{-# INLINE mkPure #-} -mkPure :: Monad m => (s -> a -> s) -> s -> (s -> b) -> Fold m a b -mkPure step initial extract = +{-# INLINE mkFold #-} +mkFold :: Monad m => (s -> a -> Step s b) -> s -> (s -> b) -> Fold m a b +mkFold step initial extract = Fold (\s a -> return $ step s a) (return initial) (return . extract) -- | Make a fold using a pure step function and a pure initial state. The @@ -249,31 +293,31 @@ mkPure step initial extract = -- -- /Internal/ -- -{-# INLINE mkPureId #-} -mkPureId :: Monad m => (b -> a -> b) -> b -> Fold m a b -mkPureId step initial = mkPure step initial id +{-# INLINE mkFold_ #-} +mkFold_ :: Monad m => (b -> a -> Step b b) -> b -> Fold m a b +mkFold_ step initial = mkFold step initial id -- | Make a fold with an effectful step function and initial state, and a state -- extraction function. -- --- > mkFold = Fold +-- > mkFoldM = Fold -- -- We can just use 'Fold' but it is provided for completeness. -- -- /Internal/ -- -{-# INLINE mkFold #-} -mkFold :: (s -> a -> m s) -> m s -> (s -> m b) -> Fold m a b -mkFold = Fold +{-# INLINE mkFoldM #-} +mkFoldM :: (s -> a -> m (Step s b)) -> m s -> (s -> m b) -> Fold m a b +mkFoldM = Fold -- | Make a fold with an effectful step function and initial state. The final -- state extracted is identical to the intermediate state. -- -- /Internal/ -- -{-# INLINE mkFoldId #-} -mkFoldId :: Monad m => (b -> a -> m b) -> m b -> Fold m a b -mkFoldId step initial = Fold step initial return +{-# INLINE mkFoldM_ #-} +mkFoldM_ :: Monad m => (b -> a -> m (Step b b)) -> m b -> Fold m a b +mkFoldM_ step initial = Fold step initial return ------------------------------------------------------------------------------ -- hoist @@ -303,9 +347,14 @@ generally = hoist (return . runIdentity) -- @since 0.7.0 {-# INLINE sequence #-} sequence :: Monad m => Fold m a (m b) -> Fold m a b -sequence (Fold step initial extract) = Fold step initial extract' +sequence (Fold step initial extract) = Fold step' initial extract' where - extract' x = join (extract x) + step' s a = do + res <- step s a + case res of + Partial x -> partialM x + Done b -> b >>= return . Done + extract' = join . extract -- | Map a monadic function on the output of a fold. -- @@ -339,14 +388,18 @@ transform (Pipe pstep1 pstep2 pinitial) (Fold fstep finitial fextract) = -- XXX use SPEC? go acc (Pipe.Yield b (Consume ps')) = do acc' <- fstep acc b - return (Tuple' ps' acc') + case acc' of + Partial s -> partialM $ Tuple' ps' s + Done b2 -> doneM b2 go acc (Pipe.Yield b (Produce ps')) = do acc' <- fstep acc b r <- pstep2 ps' - go acc' r + case acc' of + Partial s -> go s r + Done b2 -> doneM b2 - go acc (Pipe.Continue (Consume ps')) = return (Tuple' ps' acc) + go acc (Pipe.Continue (Consume ps')) = partialM $ Tuple' ps' acc go acc (Pipe.Continue (Produce ps')) = do r <- pstep2 ps' @@ -366,10 +419,8 @@ transform (Pipe pstep1 pstep2 pinitial) (Fold fstep finitial fextract) = _Fold1 :: Monad m => (a -> a -> a) -> Fold m a (Maybe a) _Fold1 step = Fold step_ (return Nothing') (return . toMaybe) where - step_ mx a = return $ Just' $ - case mx of - Nothing' -> a - Just' x -> step x a + step_ Nothing' a = partialM $ Just' a + step_ (Just' x) a = partialM $ Just' $ step x a ------------------------------------------------------------------------------ -- Left folds @@ -390,7 +441,7 @@ drain :: Monad m => Fold m a () drain = Fold step begin done where begin = return () - step _ _ = return () + step _ _ = FL.partialM () done = return -- | @@ -403,7 +454,7 @@ drain = Fold step begin done -- @since 0.7.0 {-# INLINABLE drainBy #-} drainBy :: Monad m => (a -> m b) -> Fold m a () -drainBy f = Fold (const (void . f)) (return ()) return +drainBy f = Fold (const (fmap FL.Partial . void . f)) (return ()) return {-# INLINABLE drainBy2 #-} drainBy2 :: Monad m => (a -> m b) -> Fold2 m c a () @@ -427,16 +478,16 @@ last = _Fold1 (flip const) -- > genericLength = fmap getSum $ foldMap (Sum . const 1) -- -- @since 0.7.0 -{-# INLINABLE genericLength #-} +{-# INLINE genericLength #-} genericLength :: (Monad m, Num b) => Fold m a b -genericLength = Fold (\n _ -> return $ n + 1) (return 0) return +genericLength = Fold (\n _ -> partialM $ n + 1) (return 0) return -- | Determine the length of the input stream. -- -- > length = fmap getSum $ foldMap (Sum . const 1) -- -- @since 0.7.0 -{-# INLINABLE length #-} +{-# INLINE length #-} length :: Monad m => Fold m a Int length = genericLength @@ -449,7 +500,7 @@ length = genericLength -- @since 0.7.0 {-# INLINE sum #-} sum :: (Monad m, Num a) => Fold m a a -sum = Fold (\x a -> return $ x + a) (return 0) return +sum = Fold (\x a -> partialM $ x + a) (return 0) return -- | Determine the product of all elements of a stream of numbers. Returns -- multiplicative identity (@1@) when the stream is empty. @@ -459,7 +510,7 @@ sum = Fold (\x a -> return $ x + a) (return 0) return -- @since 0.7.0 {-# INLINABLE product #-} product :: (Monad m, Num a) => Fold m a a -product = Fold (\x a -> return $ x * a) (return 1) return +product = Fold (\x a -> partialM $ x * a) (return 1) return ------------------------------------------------------------------------------ -- To Summary (Maybe) @@ -531,7 +582,7 @@ mean = Fold step (return begin) (return . done) begin = Tuple' 0 0 step (Tuple' x n) y = return $ let n' = n + 1 - in Tuple' (x + (y - x) / n') n' + in Partial $ Tuple' (x + (y - x) / n') n' done (Tuple' x _) = x -- | Compute a numerically stable (population) variance over all elements in @@ -544,7 +595,7 @@ variance = Fold step (return begin) (return . done) where begin = Tuple3' 0 0 0 - step (Tuple3' n mean_ m2) x = return $ Tuple3' n' mean' m2' + step (Tuple3' n mean_ m2) x = partialM $ Tuple3' n' mean' m2' where n' = n + 1 mean' = (n * mean_ + x) / (n + 1) @@ -579,7 +630,7 @@ rollingHashWithSalt salt = Fold step initial extract where k = 2891336453 :: Int64 initial = return salt - step cksum a = return $ cksum * k + fromIntegral (fromEnum a) + step cksum a = partialM $ cksum * k + fromIntegral (fromEnum a) extract = return -- | A default salt used in the implementation of 'rollingHash'. @@ -615,8 +666,8 @@ rollingHashFirstN n = ltake n rollingHash -- /Internal/ -- {-# INLINE sconcat #-} -sconcat :: (Monad m, Semigroup a) => a -> Fold m a a -sconcat i = Fold (\x a -> return $ x <> a) (return i) return +sconcat :: (Monad m, Monoid a) => a -> Fold m a a +sconcat i = Fold (\x a -> partialM $ mappend x a) (return i) return -- | Fold an input stream consisting of monoidal elements using 'mappend' -- and 'mempty'. @@ -624,13 +675,9 @@ sconcat i = Fold (\x a -> return $ x <> a) (return i) return -- > S.fold FL.mconcat (S.map Sum $ S.enumerateFromTo 1 10) -- -- @since 0.7.0 -{-# INLINE mconcat #-} -mconcat :: (Monad m, Monoid a -#if __GLASGOW_HASKELL__ < 804 - , Semigroup a -#endif - ) => Fold m a a -mconcat = sconcat mempty +{-# INLINABLE mconcat #-} +mconcat :: (Monad m, Monoid a) => Fold m a a +mconcat = Fold (\x a -> partialM $ mappend x a) (return mempty) return -- | -- > foldMap f = lmap f mconcat @@ -666,7 +713,7 @@ foldMapM act = Fold step begin done begin = return mempty step m a = do m' <- act a - return $! mappend m m' + return $! Partial $! mappend m m' ------------------------------------------------------------------------------ -- To Containers @@ -682,7 +729,7 @@ foldMapM act = Fold step begin done -- id . (x1 :) . (x2 :) . (x3 :) . ... . (xn :) $ [] {-# INLINABLE toList #-} toList :: Monad m => Fold m a [a] -toList = Fold (\f x -> return $ f . (x :)) +toList = Fold (\f x -> partialM $ f . (x :)) (return id) (return . ($ [])) @@ -711,18 +758,11 @@ drainWhile p = ltakeWhile p drain -- @since 0.7.0 {-# INLINABLE genericIndex #-} genericIndex :: (Integral i, Monad m) => i -> Fold m a (Maybe a) -genericIndex i = Fold step (return $ Left' 0) done +genericIndex i = Fold step (return 0) (const (return Nothing)) where - step x a = return $ - case x of - Left' j -> if i == j - then Right' a - else Left' (j + 1) - _ -> x - done x = return $ - case x of - Left' _ -> Nothing - Right' a -> Just a + step j a = return $ if i == j + then Done $ Just a + else Partial (j + 1) -- | Lookup the element at the given index. -- @@ -750,14 +790,11 @@ head = _Fold1 const -- @since 0.7.0 {-# INLINABLE find #-} find :: Monad m => (a -> Bool) -> Fold m a (Maybe a) -find predicate = Fold step (return Nothing') (return . toMaybe) +find predicate = Fold step (return ()) (const (return Nothing)) where - step x a = return $ - case x of - Nothing' -> if predicate a - then Just' a - else Nothing' - _ -> x + step _ a = return $ if predicate a + then Done (Just a) + else Partial () -- | In a stream of (key-value) pairs @(a, b)@, return the value @b@ of the -- first pair where the key equals the given value @a@. @@ -767,35 +804,22 @@ find predicate = Fold step (return Nothing') (return . toMaybe) -- @since 0.7.0 {-# INLINABLE lookup #-} lookup :: (Eq a, Monad m) => a -> Fold m (a,b) (Maybe b) -lookup a0 = Fold step (return Nothing') (return . toMaybe) +lookup a0 = Fold step (return ()) (const (return Nothing)) where - step x (a,b) = return $ - case x of - Nothing' -> if a == a0 - then Just' b - else Nothing' - _ -> x - --- | Convert strict 'Either'' to lazy 'Maybe' -{-# INLINABLE hush #-} -hush :: Either' a b -> Maybe b -hush (Left' _) = Nothing -hush (Right' b) = Just b + step _ (a,b) = return $ if a == a0 + then Done $ Just b + else Partial () -- | Returns the first index that satisfies the given predicate. -- -- @since 0.7.0 {-# INLINABLE findIndex #-} findIndex :: Monad m => (a -> Bool) -> Fold m a (Maybe Int) -findIndex predicate = Fold step (return $ Left' 0) (return . hush) +findIndex predicate = Fold step (return 0) (const (return Nothing)) where - step x a = return $ - case x of - Left' i -> - if predicate a - then Right' i - else Left' (i + 1) - _ -> x + step i a = return $ if predicate a + then Done $ Just i + else Partial (i + 1) -- | Returns the first index where a given value is found in the stream. -- @@ -817,7 +841,7 @@ elemIndex a = findIndex (a ==) -- @since 0.7.0 {-# INLINABLE null #-} null :: Monad m => Fold m a Bool -null = Fold (\_ _ -> return False) (return True) return +null = Fold (\_ _ -> doneM False) (return True) return -- | -- > any p = lmap p or @@ -828,7 +852,14 @@ null = Fold (\_ _ -> return False) (return True) return -- @since 0.7.0 {-# INLINABLE any #-} any :: Monad m => (a -> Bool) -> Fold m a Bool -any predicate = Fold (\x a -> return $ x || predicate a) (return False) return +any predicate = + Fold + (\_ a -> + if predicate a + then return $ FL.Done True + else return $ FL.Partial False) + (return False) + return -- | Return 'True' if the given element is present in the stream. -- @@ -848,7 +879,14 @@ elem a = any (a ==) -- @since 0.7.0 {-# INLINABLE all #-} all :: Monad m => (a -> Bool) -> Fold m a Bool -all predicate = Fold (\x a -> return $ x && predicate a) (return True) return +all predicate = + Fold + (\_ a -> + if predicate a + then return $ FL.Partial True + else return $ FL.Done False) + (return True) + return -- | Returns 'True' if the given element is not present in the stream. -- @@ -867,7 +905,7 @@ notElem a = all (a /=) -- @since 0.7.0 {-# INLINABLE and #-} and :: Monad m => Fold m Bool Bool -and = Fold (\x a -> return $ x && a) (return True) return +and = all (== True) -- | Returns 'True' if any element is 'True', 'False' otherwise -- @@ -877,7 +915,7 @@ and = Fold (\x a -> return $ x && a) (return True) return -- @since 0.7.0 {-# INLINABLE or #-} or :: Monad m => Fold m Bool Bool -or = Fold (\x a -> return $ x || a) (return False) return +or = any (== True) ------------------------------------------------------------------------------ -- Grouping/Splitting @@ -932,14 +970,17 @@ splitAt splitAt n (Fold stepL initialL extractL) (Fold stepR initialR extractR) = Fold step initial extract where - initial = Tuple3' n <$> initialL <*> initialR + initial = Tuple3' n <$> liftInitialM initialL <*> liftInitialM initialR step (Tuple3' i xL xR) input = if i > 0 - then stepL xL input >>= (\a -> return (Tuple3' (i - 1) a xR)) - else stepR xR input >>= (return . Tuple3' i xL) - - extract (Tuple3' _ a b) = (,) <$> extractL a <*> extractR b + then liftStep stepL xL input >>= (\a -> partialM $ Tuple3' (i - 1) a xR) + else do + b <- liftStep stepR xR input + case b of + Partial _ -> partialM $ Tuple3' i xL b + Done x -> fmap Done $ (,) <$> liftExtract extractL xL <*> return x + extract (Tuple3' _ a b) = (,) <$> liftExtract extractL a <*> liftExtract extractR b ------------------------------------------------------------------------------ -- Element Aware APIs @@ -965,23 +1006,40 @@ spanBy cmp (Fold stepL initialL extractL) (Fold stepR initialR extractR) = Fold step initial extract where - initial = Tuple3' <$> initialL <*> initialR <*> return (Tuple' Nothing True) - step (Tuple3' a b (Tuple' (Just frst) isFirstG)) input = - if cmp frst input && isFirstG - then stepL a input - >>= (\a' -> return (Tuple3' a' b (Tuple' (Just frst) isFirstG))) - else stepR b input - >>= (\a' -> return (Tuple3' a a' (Tuple' Nothing False))) + initial = + Tuple3' <$> liftInitialM initialL <*> liftInitialM initialR <*> + return (Tuple' Nothing True) - step (Tuple3' a b (Tuple' Nothing isFirstG)) input = + step (Tuple3' (Done a) (Done b) _) _ = doneM (a, b) + step (Tuple3' a b (Tuple' (Just frst) isFirstG)) input = + if cmp frst input && isFirstG + then liftStep stepL a input >>= + (\a' -> + return $ + Partial $ Tuple3' a' b (Tuple' (Just frst) isFirstG)) + else liftStep stepR b input >>= + (\a' -> partialM $ Tuple3' a a' (Tuple' Nothing False)) + step (Tuple3' a b (Tuple' Nothing isFirstG)) input = if isFirstG - then stepL a input - >>= (\a' -> return (Tuple3' a' b (Tuple' (Just input) isFirstG))) - else stepR b input - >>= (\a' -> return (Tuple3' a a' (Tuple' Nothing False))) + then liftStep stepL a input >>= + (\a' -> + return $ + Partial $ Tuple3' a' b (Tuple' (Just input) isFirstG)) + else liftStep stepR b input >>= + (\a' -> partialM $ Tuple3' a a' (Tuple' Nothing False)) + + extract (Tuple3' a b _) = + (,) <$> liftExtract extractL a <*> liftExtract extractR b + +{- +spanBy cmp fld1 fld2 = + bind (ltakeWhileBy1 cmp fld1) $ \(ma, b) -> + case ma of + Nothing -> (,) b <$> fld2 + Just a -> (,) b <$> prependWith a fld2 +-} - extract (Tuple3' a b _) = (,) <$> extractL a <*> extractR b -- | @span p f1 f2@ composes folds @f1@ and @f2@ such that @f1@ consumes the -- input as long as the predicate @p@ is 'True'. @f2@ consumes the rest of the @@ -1014,14 +1072,29 @@ span p (Fold stepL initialL extractL) (Fold stepR initialR extractR) = where - initial = Tuple3' <$> initialL <*> initialR <*> return True + initial = + Tuple3' <$> liftInitialM initialL <*> liftInitialM initialR <*> + return True + step (Tuple3' (Done a) (Done b) _) _ = doneM (a, b) step (Tuple3' a b isFirstG) input = if isFirstG && p input - then stepL a input >>= (\a' -> return (Tuple3' a' b True)) - else stepR b input >>= (\a' -> return (Tuple3' a a' False)) + then liftStep stepL a input >>= + (\a' -> partialM $ Tuple3' a' b True) + else liftStep stepR b input >>= + (\a' -> partialM $ Tuple3' a a' False) + + extract (Tuple3' a b _) = + (,) <$> liftExtract extractL a <*> liftExtract extractR b + +{- +span predicate fld1 fld2 = + bind (ltakeWhile1 predicate fld1) $ \(ma, b) -> + case ma of + Nothing -> (,) b <$> fld2 + Just a -> (,) b <$> prependWith a fld2 +-} - extract (Tuple3' a b _) = (,) <$> extractL a <*> extractR b -- | -- > break p = span (not . p) @@ -1067,18 +1140,26 @@ spanByRolling spanByRolling cmp (Fold stepL initialL extractL) (Fold stepR initialR extractR) = Fold step initial extract - where - initial = Tuple3' <$> initialL <*> initialR <*> return Nothing + where - step (Tuple3' a b (Just frst)) input = - if cmp input frst - then stepL a input >>= (\a' -> return (Tuple3' a' b (Just input))) - else stepR b input >>= (\b' -> return (Tuple3' a b' (Just input))) + initial = + Tuple3' <$> liftInitialM initialL <*> liftInitialM initialR <*> + return Nothing + step (Tuple3' (Done a) (Done b) _) _ = doneM (a, b) + + step (Tuple3' a b (Just frst)) input = + if cmp input frst + then liftStep stepL a input >>= + (\a' -> partialM $ Tuple3' a' b (Just input)) + else liftStep stepR b input >>= + (\b' -> partialM $ Tuple3' a b' (Just input)) step (Tuple3' a b Nothing) input = - stepL a input >>= (\a' -> return (Tuple3' a' b (Just input))) + liftStep stepL a input >>= + (\a' -> partialM $ Tuple3' a' b (Just input)) - extract (Tuple3' a b _) = (,) <$> extractL a <*> extractR b + extract (Tuple3' a b _) = + (,) <$> liftExtract extractL a <*> liftExtract extractR b ------------------------------------------------------------------------------ -- Binary splitting on a separator @@ -1123,9 +1204,10 @@ tee f1 f2 = (,) <$> f1 <*> f2 foldNil :: Monad m => Fold m a [b] foldNil = Fold step begin done where begin = return [] - step _ _ = return [] + step _ _ = partialM [] done = return +-- XXX How is the performance? {-# INLINE foldCons #-} foldCons :: Monad m => Fold m a b -> Fold m a [b] -> Fold m a [b] foldCons (Fold stepL beginL doneL) (Fold stepR beginR doneR) = @@ -1133,9 +1215,10 @@ foldCons (Fold stepL beginL doneL) (Fold stepR beginR doneR) = where - begin = Tuple' <$> beginL <*> beginR - step (Tuple' xL xR) a = Tuple' <$> stepL xL a <*> stepR xR a - done (Tuple' xL xR) = (:) <$> doneL xL <*> doneR xR + begin = Tuple' <$> liftInitialM beginL <*> liftInitialM beginR + step (Tuple' (Done a) (Done b)) _ = doneM $ a:b + step (Tuple' xL xR) a = fmap Partial $ Tuple' <$> liftStep stepL xL a <*> liftStep stepR xR a + done (Tuple' xL xR) = (:) <$> liftExtract doneL xL <*> liftExtract doneR xR -- XXX use "List" instead of "[]"?, use Array for output to scale it to a large -- number of consumers? For polymorphic case a vector could be helpful. For @@ -1166,17 +1249,18 @@ distribute = foldr foldCons foldNil -- | Like 'distribute' but for folds that return (), this can be more efficient -- than 'distribute' as it does not need to maintain state. -- +-- XXX Efficiently find when to stop? {-# INLINE distribute_ #-} distribute_ :: Monad m => [Fold m a ()] -> Fold m a () distribute_ fs = Fold step initial extract where - initial = Prelude.mapM (\(Fold s i e) -> - i >>= \r -> return (Fold s (return r) e)) fs + initial = Prelude.mapM initialize fs step ss a = do - Prelude.mapM_ (\(Fold s i _) -> i >>= \r -> void (s r a)) ss - return ss - extract = - Prelude.mapM_ (\(Fold _ i e) -> i >>= \r -> e r) + -- XXX We can use foldM here instead and check if the number of Stops + -- are equal to the number of Folds + Prelude.mapM_ (\fld -> void $ runStep fld a) ss + partialM ss + extract ss = Prelude.mapM_ (\(Fold _ i e) -> i >>= \r -> e r) ss ------------------------------------------------------------------------------ -- Partitioning @@ -1231,13 +1315,14 @@ partitionByM f (Fold stepL beginL doneL) (Fold stepR beginR doneR) = where - begin = Tuple' <$> beginL <*> beginR + begin = Tuple' <$> liftInitialM beginL <*> liftInitialM beginR + step (Tuple' (Done x) (Done y)) _ = doneM (x, y) step (Tuple' xL xR) a = do r <- f a case r of - Left b -> Tuple' <$> stepL xL b <*> return xR - Right c -> Tuple' xL <$> stepR xR c - done (Tuple' xL xR) = (,) <$> doneL xL <*> doneR xR + Left b -> fmap Partial $ Tuple' <$> liftStep stepL xL b <*> return xR + Right c -> fmap Partial $ Tuple' xL <$> liftStep stepR xR c + done (Tuple' xL xR) = (,) <$> liftExtract doneL xL <*> liftExtract doneR xR -- Note: we could use (a -> Bool) instead of (a -> Either b c), but the latter -- makes the signature clearer as to which case belongs to which fold. @@ -1309,6 +1394,7 @@ partition = partitionBy id -- @ -- -- @since 0.7.0 +-- XXX Find an efficient way to Done. Check if all the folds have stopped. {-# INLINE demuxWith #-} demuxWith :: (Monad m, Ord k) => (a -> (k, a')) -> Map k (Fold m a' b) -> Fold m a (Map k b) @@ -1320,7 +1406,7 @@ demuxWith f kv = Fold step initial extract -- alterF is available only since containers version 0.5.8.2 #if MIN_VERSION_containers(0,5,8) step mp a = case f a of - (k, a') -> Map.alterF twiddle k mp + (k, a') -> Partial <$> Map.alterF twiddle k mp -- XXX should we raise an exception in Nothing case? -- Ideally we should enforce that it is a total map over k so that look -- up never fails @@ -1328,17 +1414,15 @@ demuxWith f kv = Fold step initial extract -- update in the map. where twiddle Nothing = pure Nothing - twiddle (Just (Fold step' acc extract')) = do - !r <- acc >>= \x -> step' x a' - pure . Just $ Fold step' (return r) extract' + twiddle (Just fld) = Just <$> runStep fld a' #else step mp a = let (k, a') = f a in case Map.lookup k mp of - Nothing -> return mp - Just (Fold step' acc extract') -> do - !r <- acc >>= \x -> step' x a' - return $ Map.insert k (Fold step' (return r) extract') mp + Nothing -> partialM mp + Just fld -> do + !r <- runStep fld a' + partialM $ Map.insert k r mp #endif extract = Prelude.mapM (\(Fold _ acc e) -> acc >>= e) @@ -1367,22 +1451,21 @@ demuxWithDefault_ f kv (Fold dstep dinitial dextract) = where - initFold (Fold s i e) = i >>= \r -> return (Fold s (return r) e) initial = do - mp <- Prelude.mapM initFold kv - Tuple' mp <$> dinitial - + mp <- Prelude.mapM initialize kv + dacc <- liftInitialM dinitial + return (Tuple' mp dacc) step (Tuple' mp dacc) a | (k, a') <- f a = case Map.lookup k mp of Nothing -> do - acc <- dstep dacc (k, a') - return (Tuple' mp acc) + acc <- liftStep dstep dacc (k, a') + partialM $ Tuple' mp acc Just (Fold step' acc _) -> do _ <- acc >>= \x -> step' x a' - return (Tuple' mp dacc) + partialM $ Tuple' mp dacc extract (Tuple' mp dacc) = do - void $ dextract dacc + void $ liftExtract dextract dacc Prelude.mapM_ (\(Fold _ acc e) -> acc >>= e) mp -- | Split the input stream based on a key field and fold each split using a @@ -1411,19 +1494,17 @@ demuxWith_ f kv = Fold step initial extract where - initial = - Prelude.mapM (\(Fold s i e) -> - i >>= \r -> return (Fold s (return r) e)) kv + initial = Prelude.mapM initialize kv step mp a -- XXX should we raise an exception in Nothing case? -- Ideally we should enforce that it is a total map over k so that look -- up never fails | (k, a') <- f a = case Map.lookup k mp of - Nothing -> return mp + Nothing -> partialM mp Just (Fold step' acc _) -> do _ <- acc >>= \x -> step' x a' - return mp + partialM mp extract = Prelude.mapM_ (\(Fold _ acc e) -> acc >>= e) -- | Given a stream of key value pairs and a map from keys to folds, fold the @@ -1476,11 +1557,11 @@ classifyWith f (Fold step initial extract) = Fold step' initial' extract' Nothing -> do x <- initial r <- step x a - return $ Map.insert k r kv + partialM $ Map.insert k r kv Just x -> do - r <- step x a - return $ Map.insert k r kv - extract' = Prelude.mapM extract + r <- liftStep step x a + partialM $ Map.insert k r kv + extract' = Prelude.mapM (liftExtract extract) -- | Given an input stream of key value pairs and a fold for values, fold all -- the values belonging to each key. Useful for map/reduce, bucketizing the @@ -1517,11 +1598,12 @@ unzipWithM f (Fold stepL beginL doneL) (Fold stepR beginR doneR) = where + step (Tuple' (Done l) (Done r)) _ = doneM (l, r) step (Tuple' xL xR) a = do (b,c) <- f a - Tuple' <$> stepL xL b <*> stepR xR c - begin = Tuple' <$> beginL <*> beginR - done (Tuple' xL xR) = (,) <$> doneL xL <*> doneR xR + fmap Partial $ Tuple' <$> liftStep stepL xL b <*> liftStep stepR xR c + begin = Tuple' <$> liftInitialM beginL <*> liftInitialM beginR + done (Tuple' xL xR) = (,) <$> liftExtract doneL xL <*> liftExtract doneR xR -- | Split elements in the input stream into two parts using a pure splitter -- function, direct each part to a different fold and zip the results. @@ -1614,11 +1696,12 @@ toParallelSVar svar winfo = Fold step initial extract initial = return () - step () x = liftIO $ do + step _ x = liftIO $ do -- XXX we can have a separate fold for unlimited buffer case to avoid a -- branch in the step here. decrementBufferLimit svar void $ send svar (ChildYield x) + return $ FL.Partial () extract () = liftIO $ sendStop svar winfo @@ -1637,12 +1720,12 @@ toParallelSVarLimited svar winfo = Fold step initial extract then do decrementBufferLimit svar void $ send svar (ChildYield x) - return True + return $ FL.Partial True else do cleanupSVarFromWorker svar sendStop svar winfo - return False - step False _ = return False + return $ FL.Done () + step False _ = return $ FL.Done () extract True = liftIO $ sendStop svar winfo extract False = return () diff --git a/src/Streamly/Internal/Data/Fold/Types.hs b/src/Streamly/Internal/Data/Fold/Types.hs index 1e1d1a0627..6fabffa883 100644 --- a/src/Streamly/Internal/Data/Fold/Types.hs +++ b/src/Streamly/Internal/Data/Fold/Types.hs @@ -75,11 +75,11 @@ -- functions. The "initial" action generates the initial state of the fold. The -- state is internal to the fold and maintains the accumulated output. The -- "step" function is invoked using the current state and the next input value --- and results in a @Yield@ or @Stop@. A @Yield@ returns the next intermediate --- state of the fold, a @Stop@ indicates that the fold has terminated and +-- and results in a @Partial@ or @Done@. A @Partial@ returns the next intermediate +-- state of the fold, a @Done@ indicates that the fold has terminated and -- returns the final value of the accumulator. -- --- Every @Yield@ indicates that a new accumulated output is available. The +-- Every @Partial@ indicates that a new accumulated output is available. The -- accumulated output can be extracted from the state at any point using -- "extract". "extract" can never fail. A fold returns a valid output even -- without any input i.e. even if you call "extract" on "initial" state it @@ -96,7 +96,7 @@ -- = Alternate Designs -- -- An alternate and simpler design would be to return the intermediate output --- via @Yield@ along with the state, instead of using "extract" on the yielded +-- via @Partial@ along with the state, instead of using "extract" on the yielded -- state and remove the extract function altogether. -- -- This may even facilitate more efficient implementation. Extract from the @@ -107,22 +107,31 @@ -- -- However, removing extract altogether may lead to less optimal code in some -- cases because the driver of the fold needs to thread around the intermediate --- output to return it if the stream stops before the fold could @Stop@. When +-- output to return it if the stream stops before the fold could @Done@. When -- using this approach, the @splitParse (FL.take filesize)@ benchmark shows a -- 2x worse performance even after ensuring everything fuses. So we keep the -- "extract" approach to ensure better perf in all cases. -- --- But we could still yield both state and the output in @Yield@, the output +-- But we could still yield both state and the output in @Partial@, the output -- can be used for the scan use case, instead of using extract. Extract would -- then be used only for the case when the stream stops before the fold -- completes. module Streamly.Internal.Data.Fold.Types - ( Fold (..) + ( Step (..) + , liftStep + , liftExtract + , liftInitial + , liftInitialM + , partialM + , doneM + , Fold (..) + , Fold2 (..) , simplify , toListRevF -- experimental -- $toListRevF + , lmap , lmapM , lfilter @@ -140,6 +149,7 @@ module Streamly.Internal.Data.Fold.Types ) where +import Data.Bifunctor import Control.Applicative (liftA2) import Control.Concurrent (threadDelay, forkIO, killThread) import Control.Concurrent.MVar (MVar, newMVar, takeMVar, putMVar) @@ -153,15 +163,34 @@ import Data.Maybe (isJust, fromJust) import Data.Semigroup (Semigroup(..)) #endif import Streamly.Internal.Data.Tuple.Strict (Tuple'(..), Tuple3'(..)) -import Streamly.Internal.Data.Either.Strict (Either'(..)) import Streamly.Internal.Data.SVar (MonadAsync) ------------------------------------------------------------------------------ -- Monadic left folds ------------------------------------------------------------------------------ --- | Represents a left fold over an input stream consisting of values of type --- @a@ to a single value of type @b@ in 'Monad' @m@. +-- {-# ANN type Step Fuse #-} +data Step s b = Partial !s | Done !b + +instance Bifunctor Step where + {-# INLINE bimap #-} + bimap f _ (Partial a) = Partial (f a) + bimap _ g (Done b) = Done (g b) + + {-# INLINE first #-} + first f (Partial a) = Partial (f a) + first _ (Done x) = Done x + + {-# INLINE second #-} + second f (Done a) = Done (f a) + second _ (Partial x) = Partial x + +instance Functor (Step s) where + {-# INLINE fmap #-} + fmap = second + +-- | Represents a left fold over an input stream of values of type @a@ to a +-- single value of type @b@ in 'Monad' @m@. -- -- The fold uses an intermediate state @s@ as accumulator. The @step@ function -- updates the state and returns the new state. When the fold is done @@ -172,7 +201,33 @@ import Streamly.Internal.Data.SVar (MonadAsync) data Fold m a b = -- | @Fold @ @ step @ @ initial @ @ extract@ - forall s. Fold (s -> a -> m s) (m s) (s -> m b) + forall s. Fold (s -> a -> m (Step s b)) (m s) (s -> m b) + +{-# INLINE liftStep #-} +liftStep :: Monad m => (s -> a -> m (Step s b)) -> Step s b -> a -> m (Step s b) +liftStep step (Partial s) a = step s a +liftStep _ x _ = return x + +{-# INLINE liftExtract #-} +liftExtract :: Monad m => (s -> m b) -> Step s b -> m b +liftExtract _ (Done b) = return b +liftExtract done (Partial s) = done s + +{-# INLINE liftInitial #-} +liftInitial :: s -> Step s b +liftInitial = Partial + +{-# INLINE liftInitialM #-} +liftInitialM :: Monad m => m s -> m (Step s b) +liftInitialM = fmap Partial + +{-# INLINE partialM #-} +partialM :: Monad m => s -> m (Step s b) +partialM = return . Partial + +{-# INLINE doneM #-} +doneM :: Monad m => b -> m (Step s b) +doneM = return . Done -- | Experimental type to provide a side input to the fold for generating the -- initial state. For example, if we have to fold chunks of a stream and write @@ -184,28 +239,37 @@ data Fold2 m c a b = forall s. Fold2 (s -> a -> m s) (c -> m s) (s -> m b) -- | Convert more general type 'Fold2' into a simpler type 'Fold' -simplify :: Fold2 m c a b -> c -> Fold m a b -simplify (Fold2 step inject extract) c = Fold step (inject c) extract +simplify :: Functor m => Fold2 m c a b -> c -> Fold m a b +simplify (Fold2 step inject extract) c = + Fold (\x a -> Partial <$> step x a) (inject c) extract -- | Maps a function on the output of the fold (the type @b@). -instance Functor m => Functor (Fold m a) where +instance Monad m => Functor (Fold m a) where {-# INLINE fmap #-} - fmap f (Fold step start done) = Fold step start done' + fmap f (Fold step start done) = Fold step' start done' where + step' x a = do + res <- step x a + case res of + Partial s -> partialM s + Done b -> doneM (f b) done' x = fmap f $! done x -- | The fold resulting from '<*>' distributes its input to both the argument -- folds and combines their output using the supplied function. -instance Applicative m => Applicative (Fold m a) where +instance Monad m => Applicative (Fold m a) where {-# INLINE pure #-} - pure b = Fold (\() _ -> pure ()) (pure ()) (\() -> pure b) - + pure b = Fold (\() _ -> pure $ Done b) (pure ()) (\() -> pure b) {-# INLINE (<*>) #-} (Fold stepL beginL doneL) <*> (Fold stepR beginR doneR) = - let step (Tuple' xL xR) a = Tuple' <$> stepL xL a <*> stepR xR a - begin = Tuple' <$> beginL <*> beginR - done (Tuple' xL xR) = doneL xL <*> doneR xR - in Fold step begin done + let combine (Done dL) (Done dR) = Done $ dL dR + combine sl sr = Partial $ Tuple' sl sr + step (Tuple' xL xR) a = + combine <$> liftStep stepL xL a <*> liftStep stepR xR a + begin = Tuple' <$> liftInitialM beginL <*> liftInitialM beginR + done (Tuple' xL xR) = liftExtract doneL xL <*> liftExtract doneR xR + in Fold step begin done + -- | Combines the outputs of the folds (the type @b@) using their 'Semigroup' -- instances. @@ -330,7 +394,7 @@ instance (Monad m, Floating b) => Floating (Fold m a b) where -- xn : ... : x2 : x1 : [] {-# INLINABLE toListRevF #-} toListRevF :: Monad m => Fold m a [a] -toListRevF = Fold (\xs x -> return $ x:xs) (return []) return +toListRevF = Fold (\xs x -> partialM $ x:xs) (return []) return -- | @(lmap f fold)@ maps the function @f@ on the input of the fold. -- @@ -367,7 +431,7 @@ lmapM f (Fold step begin done) = Fold step' begin done lfilter :: Monad m => (a -> Bool) -> Fold m a r -> Fold m a r lfilter f (Fold step begin done) = Fold step' begin done where - step' x a = if f a then step x a else return x + step' x a = if f a then step x a else partialM x -- | Like 'lfilter' but with a monadic predicate. -- @@ -378,7 +442,7 @@ lfilterM f (Fold step begin done) = Fold step' begin done where step' x a = do use <- f a - if use then step x a else return x + if use then step x a else partialM x -- | Transform a fold from a pure input to a 'Maybe' input, consuming only -- 'Just' values. @@ -395,7 +459,7 @@ lcatMaybes = lfilter isJust . lmap fromJust -- | Take first @n@ elements from the stream and discard the rest. -- -- @since 0.7.0 -{-# INLINABLE ltake #-} +{-# INLINE ltake #-} ltake :: Monad m => Int -> Fold m a b -> Fold m a b ltake n (Fold step initial done) = Fold step' initial' done' where @@ -404,8 +468,10 @@ ltake n (Fold step initial done) = Fold step' initial' done' if i < n then do res <- step r a - return $ Tuple' (i + 1) res - else return $ Tuple' i r + case res of + Partial s -> partialM $ Tuple' (i + 1) s + Done b -> doneM b + else Done <$> done r done' (Tuple' _ r) = done r -- | Takes elements from the input as long as the predicate succeeds. @@ -413,16 +479,12 @@ ltake n (Fold step initial done) = Fold step' initial' done' -- @since 0.7.0 {-# INLINABLE ltakeWhile #-} ltakeWhile :: Monad m => (a -> Bool) -> Fold m a b -> Fold m a b -ltakeWhile predicate (Fold step initial done) = Fold step' initial' done' +ltakeWhile predicate (Fold step initial done) = Fold step' initial done where - initial' = fmap Left' initial - step' (Left' r) a = + step' r a = if predicate a - then Left' <$> step r a - else return (Right' r) - step' r _ = return r - done' (Left' r) = done r - done' (Right' r) = done r + then step r a + else Done <$> done r ------------------------------------------------------------------------------ -- Nesting @@ -440,10 +502,17 @@ ltakeWhile predicate (Fold step initial done) = Fold step' initial' done' -- > 465 -- -- @since 0.7.0 +-- XXX Is this correct? {-# INLINABLE duplicate #-} -duplicate :: Applicative m => Fold m a b -> Fold m a (Fold m a b) +duplicate :: Monad m => Fold m a b -> Fold m a (Fold m a b) duplicate (Fold step begin done) = - Fold step begin (\x -> pure (Fold step (pure x) done)) + Fold step' begin (\x -> pure (Fold step (pure x) done)) + where + step' x a = do + res <- step x a + case res of + Partial s -> pure $ Partial s + Done _ -> pure $ Done $ Fold step (pure x) done -- | Run the initialization effect of a fold. The returned fold would use the -- value returned by this effect as its initial value. @@ -461,7 +530,10 @@ runStep :: Monad m => Fold m a b -> a -> m (Fold m a b) runStep (Fold step initial extract) a = do i <- initial r <- step i a - return $ Fold step (return r) extract + case r of + Partial s -> return $ Fold step (return s) extract + Done b -> return $ Fold (\_ _ -> doneM b) (return i) (\_ -> return b) + ------------------------------------------------------------------------------ -- Parsing @@ -480,23 +552,25 @@ lchunksOf n (Fold step1 initial1 extract1) (Fold step2 initial2 extract2) = where - initial' = Tuple3' 0 <$> initial1 <*> initial2 + initial' = Tuple3' 0 <$> liftInitialM initial1 <*> liftInitialM initial2 step' (Tuple3' i r1 r2) a = if i < n then do - res <- step1 r1 a - return $ Tuple3' (i + 1) res r2 + res <- liftStep step1 r1 a + partialM $ Tuple3' (i + 1) res r2 else do - res <- extract1 r1 - acc2 <- step2 r2 res - - i1 <- initial1 - acc1 <- step1 i1 a - return $ Tuple3' 1 acc1 acc2 + res <- liftExtract extract1 r1 + acc2 <- liftStep step2 r2 res + case acc2 of + Done b -> doneM b + Partial _ -> do + i1 <- initial1 + acc1 <- step1 i1 a + partialM $ Tuple3' 1 acc1 acc2 extract' (Tuple3' _ r1 r2) = do - res <- extract1 r1 - acc2 <- step2 r2 res - extract2 acc2 + res <- liftExtract extract1 r1 + acc2 <- liftStep step2 r2 res + liftExtract extract2 acc2 {-# INLINE lchunksOf2 #-} lchunksOf2 :: Monad m => Int -> Fold m a b -> Fold2 m x b c -> Fold2 m x a c @@ -505,21 +579,21 @@ lchunksOf2 n (Fold step1 initial1 extract1) (Fold2 step2 inject2 extract2) = where - inject' x = Tuple3' 0 <$> initial1 <*> inject2 x + inject' x = Tuple3' 0 <$> liftInitialM initial1 <*> inject2 x step' (Tuple3' i r1 r2) a = if i < n then do - res <- step1 r1 a + res <- liftStep step1 r1 a return $ Tuple3' (i + 1) res r2 else do - res <- extract1 r1 + res <- liftExtract extract1 r1 acc2 <- step2 r2 res i1 <- initial1 acc1 <- step1 i1 a return $ Tuple3' 1 acc1 acc2 extract' (Tuple3' _ r1 r2) = do - res <- extract1 r1 + res <- liftExtract extract1 r1 acc2 <- step2 r2 res extract2 acc2 @@ -535,6 +609,7 @@ lchunksOf2 n (Fold step1 initial1 extract1) (Fold2 step2 inject2 extract2) = -- -----Fold m a b----|-Fold n a c-|-Fold n a c-|-...-|----Fold m a c -- -- @ +-- XXX Should we check for mv2 at each step? {-# INLINE lsessionsOf #-} lsessionsOf :: MonadAsync m => Double -> Fold m a b -> Fold m b c -> Fold m a c lsessionsOf n (Fold step1 initial1 extract1) (Fold step2 initial2 extract2) = @@ -544,8 +619,8 @@ lsessionsOf n (Fold step1 initial1 extract1) (Fold step2 initial2 extract2) = -- XXX MVar may be expensive we need a cheaper synch mechanism here initial' = do - i1 <- initial1 - i2 <- initial2 + i1 <- liftInitialM initial1 + i2 <- liftInitialM initial2 mv1 <- liftIO $ newMVar i1 mv2 <- liftIO $ newMVar (Right i2) t <- control $ \run -> @@ -553,33 +628,40 @@ lsessionsOf n (Fold step1 initial1 extract1) (Fold step2 initial2 extract2) = tid <- forkIO $ catch (restore $ void $ run (timerThread mv1 mv2)) (handleChildException mv2) run (return tid) - return $ Tuple3' t mv1 mv2 - step' acc@(Tuple3' _ mv1 _) a = do + return $ Tuple3' t (Partial mv1) mv2 + step' acc@(Tuple3' t (Partial mv1) mv2) a = do r1 <- liftIO $ takeMVar mv1 - res <- step1 r1 a + res <- liftStep step1 r1 a liftIO $ putMVar mv1 res - return acc + case res of + Partial _ -> partialM acc + Done _ -> partialM $ Tuple3' t (Done mv1) mv2 + step' acc@(Tuple3' _ (Done _) _) _ = partialM acc extract' (Tuple3' tid _ mv2) = do r2 <- liftIO $ takeMVar mv2 liftIO $ killThread tid case r2 of Left e -> throwM e - Right x -> extract2 x + Right x -> liftExtract extract2 x timerThread mv1 mv2 = do liftIO $ threadDelay (round $ n * 1000000) r1 <- liftIO $ takeMVar mv1 - i1 <- initial1 + i1 <- liftInitialM initial1 liftIO $ putMVar mv1 i1 - res1 <- extract1 r1 + res1 <- liftExtract extract1 r1 r2 <- liftIO $ takeMVar mv2 - res <- case r2 of - Left _ -> return r2 - Right x -> Right <$> step2 x res1 - liftIO $ putMVar mv2 res - timerThread mv1 mv2 + case r2 of + Left _ -> liftIO $ putMVar mv2 r2 + Right x -> do + res <- liftStep step2 x res1 + case res of + Partial _ -> do + liftIO $ putMVar mv2 $ Right res + timerThread mv1 mv2 + Done _ -> liftIO $ putMVar mv2 $ Right res handleChildException :: MVar (Either SomeException a) -> SomeException -> IO () diff --git a/src/Streamly/Internal/Data/Parser/ParserD.hs b/src/Streamly/Internal/Data/Parser/ParserD.hs index 7eed5fdf9e..785f2511e9 100644 --- a/src/Streamly/Internal/Data/Parser/ParserD.hs +++ b/src/Streamly/Internal/Data/Parser/ParserD.hs @@ -160,9 +160,12 @@ where import Control.Exception (assert) import Control.Monad.Catch (MonadCatch, MonadThrow(..)) -import Streamly.Internal.Data.Fold.Types (Fold(..)) +import Streamly.Internal.Data.Fold.Types + (Fold(..), liftInitialM, liftStep, liftExtract) import Streamly.Internal.Data.Tuple.Strict (Tuple'(..)) +import qualified Streamly.Internal.Data.Fold.Types as FL + import Prelude hiding (any, all, take, takeWhile, sequence, concatMap, maybe, either) import Streamly.Internal.Data.Parser.ParserD.Tee @@ -182,7 +185,12 @@ fromFold (Fold fstep finitial fextract) = Parser step finitial fextract where - step s a = Partial 0 <$> fstep s a + step s a = do + res <- fstep s a + case res of + FL.Partial s1 -> return $ Partial 0 s1 + FL.Done b -> return $ Done 0 b + ------------------------------------------------------------------------------- -- Terminating but not failing folds @@ -311,21 +319,21 @@ either parser = Parser step initial extract take :: Monad m => Int -> Fold m a b -> Parser m a b take n (Fold fstep finitial fextract) = Parser step initial extract - where + where - initial = Tuple' 0 <$> finitial + initial = Tuple' 0 <$> liftInitialM finitial step (Tuple' i r) a | i < n = do - res <- fstep r a + res <- liftStep fstep r a let i1 = i + 1 s1 = Tuple' i1 res if i1 < n then return $ Partial 0 s1 - else Done 0 <$> fextract res - | otherwise = Done 1 <$> fextract r + else Done 0 <$> liftExtract fextract res + | otherwise = Done 1 <$> liftExtract fextract r - extract (Tuple' _ r) = fextract r + extract (Tuple' _ r) = liftExtract fextract r -- | See 'Streamly.Internal.Data.Parser.takeEQ'. -- @@ -339,21 +347,21 @@ takeEQ cnt (Fold fstep finitial fextract) = Parser step initial extract n = max cnt 0 - initial = Tuple' 0 <$> finitial + initial = Tuple' 0 <$> liftInitialM finitial step (Tuple' i r) a | i < n = do - res <- fstep r a + res <- liftStep fstep r a let i1 = i + 1 s1 = Tuple' i1 res if i1 < n then return (Continue 0 s1) - else Done 0 <$> fextract res - | otherwise = Done 1 <$> fextract r + else Done 0 <$> liftExtract fextract res + | otherwise = Done 1 <$> liftExtract fextract r extract (Tuple' i r) = if n == i - then fextract r + then liftExtract fextract r else throwM $ ParseError err where @@ -373,10 +381,10 @@ takeGE cnt (Fold fstep finitial fextract) = Parser step initial extract n = max cnt 0 - initial = Tuple' 0 <$> finitial + initial = Tuple' 0 <$> liftInitialM finitial step (Tuple' i r) a = do - res <- fstep r a + res <- liftStep fstep r a let i1 = i + 1 s1 = Tuple' i1 res return $ @@ -384,7 +392,7 @@ takeGE cnt (Fold fstep finitial fextract) = Parser step initial extract then Continue 0 s1 else Partial 0 s1 - extract (Tuple' i r) = fextract r >>= f + extract (Tuple' i r) = liftExtract fextract r >>= f where @@ -404,16 +412,16 @@ takeGE cnt (Fold fstep finitial fextract) = Parser step initial extract {-# INLINE takeWhile #-} takeWhile :: Monad m => (a -> Bool) -> Fold m a b -> Parser m a b takeWhile predicate (Fold fstep finitial fextract) = - Parser step initial fextract + Parser step initial (liftExtract fextract) where - initial = finitial + initial = liftInitialM finitial step s a = if predicate a - then Partial 0 <$> fstep s a - else Done 1 <$> fextract s + then Partial 0 <$> liftStep fstep s a + else Done 1 <$> liftExtract fextract s -- | See 'Streamly.Internal.Data.Parser.takeWhile1'. -- @@ -438,14 +446,14 @@ takeWhile1 predicate (Fold fstep finitial fextract) = step (Just s) a = if predicate a then do - r <- fstep s a + r <- liftStep fstep s a return $ Partial 0 (Just r) else do - b <- fextract s + b <- liftExtract fextract s return $ Done 1 b extract Nothing = throwM $ ParseError "takeWhile1: end of input" - extract (Just s) = fextract s + extract (Just s) = liftExtract fextract s -- | See 'Streamly.Internal.Data.Parser.sliceSepBy'. -- @@ -454,15 +462,15 @@ takeWhile1 predicate (Fold fstep finitial fextract) = {-# INLINABLE sliceSepBy #-} sliceSepBy :: Monad m => (a -> Bool) -> Fold m a b -> Parser m a b sliceSepBy predicate (Fold fstep finitial fextract) = - Parser step initial fextract + Parser step initial (liftExtract fextract) where - initial = finitial + initial = liftInitialM finitial step s a = if not (predicate a) - then Partial 0 <$> fstep s a - else Done 0 <$> fextract s + then Partial 0 <$> liftStep fstep s a + else Done 0 <$> liftExtract fextract s -- | See 'Streamly.Internal.Data.Parser.sliceEndWith'. -- @@ -496,20 +504,20 @@ sliceSepByMax predicate cnt (Fold fstep finitial fextract) = where - initial = Tuple' 0 <$> finitial + initial = Tuple' 0 <$> liftInitialM finitial step (Tuple' i r) a | not (predicate a) = if i < cnt then do - res <- fstep r a + res <- liftStep fstep r a let i1 = i + 1 s1 = Tuple' i1 res return $ Partial 0 s1 - else Done 1 <$> fextract r - | otherwise = Done 0 <$> fextract r + else Done 1 <$> liftExtract fextract r + | otherwise = Done 0 <$> liftExtract fextract r - extract (Tuple' _ r) = fextract r + extract (Tuple' _ r) = liftExtract fextract r -- | See 'Streamly.Internal.Data.Parser.wordBy'. -- @@ -698,7 +706,7 @@ manyTill (Fold fstep finitial fextract) where initial = do - fs <- finitial + fs <- liftInitialM finitial ManyTillR 0 fs <$> initialR step (ManyTillR cnt fs st) a = do @@ -709,7 +717,7 @@ manyTill (Fold fstep finitial fextract) assert (cnt + 1 - n >= 0) (return ()) return $ Continue n (ManyTillR (cnt + 1 - n) fs s) Done n _ -> do - b <- fextract fs + b <- liftExtract fextract fs return $ Done n b Error _ -> do rR <- initialL @@ -721,10 +729,10 @@ manyTill (Fold fstep finitial fextract) Partial n s -> return $ Partial n (ManyTillL fs s) Continue n s -> return $ Continue n (ManyTillL fs s) Done n b -> do - fs1 <- fstep fs b + fs1 <- liftStep fstep fs b l <- initialR return $ Partial n (ManyTillR 0 fs1 l) Error err -> return $ Error err - extract (ManyTillL fs sR) = extractL sR >>= fstep fs >>= fextract - extract (ManyTillR _ fs _) = fextract fs + extract (ManyTillL fs sR) = extractL sR >>= liftStep fstep fs >>= liftExtract fextract + extract (ManyTillR _ fs _) = liftExtract fextract fs diff --git a/src/Streamly/Internal/Data/Parser/ParserD/Types.hs b/src/Streamly/Internal/Data/Parser/ParserD/Types.hs index 3840ed1437..3eab7d4f0c 100644 --- a/src/Streamly/Internal/Data/Parser/ParserD/Types.hs +++ b/src/Streamly/Internal/Data/Parser/ParserD/Types.hs @@ -134,6 +134,7 @@ import Control.Exception (assert, Exception(..)) import Control.Monad (MonadPlus(..)) import Control.Monad.Catch (MonadCatch, try, throwM, MonadThrow) import Fusion.Plugin.Types (Fuse(..)) +import Streamly.Internal.Data.Fold.Types (liftInitialM, liftStep, liftExtract) import Streamly.Internal.Data.Fold (Fold(..), toList) import Streamly.Internal.Data.Tuple.Strict (Tuple3'(..)) @@ -435,14 +436,14 @@ alt (Parser stepL initialL extractL) (Parser stepR initialR extractR) = {-# INLINE splitMany #-} splitMany :: MonadCatch m => Fold m b c -> Parser m a b -> Parser m a c splitMany (Fold fstep finitial fextract) (Parser step1 initial1 extract1) = - Parser step initial extract + Parser step initial extract where initial = do ps <- initial1 -- parse state - fs <- finitial -- fold state - pure (Tuple3' ps 0 fs) + fs <- liftInitialM finitial -- fold state + pure (Tuple3' ps (0 :: Int) fs) {-# INLINE step #-} step (Tuple3' st cnt fs) a = do @@ -457,18 +458,18 @@ splitMany (Fold fstep finitial fextract) (Parser step1 initial1 extract1) = return $ Continue n (Tuple3' s (cnt1 - n) fs) Done n b -> do s <- initial1 - fs1 <- fstep fs b + fs1 <- liftStep fstep fs b return $ Partial n (Tuple3' s 0 fs1) Error _ -> do - xs <- fextract fs + xs <- liftExtract fextract fs return $ Done cnt1 xs -- XXX The "try" may impact performance if this parser is used as a scan extract (Tuple3' s _ fs) = do r <- try $ extract1 s case r of - Left (_ :: ParseError) -> fextract fs - Right b -> fstep fs b >>= fextract + Left (_ :: ParseError) -> liftExtract fextract fs + Right b -> liftStep fstep fs b >>= liftExtract fextract -- | See documentation of 'Streamly.Internal.Data.Parser.some'. -- @@ -483,8 +484,8 @@ splitSome (Fold fstep finitial fextract) (Parser step1 initial1 extract1) = initial = do ps <- initial1 -- parse state - fs <- finitial -- fold state - pure (Tuple3' ps 0 (Left fs)) + fs <- liftInitialM finitial -- fold state + pure (Tuple3' ps (0 :: Int) (Left fs)) {-# INLINE step #-} step (Tuple3' st _ (Left fs)) a = do @@ -494,7 +495,7 @@ splitSome (Fold fstep finitial fextract) (Parser step1 initial1 extract1) = Continue n s -> return $ Continue n (Tuple3' s 0 (Left fs)) Done n b -> do s <- initial1 - fs1 <- fstep fs b + fs1 <- liftStep fstep fs b return $ Partial n (Tuple3' s 0 (Right fs1)) Error err -> return $ Error err step (Tuple3' st cnt (Right fs)) a = do @@ -509,17 +510,17 @@ splitSome (Fold fstep finitial fextract) (Parser step1 initial1 extract1) = return $ Continue n (Tuple3' s (cnt1 - n) (Right fs)) Done n b -> do s <- initial1 - fs1 <- fstep fs b + fs1 <- liftStep fstep fs b return $ Partial n (Tuple3' s 0 (Right fs1)) - Error _ -> Done cnt1 <$> fextract fs + Error _ -> Done cnt1 <$> liftExtract fextract fs -- XXX The "try" may impact performance if this parser is used as a scan - extract (Tuple3' s _ (Left fs)) = extract1 s >>= fstep fs >>= fextract + extract (Tuple3' s _ (Left fs)) = extract1 s >>= liftStep fstep fs >>= liftExtract fextract extract (Tuple3' s _ (Right fs)) = do r <- try $ extract1 s case r of - Left (_ :: ParseError) -> fextract fs - Right b -> fstep fs b >>= fextract + Left (_ :: ParseError) -> liftExtract fextract fs + Right b -> liftStep fstep fs b >>= liftExtract fextract -- | See 'Streamly.Internal.Data.Parser.die'. -- diff --git a/src/Streamly/Internal/Data/Sink.hs b/src/Streamly/Internal/Data/Sink.hs index 3f965eddd6..563c90e87d 100644 --- a/src/Streamly/Internal/Data/Sink.hs +++ b/src/Streamly/Internal/Data/Sink.hs @@ -66,7 +66,7 @@ import Prelude reverse, iterate, init, and, or, lookup, foldr1, (!!), scanl, scanl1, replicate, concatMap, mconcat, foldMap, unzip) -import Streamly.Internal.Data.Fold.Types (Fold(..)) +import Streamly.Internal.Data.Fold.Types (Fold(..), Step(..)) import Streamly.Internal.Data.Sink.Types (Sink(..)) import qualified Data.Map.Strict as Map @@ -81,7 +81,7 @@ toFold :: Monad m => Sink m a -> Fold m a () toFold (Sink f) = Fold step begin done where begin = return () - step _ = f + step _ a = Partial <$> f a done _ = return () ------------------------------------------------------------------------------ diff --git a/src/Streamly/Internal/Data/SmallArray.hs b/src/Streamly/Internal/Data/SmallArray.hs index dbd617b734..c1f895ccfc 100644 --- a/src/Streamly/Internal/Data/SmallArray.hs +++ b/src/Streamly/Internal/Data/SmallArray.hs @@ -49,12 +49,15 @@ import GHC.IO (unsafePerformIO) import Data.Functor.Identity (runIdentity) import Streamly.Internal.Data.SmallArray.Types + +import Streamly.Internal.Data.Tuple.Strict (Tuple'(..)) import Streamly.Internal.Data.Unfold.Types (Unfold(..)) import Streamly.Internal.Data.Fold.Types (Fold(..)) import Streamly.Internal.Data.Stream.StreamK.Type (IsStream) import Streamly.Internal.Data.Stream.Serial (SerialT) import qualified Streamly.Internal.Data.Stream.StreamD as D +import qualified Streamly.Internal.Data.Fold.Types as FL {-# NOINLINE bottomElement #-} bottomElement :: a @@ -107,13 +110,13 @@ writeN limit = Fold step initial extract where initial = do marr <- liftIO $ newSmallArray limit bottomElement - return (marr, 0) - step (marr, i) x - | i == limit = return (marr, i) + return (Tuple' marr 0) + step (Tuple' marr i) x + | i == limit = fmap FL.Done $ liftIO $ freezeSmallArray marr 0 i | otherwise = do liftIO $ writeSmallArray marr i x - return (marr, i + 1) - extract (marr, len) = liftIO $ freezeSmallArray marr 0 len + FL.partialM (Tuple' marr (i + 1)) + extract (Tuple' marr len) = liftIO $ freezeSmallArray marr 0 len {-# INLINE_NORMAL fromStreamDN #-} fromStreamDN :: MonadIO m => Int -> D.Stream m a -> m (SmallArray a) diff --git a/src/Streamly/Internal/Data/Stream/IsStream.hs b/src/Streamly/Internal/Data/Stream/IsStream.hs index 0a152da9bd..8e0a52852d 100644 --- a/src/Streamly/Internal/Data/Stream/IsStream.hs +++ b/src/Streamly/Internal/Data/Stream/IsStream.hs @@ -1991,7 +1991,7 @@ toHandle h = go -- /Internal/ {-# INLINE toStream #-} toStream :: Monad m => Fold m a (SerialT Identity a) -toStream = Fold (\f x -> return $ f . (x `K.cons`)) +toStream = Fold (\f x -> return $ FL.Partial $ f . (x `K.cons`)) (return id) (return . ($ K.nil)) @@ -2009,7 +2009,7 @@ toStream = Fold (\f x -> return $ f . (x `K.cons`)) -- xn : ... : x2 : x1 : [] {-# INLINABLE toStreamRev #-} toStreamRev :: Monad m => Fold m a (SerialT Identity a) -toStreamRev = Fold (\xs x -> return $ x `K.cons` xs) (return K.nil) return +toStreamRev = Fold (\xs x -> return $ FL.Partial $ x `K.cons` xs) (return K.nil) return -- | Convert a stream to a pure stream. -- @@ -2346,14 +2346,14 @@ scanl1' step m = fromStreamD $ D.scanl1' step $ toStreamD m -- @since 0.7.0 {-# INLINE scan #-} scan :: (IsStream t, Monad m) => Fold m a b -> t m a -> t m b -scan (Fold step begin done) = P.scanlMx' step begin done +scan = P.scanFold -- | Postscan a stream using the given monadic fold. -- -- @since 0.7.0 {-# INLINE postscan #-} postscan :: (IsStream t, Monad m) => Fold m a b -> t m a -> t m b -postscan (Fold step begin done) = P.postscanlMx' step begin done +postscan = P.postscanFold ------------------------------------------------------------------------------ -- Stateful Transformations @@ -4738,14 +4738,14 @@ classifySessionsBy tick tmout reset ejectPred let curTime = max sessionEventTime timestamp accumulate v = do old <- case v of - Nothing -> initial + Nothing -> FL.liftInitialM initial Just (Tuple' _ acc) -> return acc - new <- step old value + new <- FL.liftStep step old value return $ Tuple' timestamp new mOld = Map.lookup key sessionKeyValueMap acc@(Tuple' _ fres) <- accumulate mOld - res <- extract fres + res <- FL.liftExtract extract fres case res of Left x -> do -- deleting a key from the heap is expensive, so we never @@ -4823,7 +4823,7 @@ classifySessionsBy tick tmout reset ejectPred -- delete from map and output the fold accumulator ejectEntry hp mp out cnt acc key = do - sess <- extract acc + sess <- FL.liftExtract extract acc let out1 = (key, fromEither sess) `K.cons` out let mp1 = Map.delete key mp return (hp, mp1, out1, cnt - 1) diff --git a/src/Streamly/Internal/Data/Stream/Prelude.hs b/src/Streamly/Internal/Data/Stream/Prelude.hs index 8db54cd0a0..485fc378b7 100644 --- a/src/Streamly/Internal/Data/Stream/Prelude.hs +++ b/src/Streamly/Internal/Data/Stream/Prelude.hs @@ -44,6 +44,8 @@ module Streamly.Internal.Data.Stream.Prelude , scanlMx' , postscanlx' , postscanlMx' + , postscanFold + , scanFold -- * Zip style operations , eqBy @@ -202,7 +204,7 @@ foldlT f z s = S.foldlT f z (toStreamS s) {-# INLINE runFold #-} runFold :: (Monad m, IsStream t) => Fold m a b -> t m a -> m b -runFold (Fold step begin done) = foldlMx' step begin done +runFold fld m = S.runFold fld $ toStreamS m ------------------------------------------------------------------------------ -- Scans @@ -230,6 +232,17 @@ scanlMx' :: (IsStream t, Monad m) scanlMx' step begin done m = D.fromStreamD $ D.scanlMx' step begin done $ D.toStreamD m +{-# INLINE_NORMAL postscanFold #-} +postscanFold :: (IsStream t, Monad m) + => Fold m a b -> t m a -> t m b +postscanFold fld m = + D.fromStreamD $ D.postscanFold fld $ D.toStreamD m + +{-# INLINE scanFold #-} +scanFold :: (IsStream t, Monad m) + => Fold m a b -> t m a -> t m b +scanFold fld m = D.fromStreamD $ D.scanFold fld $ D.toStreamD m + -- scanl followed by map -- -- | Strict left scan with an extraction function. Like 'scanl'', but applies a diff --git a/src/Streamly/Internal/Data/Stream/StreamD.hs b/src/Streamly/Internal/Data/Stream/StreamD.hs index e9dd059690..e3a7a45792 100644 --- a/src/Streamly/Internal/Data/Stream/StreamD.hs +++ b/src/Streamly/Internal/Data/Stream/StreamD.hs @@ -224,6 +224,8 @@ module Streamly.Internal.Data.Stream.StreamD , postscanlMx' , scanlMx' , scanlx' + , postscanFold + , scanFold -- * Filtering , filter @@ -330,7 +332,7 @@ import Streamly.Internal.Data.Time.Units (TimeUnit64, toRelTime64, diffAbsTime64, RelTime64) import Streamly.Internal.Data.Atomics (atomicModifyIORefCAS_) import Streamly.Internal.Data.Array.Storable.Foreign.Types (Array(..)) -import Streamly.Internal.Data.Fold.Types (Fold(..)) +import Streamly.Internal.Data.Fold.Types (Fold(..), liftStep, liftExtract, liftInitialM) import Streamly.Internal.Data.Parser (ParseError(..)) import Streamly.Internal.Data.Pipe.Types (Pipe(..), PipeState(..)) import Streamly.Internal.Data.Time.Clock (Clock(Monotonic), getTime) @@ -1521,7 +1523,7 @@ splitSuffixBy' predicate f (Stream step state) = acc <- initial acc' <- fstep acc x if (predicate x) - then done acc' >>= \val -> return $ Yield val (Just s) + then liftExtract done acc' >>= \val -> return $ Yield val (Just s) else go SPEC s acc' Skip s -> return $ Skip $ Just s @@ -1533,12 +1535,12 @@ splitSuffixBy' predicate f (Stream step state) = res <- step (adaptState gst) stt case res of Yield x s -> do - acc' <- fstep acc x + acc' <- liftStep fstep acc x if (predicate x) - then done acc' >>= \val -> return $ Yield val (Just s) + then liftExtract done acc' >>= \val -> return $ Yield val (Just s) else go SPEC s acc' Skip s -> go SPEC s acc - Stop -> done acc >>= \val -> return $ Yield val Nothing + Stop -> liftExtract done acc >>= \val -> return $ Yield val Nothing stepOuter _ _ Nothing = return Stop @@ -1572,11 +1574,11 @@ groupsBy cmp f (Stream step state) = Stream (stepOuter f) (Just state, Nothing) Yield x s -> do if cmp x prev then do - acc' <- fstep acc x + acc' <- liftStep fstep acc x go SPEC prev s acc' - else done acc >>= \r -> return $ Yield r (Just s, Just x) + else liftExtract done acc >>= \r -> return $ Yield r (Just s, Just x) Skip s -> go SPEC prev s acc - Stop -> done acc >>= \r -> return $ Yield r (Nothing, Nothing) + Stop -> liftExtract done acc >>= \r -> return $ Yield r (Nothing, Nothing) stepOuter (Fold fstep initial done) gst (Just st, Just prev) = do acc <- initial @@ -1592,11 +1594,11 @@ groupsBy cmp f (Stream step state) = Stream (stepOuter f) (Just state, Nothing) Yield x s -> do if cmp x prev then do - acc' <- fstep acc x + acc' <- liftStep fstep acc x go SPEC s acc' - else done acc >>= \r -> return $ Yield r (Just s, Just x) + else liftExtract done acc >>= \r -> return $ Yield r (Just s, Just x) Skip s -> go SPEC s acc - Stop -> done acc >>= \r -> return $ Yield r (Nothing, Nothing) + Stop -> liftExtract done acc >>= \r -> return $ Yield r (Nothing, Nothing) stepOuter _ _ (Nothing,_) = return Stop @@ -1629,12 +1631,12 @@ groupsRollingBy cmp f (Stream step state) = Yield x s -> do if cmp prev x then do - acc' <- fstep acc x + acc' <- liftStep fstep acc x go SPEC x s acc' else - done acc >>= \r -> return $ Yield r (Just s, Just x) + liftExtract done acc >>= \r -> return $ Yield r (Just s, Just x) Skip s -> go SPEC prev s acc - Stop -> done acc >>= \r -> return $ Yield r (Nothing, Nothing) + Stop -> liftExtract done acc >>= \r -> return $ Yield r (Nothing, Nothing) stepOuter (Fold fstep initial done) gst (Just st, Just prev') = do acc <- initial @@ -1648,11 +1650,11 @@ groupsRollingBy cmp f (Stream step state) = Yield x s -> do if cmp prevv x then do - acc' <- fstep acc x + acc' <- liftStep fstep acc x go SPEC x s acc' - else done acc >>= \r -> return $ Yield r (Just s, Just x) + else liftExtract done acc >>= \r -> return $ Yield r (Just s, Just x) Skip s -> go SPEC prevv s acc - Stop -> done acc >>= \r -> return $ Yield r (Nothing, Nothing) + Stop -> liftExtract done acc >>= \r -> return $ Yield r (Nothing, Nothing) stepOuter _ _ (Nothing, _) = return Stop @@ -1663,7 +1665,7 @@ splitBy predicate f (Stream step state) = Stream (step' f) (Just state) where {-# INLINE_LATE step' #-} - step' (Fold fstep initial done) gst (Just st) = initial >>= go SPEC st + step' (Fold fstep initial done) gst (Just st) = liftInitialM initial >>= go SPEC st where @@ -1672,12 +1674,12 @@ splitBy predicate f (Stream step state) = Stream (step' f) (Just state) case res of Yield x s -> do if predicate x - then done acc >>= \r -> return $ Yield r (Just s) + then liftExtract done acc >>= \r -> return $ Yield r (Just s) else do - acc' <- fstep acc x + acc' <- liftStep fstep acc x go SPEC s acc' Skip s -> go SPEC s acc - Stop -> done acc >>= \r -> return $ Yield r Nothing + Stop -> liftExtract done acc >>= \r -> return $ Yield r Nothing step' _ _ Nothing = return Stop @@ -1711,12 +1713,12 @@ splitSuffixBy predicate f (Stream step state) = Stream (step' f) (Just state) case res of Yield x s -> do if predicate x - then done acc >>= \r -> return $ Yield r (Just s) + then liftExtract done acc >>= \r -> return $ Yield r (Just s) else do - acc' <- fstep acc x + acc' <- liftStep fstep acc x go SPEC s acc' Skip s -> go SPEC s acc - Stop -> done acc >>= \r -> return $ Yield r Nothing + Stop -> liftExtract done acc >>= \r -> return $ Yield r Nothing step' _ _ Nothing = return Stop @@ -1748,12 +1750,12 @@ wordsBy predicate f (Stream step state) = Stream (stepOuter f) (Just state) case res of Yield x s -> do if predicate x - then done acc >>= \r -> return $ Yield r (Just s) + then liftExtract done acc >>= \r -> return $ Yield r (Just s) else do - acc' <- fstep acc x + acc' <- liftStep fstep acc x go SPEC s acc' Skip s -> go SPEC s acc - Stop -> done acc >>= \r -> return $ Yield r Nothing + Stop -> liftExtract done acc >>= \r -> return $ Yield r Nothing stepOuter _ _ Nothing = return Stop @@ -1814,7 +1816,7 @@ splitOn patArr (Fold fstep initial done) (Stream step state) = (rb, rhead) <- liftIO $ RB.new patLen return $ Skip $ GO_KARP_RABIN state rb rhead - stepOuter gst (GO_SINGLE_PAT stt pat) = initial >>= go SPEC stt + stepOuter gst (GO_SINGLE_PAT stt pat) = liftInitialM initial >>= go SPEC stt where @@ -1824,13 +1826,13 @@ splitOn patArr (Fold fstep initial done) (Stream step state) = Yield x s -> do if pat == x then do - r <- done acc + r <- liftExtract done acc return $ Yield r (GO_SINGLE_PAT s pat) - else fstep acc x >>= go SPEC s + else liftStep fstep acc x >>= go SPEC s Skip s -> go SPEC s acc - Stop -> done acc >>= \r -> return $ Yield r GO_DONE + Stop -> liftExtract done acc >>= \r -> return $ Yield r GO_DONE - stepOuter gst (GO_SHORT_PAT stt) = initial >>= go0 SPEC 0 (0 :: Word) stt + stepOuter gst (GO_SHORT_PAT stt) = liftInitialM initial >>= go0 SPEC 0 (0 :: Word) stt where @@ -1851,7 +1853,7 @@ splitOn patArr (Fold fstep initial done) (Stream step state) = then do if wrd' .&. mask == patWord then do - r <- done acc + r <- liftExtract done acc return $ Yield r (GO_SHORT_PAT s) else go1 SPEC wrd' s acc else go0 SPEC (idx + 1) wrd' s acc @@ -1860,7 +1862,7 @@ splitOn patArr (Fold fstep initial done) (Stream step state) = acc' <- if idx /= 0 then go2 wrd idx acc else return acc - done acc' >>= \r -> return $ Yield r GO_DONE + liftExtract done acc' >>= \r -> return $ Yield r GO_DONE {-# INLINE go1 #-} go1 !_ wrd st !acc = do @@ -1869,22 +1871,22 @@ splitOn patArr (Fold fstep initial done) (Stream step state) = Yield x s -> do let wrd' = addToWord wrd x old = (mask .&. wrd) `shiftR` (elemBits * (patLen - 1)) - acc' <- fstep acc (toEnum $ fromIntegral old) + acc' <- liftStep fstep acc (toEnum $ fromIntegral old) if wrd' .&. mask == patWord - then done acc' >>= \r -> return $ Yield r (GO_SHORT_PAT s) + then liftExtract done acc' >>= \r -> return $ Yield r (GO_SHORT_PAT s) else go1 SPEC wrd' s acc' Skip s -> go1 SPEC wrd s acc Stop -> do acc' <- go2 wrd patLen acc - done acc' >>= \r -> return $ Yield r GO_DONE + liftExtract done acc' >>= \r -> return $ Yield r GO_DONE go2 !wrd !n !acc | n > 0 = do let old = (mask .&. wrd) `shiftR` (elemBits * (n - 1)) - fstep acc (toEnum $ fromIntegral old) >>= go2 wrd (n - 1) + liftStep fstep acc (toEnum $ fromIntegral old) >>= go2 wrd (n - 1) go2 _ _ acc = return acc stepOuter gst (GO_KARP_RABIN stt rb rhead) = do - initial >>= go0 SPEC 0 rhead stt + liftInitialM initial >>= go0 SPEC 0 rhead stt where @@ -1914,9 +1916,9 @@ splitOn patArr (Fold fstep initial done) (Stream step state) = Skip s -> go0 SPEC idx rh s acc Stop -> do !acc' <- if idx /= 0 - then RB.unsafeFoldRingM rh fstep acc rb + then RB.unsafeFoldRingM rh (liftStep fstep) acc rb else return acc - done acc' >>= \r -> return $ Yield r GO_DONE + liftExtract done acc' >>= \r -> return $ Yield r GO_DONE -- XXX Theoretically this code can do 4 times faster if GHC generates -- optimal code. If we use just "(cksum' == patHash)" condition it goes @@ -1930,7 +1932,7 @@ splitOn patArr (Fold fstep initial done) (Stream step state) = Yield x s -> do old <- liftIO $ peek rh let cksum' = deltaCksum cksum old x - acc' <- fstep acc old + acc' <- liftStep fstep acc old if (cksum' == patHash) then do @@ -1941,13 +1943,13 @@ splitOn patArr (Fold fstep initial done) (Stream step state) = go1 SPEC cksum' rh' s acc' Skip s -> go1 SPEC cksum rh s acc Stop -> do - acc' <- RB.unsafeFoldRingFullM rh fstep acc rb - done acc' >>= \r -> return $ Yield r GO_DONE + acc' <- RB.unsafeFoldRingFullM rh (liftStep fstep) acc rb + liftExtract done acc' >>= \r -> return $ Yield r GO_DONE go2 !_ !cksum' !rh' s !acc' = do if RB.unsafeEqArray rb rh' patArr then do - r <- done acc' + r <- liftExtract done acc' return $ Yield r (GO_KARP_RABIN s rb rhead) else go1 SPEC cksum' rh' s acc' @@ -1957,7 +1959,7 @@ splitOn patArr (Fold fstep initial done) (Stream step state) = Yield x s -> do acc <- initial acc' <- fstep acc x - done acc' >>= \r -> return $ Yield r (GO_EMPTY_PAT s) + liftExtract done acc' >>= \r -> return $ Yield r (GO_EMPTY_PAT s) Skip s -> return $ Skip (GO_EMPTY_PAT s) Stop -> return Stop @@ -2006,8 +2008,8 @@ splitSuffixOn withSep patArr (Fold fstep initial done) acc <- initial if pat == x then do - acc' <- if withSep then fstep acc x else return acc - done acc' >>= \r -> return $ Yield r (GO_SINGLE_PAT s pat) + acc' <- if withSep then fstep acc x else FL.partialM acc + liftExtract done acc' >>= \r -> return $ Yield r (GO_SINGLE_PAT s pat) else fstep acc x >>= go SPEC s Skip s -> return $ Skip $ (GO_SINGLE_PAT s pat) Stop -> return Stop @@ -2021,12 +2023,12 @@ splitSuffixOn withSep patArr (Fold fstep initial done) Yield x s -> do if pat == x then do - acc' <- if withSep then fstep acc x else return acc - r <- done acc' + acc' <- if withSep then liftStep fstep acc x else return acc + r <- liftExtract done acc' return $ Yield r (GO_SINGLE_PAT s pat) - else fstep acc x >>= go SPEC s + else liftStep fstep acc x >>= go SPEC s Skip s -> go SPEC s acc - Stop -> done acc >>= \r -> return $ Yield r GO_DONE + Stop -> liftExtract done acc >>= \r -> return $ Yield r GO_DONE stepOuter gst (GO_SHORT_PAT stt) = do @@ -2042,11 +2044,11 @@ splitSuffixOn withSep patArr (Fold fstep initial done) Yield x s -> do acc <- initial let wrd' = addToWord wrd x - acc' <- if withSep then fstep acc x else return acc + acc' <- if withSep then fstep acc x else FL.partialM acc if idx == maxIndex then do if wrd' .&. mask == patWord - then done acc' >>= \r -> return $ Yield r (GO_SHORT_PAT s) + then liftExtract done acc' >>= \r -> return $ Yield r (GO_SHORT_PAT s) else go0 SPEC (idx + 1) wrd' s acc' else go0 SPEC (idx + 1) wrd' s acc' Skip s -> return $ Skip (GO_SHORT_PAT s) @@ -2067,12 +2069,12 @@ splitSuffixOn withSep patArr (Fold fstep initial done) case res of Yield x s -> do let wrd' = addToWord wrd x - acc' <- if withSep then fstep acc x else return acc + acc' <- if withSep then liftStep fstep acc x else return acc if idx == maxIndex then do if wrd' .&. mask == patWord then do - r <- done acc' + r <- liftExtract done acc' return $ Yield r (GO_SHORT_PAT s) else go1 SPEC wrd' s acc' else go0 SPEC (idx + 1) wrd' s acc' @@ -2084,7 +2086,7 @@ splitSuffixOn withSep patArr (Fold fstep initial done) acc' <- if idx /= 0 && not withSep then go2 wrd idx acc else return acc - done acc' >>= \r -> return $ Yield r GO_DONE + liftExtract done acc' >>= \r -> return $ Yield r GO_DONE {-# INLINE go1 #-} go1 !_ wrd st !acc = do @@ -2094,10 +2096,10 @@ splitSuffixOn withSep patArr (Fold fstep initial done) let wrd' = addToWord wrd x old = (mask .&. wrd) `shiftR` (elemBits * (patLen - 1)) acc' <- if withSep - then fstep acc x - else fstep acc (toEnum $ fromIntegral old) + then liftStep fstep acc x + else liftStep fstep acc (toEnum $ fromIntegral old) if wrd' .&. mask == patWord - then done acc' >>= \r -> return $ Yield r (GO_SHORT_PAT s) + then liftExtract done acc' >>= \r -> return $ Yield r (GO_SHORT_PAT s) else go1 SPEC wrd' s acc' Skip s -> go1 SPEC wrd s acc Stop -> @@ -2109,11 +2111,11 @@ splitSuffixOn withSep patArr (Fold fstep initial done) acc' <- if withSep then return acc else go2 wrd patLen acc - done acc' >>= \r -> return $ Yield r GO_DONE + liftExtract done acc' >>= \r -> return $ Yield r GO_DONE go2 !wrd !n !acc | n > 0 = do let old = (mask .&. wrd) `shiftR` (elemBits * (n - 1)) - fstep acc (toEnum $ fromIntegral old) >>= go2 wrd (n - 1) + liftStep fstep acc (toEnum $ fromIntegral old) >>= go2 wrd (n - 1) go2 _ _ acc = return acc stepOuter gst (GO_KARP_RABIN stt rb rhead) = do @@ -2122,7 +2124,7 @@ splitSuffixOn withSep patArr (Fold fstep initial done) case res of Yield x s -> do acc <- initial - acc' <- if withSep then fstep acc x else return acc + acc' <- if withSep then fstep acc x else FL.partialM acc rh' <- liftIO (RB.unsafeInsert rb rhead x) if idx == maxIndex then do @@ -2151,7 +2153,7 @@ splitSuffixOn withSep patArr (Fold fstep initial done) res <- step (adaptState gst) st case res of Yield x s -> do - acc' <- if withSep then fstep acc x else return acc + acc' <- if withSep then liftStep fstep acc x else return acc rh' <- liftIO (RB.unsafeInsert rb rh x) if idx == maxIndex then do @@ -2168,9 +2170,9 @@ splitSuffixOn withSep patArr (Fold fstep initial done) then return Stop else do !acc' <- if idx /= 0 && not withSep - then RB.unsafeFoldRingM rh fstep acc rb + then RB.unsafeFoldRingM rh (liftStep fstep) acc rb else return acc - done acc' >>= \r -> return $ Yield r GO_DONE + liftExtract done acc' >>= \r -> return $ Yield r GO_DONE -- XXX Theoretically this code can do 4 times faster if GHC generates -- optimal code. If we use just "(cksum' == patHash)" condition it goes @@ -2185,8 +2187,8 @@ splitSuffixOn withSep patArr (Fold fstep initial done) old <- liftIO $ peek rh let cksum' = deltaCksum cksum old x acc' <- if withSep - then fstep acc x - else fstep acc old + then liftStep fstep acc x + else liftStep fstep acc old if (cksum' == patHash) then do @@ -2202,13 +2204,13 @@ splitSuffixOn withSep patArr (Fold fstep initial done) else do acc' <- if withSep then return acc - else RB.unsafeFoldRingFullM rh fstep acc rb - done acc' >>= \r -> return $ Yield r GO_DONE + else RB.unsafeFoldRingFullM rh (liftStep fstep) acc rb + liftExtract done acc' >>= \r -> return $ Yield r GO_DONE go2 !_ !cksum' !rh' s !acc' = do if RB.unsafeEqArray rb rh' patArr then do - r <- done acc' + r <- liftExtract done acc' return $ Yield r (GO_KARP_RABIN s rb rhead) else go1 SPEC cksum' rh' s acc' @@ -2218,7 +2220,7 @@ splitSuffixOn withSep patArr (Fold fstep initial done) Yield x s -> do acc <- initial acc' <- fstep acc x - done acc' >>= \r -> return $ Yield r (GO_EMPTY_PAT s) + liftExtract done acc' >>= \r -> return $ Yield r (GO_EMPTY_PAT s) Skip s -> return $ Skip (GO_EMPTY_PAT s) Stop -> return Stop @@ -3605,6 +3607,31 @@ scanlMx' :: Monad m scanlMx' fstep begin done s = (begin >>= \x -> x `seq` done x) `consM` postscanlMx' fstep begin done s +{-# INLINE_NORMAL postscanFold #-} +postscanFold :: Monad m + => FL.Fold m a b -> Stream m a -> Stream m b +postscanFold (FL.Fold fstep begin done) (Stream step state) = do + Stream step' (state, liftInitialM begin) + where + {-# INLINE_LATE step' #-} + step' gst (st, acc) = do + r <- step (adaptState gst) st + case r of + Yield x s -> do + old <- acc + y <- liftStep fstep old x + v <- liftExtract done y + v `seq` y `seq` return (Yield v (s, return y)) + Skip s -> return $ Skip (s, acc) + Stop -> return Stop + + +{-# INLINE scanFold #-} +scanFold :: Monad m + => FL.Fold m a b -> Stream m a -> Stream m b +scanFold fld@(FL.Fold _ begin done) s = + (begin >>= \x -> x `seq` done x) `consM` postscanFold fld s + {-# INLINE scanlx' #-} scanlx' :: Monad m => (x -> a -> x) -> x -> (x -> b) -> Stream m a -> Stream m b @@ -3791,18 +3818,18 @@ tap (Fold fstep initial extract) (Stream step state) = Stream step' Nothing where step' _ Nothing = do - r <- initial + r <- liftInitialM initial return $ Skip (Just (r, state)) step' gst (Just (acc, st)) = acc `seq` do r <- step gst st case r of Yield x s -> do - acc' <- fstep acc x + acc' <- liftStep fstep acc x return $ Yield x (Just (acc', s)) Skip s -> return $ Skip (Just (acc, s)) Stop -> do - void $ extract acc + void $ liftExtract extract acc return $ Stop {-# INLINE_NORMAL tapOffsetEvery #-} @@ -3815,18 +3842,18 @@ tapOffsetEvery offset n (Fold fstep initial extract) (Stream step state) = {-# INLINE_LATE step' #-} step' _ Nothing = do - r <- initial + r <- liftInitialM initial return $ Skip (Just (r, state, offset `mod` n)) step' gst (Just (acc, st, count)) | count <= 0 = do r <- step gst st case r of Yield x s -> do - !acc' <- fstep acc x + !acc' <- liftStep fstep acc x return $ Yield x (Just (acc', s, n - 1)) Skip s -> return $ Skip (Just (acc, s, count)) Stop -> do - void $ extract acc + void $ liftExtract extract acc return $ Stop step' gst (Just (acc, st, count)) = do @@ -3835,7 +3862,7 @@ tapOffsetEvery offset n (Fold fstep initial extract) (Stream step state) = Yield x s -> return $ Yield x (Just (acc, s, count - 1)) Skip s -> return $ Skip (Just (acc, s, count)) Stop -> do - void $ extract acc + void $ liftExtract extract acc return $ Stop {-# INLINE_NORMAL pollCounts #-} @@ -4364,7 +4391,21 @@ the (Stream step state) = go state {-# INLINE runFold #-} runFold :: (Monad m) => Fold m a b -> Stream m a -> m b -runFold (Fold step begin done) = foldlMx' step begin done +runFold (Fold fstep begin done) (Stream step state) = + begin >>= \x -> go SPEC x state + where + -- XXX !acc? + {-# INLINE_LATE go #-} + go !_ acc st = acc `seq` do + r <- step defState st + case r of + Yield x s -> do + acc' <- fstep acc x + case acc' of + FL.Done b -> return b + FL.Partial acc'' -> go SPEC acc'' s + Skip s -> go SPEC acc s + Stop -> done acc ------------------------------------------------------------------------------- -- Concurrent application and fold @@ -4578,7 +4619,7 @@ lastN n where step (Tuple3' rb rh i) a = do rh1 <- liftIO $ RB.unsafeInsert rb rh a - return $ Tuple3' rb rh1 (i + 1) + FL.partialM $ Tuple3' rb rh1 (i + 1) initial = fmap (\(a, b) -> Tuple3' a b (0 :: Int)) $ liftIO $ RB.new n done (Tuple3' rb rh i) = do arr <- liftIO $ MA.newArray n diff --git a/src/Streamly/Internal/Data/Stream/StreamD/Type.hs b/src/Streamly/Internal/Data/Stream/StreamD/Type.hs index 600444d714..97c0a43468 100644 --- a/src/Streamly/Internal/Data/Stream/StreamD/Type.hs +++ b/src/Streamly/Internal/Data/Stream/StreamD/Type.hs @@ -62,7 +62,7 @@ import Prelude hiding (map, mapM, foldr, take, concatMap) import Fusion.Plugin.Types (Fuse(..)) import Streamly.Internal.Data.SVar (State(..), adaptState, defState) -import Streamly.Internal.Data.Fold.Types (Fold(..), Fold2(..)) +import Streamly.Internal.Data.Fold.Types (Fold(..), liftInitialM, liftStep, liftExtract, Fold2(..)) import qualified Streamly.Internal.Data.Stream.StreamK as K @@ -465,7 +465,7 @@ foldlMx' fstep begin done (Stream step state) = case r of Yield x s -> do acc' <- fstep acc x - go SPEC acc' s + go SPEC acc' s Skip s -> go SPEC acc s Stop -> done acc @@ -610,14 +610,14 @@ groupsOf n (Fold fstep initial extract) (Stream step state) = error $ "Streamly.Internal.Data.Stream.StreamD.Type.groupsOf: the size of " ++ "groups [" ++ show n ++ "] must be a natural number" -- fs = fold state - fs <- initial + fs <- liftInitialM initial return $ Skip (GroupBuffer st fs 0) step' gst (GroupBuffer st fs i) = do r <- step (adaptState gst) st case r of Yield x s -> do - !fs' <- fstep fs x + !fs' <- liftStep fstep fs x let i' = i + 1 return $ if i' >= n @@ -627,7 +627,7 @@ groupsOf n (Fold fstep initial extract) (Stream step state) = Stop -> return $ Skip (GroupYield fs GroupFinish) step' _ (GroupYield fs next) = do - r <- extract fs + r <- liftExtract extract fs return $ Yield r next step' _ GroupFinish = return Stop diff --git a/src/Streamly/Internal/Data/Stream/StreamK.hs b/src/Streamly/Internal/Data/Stream/StreamK.hs index 49aac97f01..eb826624f3 100644 --- a/src/Streamly/Internal/Data/Stream/StreamK.hs +++ b/src/Streamly/Internal/Data/Stream/StreamK.hs @@ -88,6 +88,7 @@ module Streamly.Internal.Data.Stream.StreamK , foldlT , foldlx' , foldlMx' + , runFold -- ** Specialized Folds , drain @@ -198,6 +199,8 @@ import qualified Prelude import Streamly.Internal.Data.SVar import Streamly.Internal.Data.Stream.StreamK.Type +import qualified Streamly.Internal.Data.Fold.Types as FL + ------------------------------------------------------------------------------- -- Deconstruction ------------------------------------------------------------------------------- @@ -431,6 +434,20 @@ foldlMx' step begin done m = go begin m yieldk a r = acc >>= \b -> step b a >>= \x -> go (return x) r in foldStream defState yieldk single stop m1 +{-# INLINABLE runFold #-} +runFold :: (IsStream t, Monad m) => FL.Fold m a b -> t m a -> m b +runFold (FL.Fold step begin done) m = go begin m + where + go !acc m1 = + let stop = acc >>= done + single a = acc >>= \b -> step b a >>= FL.liftExtract done + yieldk a r = acc + >>= \b -> step b a + >>= \x -> case x of + FL.Partial s -> go (return s) r + FL.Done b1 -> return b1 + in foldStream defState yieldk single stop m1 + -- | Like 'foldl'' but with a monadic step function. {-# INLINE foldlM' #-} foldlM' :: (IsStream t, Monad m) => (b -> a -> m b) -> m b -> t m a -> m b diff --git a/src/Streamly/Internal/Data/Unfold.hs b/src/Streamly/Internal/Data/Unfold.hs index f5eb8b1a6d..c633f65b56 100644 --- a/src/Streamly/Internal/Data/Unfold.hs +++ b/src/Streamly/Internal/Data/Unfold.hs @@ -146,6 +146,7 @@ import qualified Control.Monad.Catch as MC import qualified Data.Tuple as Tuple import qualified Streamly.Internal.Data.Stream.StreamK as K import qualified Streamly.Internal.Data.Stream.StreamD as D +import qualified Streamly.Internal.Data.Fold.Types as FL ------------------------------------------------------------------------------- -- Input operations @@ -261,7 +262,9 @@ fold (Unfold ustep inject) (Fold fstep initial extract) a = case r of Yield x s -> do acc' <- fstep acc x - go SPEC acc' s + case acc' of + FL.Partial acc'' -> go SPEC acc'' s + FL.Done c -> return c Skip s -> go SPEC acc s Stop -> extract acc @@ -451,7 +454,7 @@ repeatM = Unfold step return fromList :: Monad m => Unfold m [a] a fromList = Unfold step inject where - inject x = return x + inject = return {-# INLINE_LATE step #-} step (x:xs) = return $ Yield x xs step [] = return Stop @@ -461,7 +464,7 @@ fromList = Unfold step inject fromListM :: Monad m => Unfold m [m a] a fromListM = Unfold step inject where - inject x = return x + inject = return {-# INLINE_LATE step #-} step (x:xs) = x >>= \r -> return $ Yield r xs step [] = return Stop diff --git a/src/Streamly/Internal/FileSystem/File.hs b/src/Streamly/Internal/FileSystem/File.hs index e96f172b0f..9df512c7b0 100644 --- a/src/Streamly/Internal/FileSystem/File.hs +++ b/src/Streamly/Internal/FileSystem/File.hs @@ -360,7 +360,7 @@ writeChunks path = Fold step initial extract return (fld, h) step (fld, h) x = do r <- FL.runStep fld x `MC.onException` liftIO (hClose h) - return (r, h) + FL.partialM (r, h) extract (Fold _ initial1 extract1, h) = do liftIO $ hClose h initial1 >>= extract1 diff --git a/src/Streamly/Internal/Network/Inet/TCP.hs b/src/Streamly/Internal/Network/Inet/TCP.hs index b0068ba31a..1ebc0779dc 100644 --- a/src/Streamly/Internal/Network/Inet/TCP.hs +++ b/src/Streamly/Internal/Network/Inet/TCP.hs @@ -104,6 +104,7 @@ import Network.Socket import Prelude hiding (read) import Streamly.Prelude (MonadAsync) +import Streamly.Internal.Data.Tuple.Strict (Tuple'(..)) import Streamly.Internal.Data.Fold.Types (Fold(..)) import Streamly.Internal.Data.SVar (fork) import Streamly.Internal.Data.Unfold.Types (Unfold(..)) @@ -356,11 +357,11 @@ writeChunks addr port = Fold step initial extract initial = do skt <- liftIO (connect addr port) fld <- FL.initialize (SK.writeChunks skt) `MC.onException` liftIO (Net.close skt) - return (fld, skt) - step (fld, skt) x = do + return (Tuple' fld skt) + step (Tuple' fld skt) x = do r <- FL.runStep fld x `MC.onException` liftIO (Net.close skt) - return (r, skt) - extract (Fold _ initial1 extract1, skt) = do + FL.partialM (Tuple' r skt) + extract (Tuple' (Fold _ initial1 extract1) skt) = do liftIO $ Net.close skt initial1 >>= extract1 diff --git a/test/Streamly/Test/Internal/Data/Parser/ParserD.hs b/test/Streamly/Test/Internal/Data/Parser/ParserD.hs index 281c9e79b6..57232aed01 100644 --- a/test/Streamly/Test/Internal/Data/Parser/ParserD.hs +++ b/test/Streamly/Test/Internal/Data/Parser/ParserD.hs @@ -65,10 +65,12 @@ chooseInt = choose fromFold :: Property fromFold = - forAll (listOf $ chooseInt (min_value, max_value)) $ \ls -> - case (==) <$> (S.parseD (P.fromFold FL.sum) (S.fromList ls)) <*> (S.fold FL.sum (S.fromList ls)) of - Right is_equal -> is_equal - Left _ -> False + forAll (listOf $ chooseInt (min_value, max_value)) + $ \ls -> + case (==) <$> (S.parseD (P.fromFold FL.sum) (S.fromList ls)) + <*> (S.fold FL.sum (S.fromList ls)) of + Right is_equal -> is_equal + Left _ -> False any :: Property any = @@ -417,9 +419,12 @@ shortestPassRight = shortestFailBoth :: Property shortestFailBoth = - property (case S.parseD (P.shortest (P.die "die") (P.die "die")) (S.fromList [1 :: Int]) of - Right _ -> False - Left _ -> True) + property + (case S.parseD + (P.shortest (P.die "die") (P.die "die")) + (S.fromList [1 :: Int]) of + Right _ -> False + Left _ -> True) longestPass :: Property longestPass = @@ -436,56 +441,75 @@ longestPass = longestPassLeft :: Property longestPassLeft = - property (case S.parseD (P.shortest (P.die "die") (P.yield (1 :: Int))) (S.fromList [1 :: Int]) of - Right r -> r == 1 - Left _ -> False) + property + (case S.parseD + (P.shortest (P.die "die") (P.yield (1 :: Int))) + (S.fromList [1 :: Int]) of + Right r -> r == 1 + Left _ -> False) longestPassRight :: Property longestPassRight = - property (case S.parseD (P.shortest (P.yield (1 :: Int)) (P.die "die")) (S.fromList [1 :: Int]) of - Right r -> r == 1 - Left _ -> False) + property + (case S.parseD + (P.shortest (P.yield (1 :: Int)) (P.die "die")) + (S.fromList [1 :: Int]) of + Right r -> r == 1 + Left _ -> False) longestFailBoth :: Property longestFailBoth = - property (case S.parseD (P.shortest (P.die "die") (P.die "die")) (S.fromList [1 :: Int]) of - Right _ -> False - Left _ -> True) + property + (case S.parseD + (P.shortest (P.die "die") (P.die "die")) + (S.fromList [1 :: Int]) of + Right _ -> False + Left _ -> True) many :: Property many = - forAll (listOf (chooseInt (0, 1))) $ \ls -> - let - concatFold = FL.Fold (\concatList curr_list -> return $ concatList ++ curr_list) (return []) return - prsr = P.many concatFold $ P.sliceSepBy (== 1) FL.toList - in - case S.parseD prsr (S.fromList ls) of - Right res_list -> checkListEqual res_list (Prelude.filter (== 0) ls) - Left _ -> property False + forAll (listOf (chooseInt (0, 1))) + $ \ls -> + let concatFold = + FL.Fold + (\concatList curr_list -> + return $ FL.Partial (concatList ++ curr_list)) + (return []) + return + prsr = P.many concatFold $ P.sliceSepBy (== 1) FL.toList + in case S.parseD prsr (S.fromList ls) of + Right res_list -> + checkListEqual res_list (Prelude.filter (== 0) ls) + Left _ -> property False many_empty :: Property many_empty = - property (case S.parseD (P.many FL.toList (P.die "die")) (S.fromList [1 :: Int]) of - Right res_list -> checkListEqual res_list ([] :: [Int]) - Left _ -> property False) + property + (case S.parseD (P.many FL.toList (P.die "die")) (S.fromList [1 :: Int]) of + Right res_list -> checkListEqual res_list ([] :: [Int]) + Left _ -> property False) some :: Property some = - forAll (listOf (chooseInt (0, 1))) $ \genLs -> - let - ls = 0 : genLs - concatFold = FL.Fold (\concatList curr_list -> return $ concatList ++ curr_list) (return []) return - prsr = P.some concatFold $ P.sliceSepBy (== 1) FL.toList - in - case S.parseD prsr (S.fromList ls) of - Right res_list -> res_list == Prelude.filter (== 0) ls - Left _ -> False + forAll (listOf (chooseInt (0, 1))) + $ \ls -> + let concatFold = + FL.Fold + (\concatList curr_list -> + return $ FL.Partial $ concatList ++ curr_list) + (return []) + return + prsr = P.some concatFold $ P.sliceSepBy (== 1) FL.toList + in case S.parseD prsr (S.fromList ls) of + Right res_list -> res_list == Prelude.filter (== 0) ls + Left _ -> False someFail :: Property someFail = - property (case S.parseD (P.some FL.toList (P.die "die")) (S.fromList [1 :: Int]) of - Right _ -> False - Left _ -> True) + property + (case S.parseD (P.some FL.toList (P.die "die")) (S.fromList [1 :: Int]) of + Right _ -> False + Left _ -> True) ------------------------------------------------------------------------------- -- Instances diff --git a/test/Streamly/Test/Prelude.hs b/test/Streamly/Test/Prelude.hs index f120e985e9..9cafd98443 100644 --- a/test/Streamly/Test/Prelude.hs +++ b/test/Streamly/Test/Prelude.hs @@ -667,9 +667,10 @@ transformCombineOpsCommon constr desc eq t = do withMaxSuccess maxTestCount $ monadicIO $ do cref <- run $ newIORef 0 - let sumfoldinref = FL.Fold (\_ e -> modifyIORef' cref (e+)) - (return ()) - (const $ return ()) + let sumfoldinref = + FL.Fold (\_ e -> FL.Partial <$> modifyIORef' cref (e+)) + (return ()) + (const $ return ()) op = S.tap sumfoldinref . S.mapM (\x -> return (x+1)) listOp = fmap (+1) stream <- run ((S.toList . t) $ op (constr a <> constr b)) From 89bcf96d83130bd7427b814b2a25a3be298c4add Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Fri, 21 Aug 2020 07:08:32 +0530 Subject: [PATCH 02/30] Implement groupsOf, lchunksOf in terms of "many" Also add skeletons for splitWith and concatMap for folds. --- src/Streamly/Internal/Data/Fold.hs | 30 +---- src/Streamly/Internal/Data/Fold/Types.hs | 106 +++++++++++++----- .../Internal/Data/Stream/StreamD/Type.hs | 87 +++++++------- 3 files changed, 125 insertions(+), 98 deletions(-) diff --git a/src/Streamly/Internal/Data/Fold.hs b/src/Streamly/Internal/Data/Fold.hs index d70ab8f9dd..637ee827e8 100644 --- a/src/Streamly/Internal/Data/Fold.hs +++ b/src/Streamly/Internal/Data/Fold.hs @@ -197,7 +197,7 @@ module Streamly.Internal.Data.Fold -- * Nested Folds -- , concatMap - , foldChunks + , many , duplicate -- * Running Folds @@ -1636,35 +1636,9 @@ unzip = unzipWith id -- Nesting ------------------------------------------------------------------------------ -{- --- All the stream flattening transformations can also be applied to a fold --- input stream. - --- | This can be used to apply all the stream generation operations on folds. -lconcatMap ::(IsStream t, Monad m) => (a -> t m b) - -> Fold m b c - -> Fold m a c -lconcatMap s f1 f2 = undefined --} - --- All the grouping transformation that we apply to a stream can also be --- applied to a fold input stream. groupBy et al can be written as terminating --- folds and then we can apply foldChunks to use those repeatedly on a stream. - --- | Apply a terminating fold repeatedly to the input of another fold. --- --- Compare with: Streamly.Prelude.concatMap, Streamly.Prelude.foldChunks --- --- /Unimplemented/ --- -{-# INLINABLE foldChunks #-} -foldChunks :: - -- Monad m => - Fold m a b -> Fold m b c -> Fold m a c -foldChunks = undefined {- --- XXX this would be an application of foldChunks using a terminating fold. +-- XXX this would be an application of "many" using a terminating fold. -- -- | Group the input stream into groups of elements between @low@ and @high@. -- Collection starts in chunks of @low@ and then keeps doubling until we reach diff --git a/src/Streamly/Internal/Data/Fold/Types.hs b/src/Streamly/Internal/Data/Fold/Types.hs index 6fabffa883..5c31d8521b 100644 --- a/src/Streamly/Internal/Data/Fold/Types.hs +++ b/src/Streamly/Internal/Data/Fold/Types.hs @@ -139,6 +139,9 @@ module Streamly.Internal.Data.Fold.Types , lcatMaybes , ltake , ltakeWhile + + , splitWith + , many , lsessionsOf , lchunksOf , lchunksOf2 @@ -146,6 +149,7 @@ module Streamly.Internal.Data.Fold.Types , duplicate , initialize , runStep + , concatMap ) where @@ -165,6 +169,8 @@ import Data.Semigroup (Semigroup(..)) import Streamly.Internal.Data.Tuple.Strict (Tuple'(..), Tuple3'(..)) import Streamly.Internal.Data.SVar (MonadAsync) +import Prelude hiding (concatMap) + ------------------------------------------------------------------------------ -- Monadic left folds ------------------------------------------------------------------------------ @@ -255,6 +261,22 @@ instance Monad m => Functor (Fold m a) where Done b -> doneM (f b) done' x = fmap f $! done x +-- | Sequential fold application. Apply two folds sequentially to an input +-- stream. The input is provided to the first fold, when it is done the +-- remaining input is provided to the second fold. When the second fold is +-- done, the outputs of the two folds are combined using the supplied function. +-- +-- Note: This is a folding dual of appending streams using +-- 'Streamly.Prelude.serial', it splits the streams using two folds and zips +-- the results. +-- +-- /Unimplemented/ +-- +{-# INLINE splitWith #-} +splitWith :: -- Monad m => + (a -> b -> c) -> Fold m x a -> Fold m x b -> Fold m x c +splitWith = undefined + -- | The fold resulting from '<*>' distributes its input to both the argument -- folds and combines their output using the supplied function. instance Monad m => Applicative (Fold m a) where @@ -534,43 +556,69 @@ runStep (Fold step initial extract) a = do Partial s -> return $ Fold step (return s) extract Done b -> return $ Fold (\_ _ -> doneM b) (return i) (\_ -> return b) +-- | Map a 'Fold' returning function on the result of a 'Fold'. +-- +-- /Unimplemented/ +-- +{-# INLINE concatMap #-} +concatMap :: -- Monad m => + (b -> Fold m a c) -> Fold m a b -> Fold m a c +concatMap = undefined ------------------------------------------------------------------------------ -- Parsing ------------------------------------------------------------------------------ --- XXX These can be expressed using foldChunks repeatedly on the input of a --- fold. +-- All the grouping transformation that we apply to a stream can also be +-- applied to a fold input stream. groupBy et al can be written as terminating +-- folds and then we can apply "many" to use those repeatedly on a stream. + +-- | Collect zero or more applications of a fold. @many collect split@ applies +-- the @split@ fold repeatedly on the input stream and accumulates zero or more +-- fold results using @collect@. +-- +-- /Internal/ +-- +-- /See also: Streamly.Prelude.concatMap, Streamly.Prelude.foldMany/ +-- +{-# INLINE many #-} +many :: Monad m => Fold m b c -> Fold m a b -> Fold m a c +many (Fold fstep finitial fextract) (Fold step1 initial1 extract1) = + Fold step initial extract + + where + + initial = do + ps <- initial1 -- parse state + fs <- finitial -- fold state + pure (Tuple' ps fs) + + {-# INLINE step #-} + step (Tuple' st fs) a = do + r <- step1 st a + case r of + Partial s -> + return $ Partial (Tuple' s fs) + Done b -> do + s <- initial1 + fs1 <- fstep fs b + case fs1 of + Partial s1 -> return $ Partial (Tuple' s s1) + Done b1 -> return $ Done b1 + + extract (Tuple' s fs) = do + b <- extract1 s + acc <- fstep fs b + case acc of + Partial s1 -> fextract s1 + Done x -> return x -- | For every n input items, apply the first fold and supply the result to the -- next fold. -- {-# INLINE lchunksOf #-} lchunksOf :: Monad m => Int -> Fold m a b -> Fold m b c -> Fold m a c -lchunksOf n (Fold step1 initial1 extract1) (Fold step2 initial2 extract2) = - Fold step' initial' extract' - - where - - initial' = Tuple3' 0 <$> liftInitialM initial1 <*> liftInitialM initial2 - step' (Tuple3' i r1 r2) a = - if i < n - then do - res <- liftStep step1 r1 a - partialM $ Tuple3' (i + 1) res r2 - else do - res <- liftExtract extract1 r1 - acc2 <- liftStep step2 r2 res - case acc2 of - Done b -> doneM b - Partial _ -> do - i1 <- initial1 - acc1 <- step1 i1 a - partialM $ Tuple3' 1 acc1 acc2 - extract' (Tuple3' _ r1 r2) = do - res <- liftExtract extract1 r1 - acc2 <- liftStep step2 r2 res - liftExtract extract2 acc2 +lchunksOf n split collect = many collect (ltake n split) {-# INLINE lchunksOf2 #-} lchunksOf2 :: Monad m => Int -> Fold m a b -> Fold2 m x b c -> Fold2 m x a c @@ -671,3 +719,9 @@ lsessionsOf n (Fold step1 initial1 extract1) (Fold step2 initial2 extract2) = Left _ -> r2 Right _ -> Left e putMVar mv2 r + +{- +{-# INLINE lsessionsOf #-} +lsessionsOf :: MonadAsync m => Double -> Fold m a b -> Fold m b c -> Fold m a c +lsessionsOf n split collect = many collect (takeByTime n split) +-} diff --git a/src/Streamly/Internal/Data/Stream/StreamD/Type.hs b/src/Streamly/Internal/Data/Stream/StreamD/Type.hs index 97c0a43468..f2a6f17889 100644 --- a/src/Streamly/Internal/Data/Stream/StreamD/Type.hs +++ b/src/Streamly/Internal/Data/Stream/StreamD/Type.hs @@ -62,9 +62,10 @@ import Prelude hiding (map, mapM, foldr, take, concatMap) import Fusion.Plugin.Types (Fuse(..)) import Streamly.Internal.Data.SVar (State(..), adaptState, defState) -import Streamly.Internal.Data.Fold.Types (Fold(..), liftInitialM, liftStep, liftExtract, Fold2(..)) +import Streamly.Internal.Data.Fold.Types (Fold(..), Fold2(..)) import qualified Streamly.Internal.Data.Stream.StreamK as K +import qualified Streamly.Internal.Data.Fold.Types as FL ------------------------------------------------------------------------------ -- The direct style stream type @@ -583,55 +584,53 @@ take n (Stream step state) = n `seq` Stream step' (state, 0) ------------------------------------------------------------------------------ -- s = stream state, fs = fold state -data GroupState s fs +data GroupState s fs b = GroupStart s - | GroupBuffer s fs Int - | GroupYield fs (GroupState s fs) + | GroupBuffer s fs + | GroupYield b (GroupState s fs b) | GroupFinish -{-# INLINE_NORMAL groupsOf #-} -groupsOf - :: Monad m - => Int - -> Fold m a b - -> Stream m a - -> Stream m b -groupsOf n (Fold fstep initial extract) (Stream step state) = - n `seq` Stream step' (GroupStart state) +{-# INLINE_NORMAL foldMany #-} +foldMany :: Monad m => Fold m a b -> Stream m a -> Stream m b +foldMany (Fold fstep initial extract) (Stream step state) = + Stream step' (GroupStart state) where {-# INLINE_LATE step' #-} step' _ (GroupStart st) = do - -- XXX shall we use the Natural type instead? Need to check performance - -- implications. - when (n <= 0) $ - -- XXX we can pass the module string from the higher level API - error $ "Streamly.Internal.Data.Stream.StreamD.Type.groupsOf: the size of " - ++ "groups [" ++ show n ++ "] must be a natural number" -- fs = fold state - fs <- liftInitialM initial - return $ Skip (GroupBuffer st fs 0) + fs <- initial + return $ Skip (GroupBuffer st fs) - step' gst (GroupBuffer st fs i) = do + step' gst (GroupBuffer st fs) = do r <- step (adaptState gst) st case r of Yield x s -> do - !fs' <- liftStep fstep fs x - let i' = i + 1 - return $ - if i' >= n - then Skip (GroupYield fs' (GroupStart s)) - else Skip (GroupBuffer s fs' i') - Skip s -> return $ Skip (GroupBuffer s fs i) - Stop -> return $ Skip (GroupYield fs GroupFinish) - - step' _ (GroupYield fs next) = do - r <- liftExtract extract fs - return $ Yield r next + fs' <- fstep fs x + return $ case fs' of + FL.Done b -> Skip (GroupYield b (GroupStart s)) + FL.Partial ps -> Skip (GroupBuffer s ps) + Skip s -> + return $ Skip (GroupBuffer s fs) + Stop -> do + b <- extract fs + return $ Skip (GroupYield b GroupFinish) + + step' _ (GroupYield b next) = return $ Yield b next step' _ GroupFinish = return Stop +{-# INLINE groupsOf #-} +groupsOf :: Monad m => Int -> Fold m a b -> Stream m a -> Stream m b +groupsOf n fld = foldMany (FL.ltake n fld) + +data GroupState2 s fs + = GroupStart2 s + | GroupBuffer2 s fs Int + | GroupYield2 fs (GroupState2 s fs) + | GroupFinish2 + {-# INLINE_NORMAL groupsOf2 #-} groupsOf2 :: Monad m @@ -641,12 +640,12 @@ groupsOf2 -> Stream m a -> Stream m b groupsOf2 n input (Fold2 fstep inject extract) (Stream step state) = - n `seq` Stream step' (GroupStart state) + n `seq` Stream step' (GroupStart2 state) where {-# INLINE_LATE step' #-} - step' _ (GroupStart st) = do + step' _ (GroupStart2 st) = do -- XXX shall we use the Natural type instead? Need to check performance -- implications. when (n <= 0) $ @@ -655,9 +654,9 @@ groupsOf2 n input (Fold2 fstep inject extract) (Stream step state) = ++ "groups [" ++ show n ++ "] must be a natural number" -- fs = fold state fs <- input >>= inject - return $ Skip (GroupBuffer st fs 0) + return $ Skip (GroupBuffer2 st fs 0) - step' gst (GroupBuffer st fs i) = do + step' gst (GroupBuffer2 st fs i) = do r <- step (adaptState gst) st case r of Yield x s -> do @@ -665,13 +664,13 @@ groupsOf2 n input (Fold2 fstep inject extract) (Stream step state) = let i' = i + 1 return $ if i' >= n - then Skip (GroupYield fs' (GroupStart s)) - else Skip (GroupBuffer s fs' i') - Skip s -> return $ Skip (GroupBuffer s fs i) - Stop -> return $ Skip (GroupYield fs GroupFinish) + then Skip (GroupYield2 fs' (GroupStart2 s)) + else Skip (GroupBuffer2 s fs' i') + Skip s -> return $ Skip (GroupBuffer2 s fs i) + Stop -> return $ Skip (GroupYield2 fs GroupFinish2) - step' _ (GroupYield fs next) = do + step' _ (GroupYield2 fs next) = do r <- extract fs return $ Yield r next - step' _ GroupFinish = return Stop + step' _ GroupFinish2 = return Stop From 36b3d9e271baf44ed8bfe53c4bca7c35c1ca8428 Mon Sep 17 00:00:00 2001 From: adithyaov Date: Tue, 25 Aug 2020 03:54:00 +0530 Subject: [PATCH 03/30] Rewrite code removing fold helpers Remove: - liftStep - liftInitial - liftExtract - liftInitialM - liftInitial - partialM - doneM --- src/Streamly.hs | 2 +- src/Streamly/Internal/Data/Array.hs | 14 +- .../Data/Array/Prim/MutTypesInclude.hs | 71 +- .../Data/Array/Prim/Pinned/Mut/Types.hs | 2 +- .../Internal/Data/Array/Prim/TypesInclude.hs | 15 +- .../Internal/Data/Array/PrimInclude.hs | 4 +- .../Internal/Data/Array/Storable/Foreign.hs | 6 +- .../Data/Array/Storable/Foreign/Mut/Types.hs | 25 +- src/Streamly/Internal/Data/Fold.hs | 992 ++++++++++----- src/Streamly/Internal/Data/Fold/Types.hs | 518 +++++--- src/Streamly/Internal/Data/Parser.hs | 2 + src/Streamly/Internal/Data/Parser/ParserD.hs | 288 ++--- .../Internal/Data/Parser/ParserD/Types.hs | 76 +- src/Streamly/Internal/Data/SmallArray.hs | 14 +- src/Streamly/Internal/Data/Stream/IsStream.hs | 275 ++-- src/Streamly/Internal/Data/Stream/Prelude.hs | 26 +- src/Streamly/Internal/Data/Stream/StreamD.hs | 1110 ++++++++++------- .../Internal/Data/Stream/StreamD/Type.hs | 83 +- src/Streamly/Internal/Data/Stream/StreamK.hs | 16 +- src/Streamly/Internal/Data/Unfold.hs | 1 + src/Streamly/Internal/FileSystem/File.hs | 2 +- src/Streamly/Internal/Network/Inet/TCP.hs | 2 +- src/Streamly/Internal/Unicode/Stream.hs | 2 +- 23 files changed, 2202 insertions(+), 1344 deletions(-) diff --git a/src/Streamly.hs b/src/Streamly.hs index cd3597c839..ae03e8c59b 100644 --- a/src/Streamly.hs +++ b/src/Streamly.hs @@ -272,7 +272,7 @@ import qualified Streamly.Internal.Data.Stream.Async as Async -- reducers of streams. Reducers can be combined to consume a stream source in -- many ways. The simplest is to reduce a stream source using a fold e.g.: -- --- > S.runFold FL.length $ S.enumerateTo 100 +-- > S.foldOnce FL.length $ S.enumerateTo 100 -- -- Folds are consumers of streams and can be used to split a stream into -- multiple independent flows. Grouping transforms a stream by applying a fold diff --git a/src/Streamly/Internal/Data/Array.hs b/src/Streamly/Internal/Data/Array.hs index 5439adef79..6fc6bdc28e 100644 --- a/src/Streamly/Internal/Data/Array.hs +++ b/src/Streamly/Internal/Data/Array.hs @@ -112,11 +112,11 @@ writeN limit = Fold step initial extract initial = do marr <- liftIO $ newArray limit bottomElement return (Tuple' marr 0) - step (Tuple' marr i) x - | i == limit = fmap FL.Done $ liftIO $ freezeArray marr 0 i + step st@(Tuple' marr i) x + | i == limit = fmap FL.Done $ extract st | otherwise = do liftIO $ writeArray marr i x - FL.partialM $ Tuple' marr (i + 1) + return $ FL.Partial $ Tuple' marr (i + 1) extract (Tuple' marr len) = liftIO $ freezeArray marr 0 len {-# INLINE_NORMAL write #-} @@ -132,10 +132,10 @@ write = Fold step initial extract in do newMarr <- liftIO $ newArray newCapacity bottomElement liftIO $ copyMutableArray newMarr 0 marr 0 i liftIO $ writeArray newMarr i x - FL.partialM $ Tuple3' newMarr (i + 1) newCapacity + return $ FL.Partial $ Tuple3' newMarr (i + 1) newCapacity | otherwise = do liftIO $ writeArray marr i x - FL.partialM $ Tuple3' marr (i + 1) capacity + return $ FL.Partial $ Tuple3' marr (i + 1) capacity extract (Tuple3' marr len _) = liftIO $ freezeArray marr 0 len {-# INLINE_NORMAL fromStreamDN #-} @@ -151,7 +151,7 @@ fromStreamDN limit str = do {-# INLINE fromStreamD #-} fromStreamD :: MonadIO m => D.Stream m a -> m (Array a) -fromStreamD = D.runFold write +fromStreamD = D.foldOnce write {-# INLINABLE fromListN #-} fromListN :: Int -> [a] -> Array a @@ -187,7 +187,7 @@ toStreamRev = D.fromStreamD . toStreamDRev {-# INLINE fold #-} fold :: Monad m => Fold m a b -> Array a -> m b -fold f arr = D.runFold f (toStreamD arr) +fold f arr = D.foldOnce f (toStreamD arr) {-# INLINE streamFold #-} streamFold :: Monad m => (SerialT m a -> m b) -> Array a -> m b diff --git a/src/Streamly/Internal/Data/Array/Prim/MutTypesInclude.hs b/src/Streamly/Internal/Data/Array/Prim/MutTypesInclude.hs index ad1ac482d8..73c2c83ce2 100644 --- a/src/Streamly/Internal/Data/Array/Prim/MutTypesInclude.hs +++ b/src/Streamly/Internal/Data/Array/Prim/MutTypesInclude.hs @@ -176,7 +176,7 @@ writeN limit = Fold step initial extract | i == limit = FL.Done <$> extract s | otherwise = do unsafeWriteIndex marr i x - FL.partialM $ Tuple' marr (i + 1) + return $ FL.Partial $ Tuple' marr (i + 1) -- Use Tuple' instead? data ArrayUnsafe a = ArrayUnsafe @@ -215,7 +215,7 @@ fromStreamDN limit str = do {-# INLINE fromStreamD #-} fromStreamD :: (MonadIO m, Prim a) => D.Stream m a -> m (Array a) -fromStreamD str = D.runFold write str +fromStreamD str = D.foldOnce write str {-# INLINABLE fromListNM #-} fromListNM :: (MonadIO m, Prim a) => Int -> [a] -> m (Array a) @@ -354,16 +354,16 @@ packArraysChunksOf n (D.Stream step state) = {-# INLINE_NORMAL lpackArraysChunksOf #-} lpackArraysChunksOf :: (MonadIO m, Prim a) => Int -> Fold m (Array a) () -> Fold m (Array a) () -lpackArraysChunksOf n (Fold step1 initial1 extract1) = - Fold step initial extract +lpackArraysChunksOf n (Fold step1 initial1 extract1) = Fold step initial extract where initial = do - when (n <= 0) $ + when (n <= 0) + $ error + $ "Streamly.Internal.Data.Array.Storable.Foreign.Mut.Types.packArraysChunksOf: the size of " + ++ "arrays [" ++ show n ++ "] must be a natural number" -- XXX we can pass the module string from the higher level API - error $ "Streamly.Internal.Data.Array.Storable.Foreign.Mut.Types.packArraysChunksOf: the size of " - ++ "arrays [" ++ show n ++ "] must be a natural number" r1 <- initial1 return (Tuple' Nothing r1) @@ -372,34 +372,35 @@ lpackArraysChunksOf n (Fold step1 initial1 extract1) = r <- step1 r1 buf case r of FL.Partial rr -> extract1 rr - FL.Done _ -> return () + FL.Done () -> return () + FL.Done1 () -> return () step (Tuple' Nothing r1) arr = do - len <- byteLength arr - if len >= n - then do - r <- step1 r1 arr - case r of - FL.Done _ -> FL.doneM () - FL.Partial s -> do - extract1 s - r1' <- initial1 - FL.partialM $ Tuple' Nothing r1' - else FL.partialM $ Tuple' (Just arr) r1 - + len <- byteLength arr + if len >= n + then do + r <- step1 r1 arr + case r of + FL.Done () -> return $ FL.Done () + FL.Done1 () -> return $ FL.Done1 () + FL.Partial s -> do + extract1 s + r1' <- initial1 + return $ FL.Partial $ Tuple' Nothing r1' + else return $ FL.Partial $ Tuple' (Just arr) r1 step (Tuple' (Just buf) r1) arr = do - blen <- byteLength buf - alen <- byteLength arr - let len = blen + alen - buf' <- spliceTwo buf arr - - if len >= n - then do - r <- step1 r1 buf' - case r of - FL.Done _ -> FL.doneM () - FL.Partial s -> do - extract1 s - r1' <- initial1 - FL.partialM $ Tuple' Nothing r1' - else FL.partialM $ Tuple' (Just buf') r1 + blen <- byteLength buf + alen <- byteLength arr + let len = blen + alen + buf' <- spliceTwo buf arr + if len >= n + then do + r <- step1 r1 buf' + case r of + FL.Done () -> return $ FL.Done () + FL.Done1 () -> return $ FL.Done () + FL.Partial s -> do + extract1 s + r1' <- initial1 + return $ FL.Partial $ Tuple' Nothing r1' + else return $ FL.Partial $ Tuple' (Just buf') r1 diff --git a/src/Streamly/Internal/Data/Array/Prim/Pinned/Mut/Types.hs b/src/Streamly/Internal/Data/Array/Prim/Pinned/Mut/Types.hs index cbaab959b4..b7c66c694e 100644 --- a/src/Streamly/Internal/Data/Array/Prim/Pinned/Mut/Types.hs +++ b/src/Streamly/Internal/Data/Array/Prim/Pinned/Mut/Types.hs @@ -151,7 +151,7 @@ writeNAligned align limit = Fold step initial extract | i == limit = FL.Done <$> extract s | otherwise = do unsafeWriteIndex marr i x - FL.partialM $ Tuple' marr (i + 1) + return $ FL.Partial $ Tuple' marr (i + 1) ------------------------------------------------------------------------------- -- Mutation with pointers diff --git a/src/Streamly/Internal/Data/Array/Prim/TypesInclude.hs b/src/Streamly/Internal/Data/Array/Prim/TypesInclude.hs index 17a9348a27..95d0720399 100644 --- a/src/Streamly/Internal/Data/Array/Prim/TypesInclude.hs +++ b/src/Streamly/Internal/Data/Array/Prim/TypesInclude.hs @@ -613,6 +613,7 @@ lpackArraysChunksOf n (Fold step1 initial1 extract1) = case r of FL.Partial rr -> extract1 rr FL.Done _ -> return () + FL.Done1 _ -> return () step (Tuple3' Nothing' _ r1) arr = @@ -620,15 +621,16 @@ lpackArraysChunksOf n (Fold step1 initial1 extract1) = then do r <- step1 r1 arr case r of - FL.Done _ -> FL.doneM () + FL.Done _ -> return $ FL.Done () + FL.Done1 _ -> return $ FL.Done1 () FL.Partial s -> do extract1 s r1' <- initial1 - FL.partialM $ Tuple3' Nothing' 0 r1' + return $ FL.Partial $ Tuple3' Nothing' 0 r1' else do buf <- MA.newArray nElem noff <- spliceInto buf 0 arr - FL.partialM $ Tuple3' (Just' buf) noff r1 + return $ FL.Partial $ Tuple3' (Just' buf) noff r1 step (Tuple3' (Just' buf) boff r1) arr = do noff <- spliceInto buf boff arr @@ -638,12 +640,13 @@ lpackArraysChunksOf n (Fold step1 initial1 extract1) = nArr <- unsafeFreeze buf r <- step1 r1 (slice nArr 0 noff) case r of - FL.Done _ -> FL.doneM () + FL.Done _ -> return $ FL.Done () + FL.Done1 _ -> return $ FL.Done1 () FL.Partial s -> do extract1 s r1' <- initial1 - FL.partialM $ Tuple3' Nothing' 0 r1' - else FL.partialM $ Tuple3' (Just' buf) noff r1 + return $ FL.Partial $ Tuple3' Nothing' 0 r1' + else return $ FL.Partial $ Tuple3' (Just' buf) noff r1 data SplitState s arr = Initial s diff --git a/src/Streamly/Internal/Data/Array/PrimInclude.hs b/src/Streamly/Internal/Data/Array/PrimInclude.hs index 85b0ddd452..9af65b400a 100644 --- a/src/Streamly/Internal/Data/Array/PrimInclude.hs +++ b/src/Streamly/Internal/Data/Array/PrimInclude.hs @@ -45,7 +45,7 @@ fromStreamN n m = do -- /Internal/ {-# INLINE fromStream #-} fromStream :: (MonadIO m, Prim a) => SerialT m a -> m (Array a) -fromStream = P.runFold A.write +fromStream = P.foldOnce A.write -- write m = A.fromStreamD $ D.toStreamD m ------------------------------------------------------------------------------- @@ -128,7 +128,7 @@ null arr = length arr == 0 -- /Internal/ {-# INLINE fold #-} fold :: forall m a b. (MonadIO m, Prim a) => Fold m a b -> Array a -> m b -fold f arr = P.runFold f (toStream arr :: Serial.SerialT m a) +fold f arr = P.foldOnce f (toStream arr :: Serial.SerialT m a) -- | Fold an array using a stream fold operation. -- diff --git a/src/Streamly/Internal/Data/Array/Storable/Foreign.hs b/src/Streamly/Internal/Data/Array/Storable/Foreign.hs index a268443182..82e6024700 100644 --- a/src/Streamly/Internal/Data/Array/Storable/Foreign.hs +++ b/src/Streamly/Internal/Data/Array/Storable/Foreign.hs @@ -195,7 +195,7 @@ fromStreamN n m = do -- /Internal/ {-# INLINE fromStream #-} fromStream :: (MonadIO m, Storable a) => SerialT m a -> m (Array a) -fromStream = P.runFold A.write +fromStream = P.foldOnce A.write -- write m = A.fromStreamD $ D.toStreamD m ------------------------------------------------------------------------------- @@ -500,7 +500,7 @@ runPipe f arr = P.runPipe (toArrayMinChunk (length arr)) $ f (A.read arr) streamTransform :: forall m a b. (MonadIO m, Storable a, Storable b) => (SerialT m a -> SerialT m b) -> Array a -> m (Array b) streamTransform f arr = - P.runFold (A.toArrayMinChunk (alignment (undefined :: a)) (length arr)) + P.foldOnce (A.toArrayMinChunk (alignment (undefined :: a)) (length arr)) $ f (toStream arr) ------------------------------------------------------------------------------- @@ -570,7 +570,7 @@ asCString arr act = do -- /Internal/ {-# INLINE fold #-} fold :: forall m a b. (MonadIO m, Storable a) => Fold m a b -> Array a -> m b -fold f arr = P.runFold f (toStream arr :: Serial.SerialT m a) +fold f arr = P.foldOnce f (toStream arr :: Serial.SerialT m a) -- | Fold an array using a stream fold operation. -- diff --git a/src/Streamly/Internal/Data/Array/Storable/Foreign/Mut/Types.hs b/src/Streamly/Internal/Data/Array/Storable/Foreign/Mut/Types.hs index 401cbc1cd2..21f9835451 100644 --- a/src/Streamly/Internal/Data/Array/Storable/Foreign/Mut/Types.hs +++ b/src/Streamly/Internal/Data/Array/Storable/Foreign/Mut/Types.hs @@ -423,9 +423,9 @@ fromStreamD m = do len <- K.foldl' (+) 0 (K.map length buffered) fromStreamDN len $ flattenArrays $ D.fromStreamK buffered {- -fromStreamD m = runFold write m +fromStreamD m = foldOnce write m where - runFold (Fold step begin done) = D.foldlMx' step begin done + foldOnce (Fold step begin done) = D.foldlMx' step begin done -} @@ -679,6 +679,7 @@ lpackArraysChunksOf n (Fold step1 initial1 extract1) = case r of FL.Partial rr -> extract1 rr FL.Done _ -> return () + FL.Done1 _ -> return () step (Tuple' Nothing r1) arr = let len = byteLength arr @@ -686,12 +687,13 @@ lpackArraysChunksOf n (Fold step1 initial1 extract1) = then do r <- step1 r1 arr case r of - FL.Done _ -> FL.doneM () + FL.Done _ -> return $ FL.Done () + FL.Done1 _ -> return $ FL.Done1 () FL.Partial s -> do extract1 s r1' <- initial1 - FL.partialM $ Tuple' Nothing r1' - else FL.partialM $ Tuple' (Just arr) r1 + return $ FL.Partial $ Tuple' Nothing r1' + else return $ FL.Partial $ Tuple' (Just arr) r1 step (Tuple' (Just buf) r1) arr = do let len = byteLength buf + byteLength arr @@ -704,12 +706,13 @@ lpackArraysChunksOf n (Fold step1 initial1 extract1) = then do r <- step1 r1 buf'' case r of - FL.Done _ -> FL.doneM () + FL.Done _ -> return $ FL.Done () + FL.Done1 _ -> return $ FL.Done1 () FL.Partial s -> do extract1 s r1' <- initial1 - FL.partialM $ Tuple' Nothing r1' - else FL.partialM $ Tuple' (Just buf'') r1 + return $ FL.Partial $ Tuple' Nothing r1' + else return $ FL.Partial $ Tuple' (Just buf'') r1 #if !defined(mingw32_HOST_OS) data GatherState s arr @@ -1086,10 +1089,10 @@ writeNAllocWith alloc n = Fold step initial extract where initial = liftIO $ alloc (max n 0) - step arr@(Array _ end bound) _ | end == bound = FL.doneM arr + step arr@(Array _ end bound) _ | end == bound = return $ FL.Done arr step (Array start end bound) x = do liftIO $ poke end x - FL.partialM $ Array start (end `plusPtr` sizeOf (undefined :: a)) bound + return $ FL.Partial $ Array start (end `plusPtr` sizeOf (undefined :: a)) bound -- XXX note that shirkToFit does not maintain alignment, in case we are -- using aligned allocation. extract = return -- liftIO . shrinkToFit @@ -1148,7 +1151,7 @@ writeNUnsafe n = Fold step initial extract return $ ArrayUnsafe start end step (ArrayUnsafe start end) x = do liftIO $ poke end x - FL.partialM $ ArrayUnsafe start (end `plusPtr` sizeOf (undefined :: a)) + return $ FL.Partial $ ArrayUnsafe start (end `plusPtr` sizeOf (undefined :: a)) extract (ArrayUnsafe start end) = return $ Array start end end -- liftIO . shrinkToFit -- XXX The realloc based implementation needs to make one extra copy if we use diff --git a/src/Streamly/Internal/Data/Fold.hs b/src/Streamly/Internal/Data/Fold.hs index 637ee827e8..54bb1a252a 100644 --- a/src/Streamly/Internal/Data/Fold.hs +++ b/src/Streamly/Internal/Data/Fold.hs @@ -17,8 +17,6 @@ module Streamly.Internal.Data.Fold -- * Fold Type Step (..) , Fold (..) - , partialM - , doneM , hoist , generally @@ -158,6 +156,11 @@ module Streamly.Internal.Data.Fold -- , breakAround , spanBy , spanByRolling + , groupBy + , groupByRolling + , sliceSepBy + , sliceSepWith + , wordBy -- By sequences -- , breakOnSeq @@ -171,8 +174,10 @@ module Streamly.Internal.Data.Fold -- * Partitioning - -- , partitionByM - -- , partitionBy + , partitionByM + , partitionByFstM + , partitionByMinM + , partitionBy , partition -- * Demultiplexing @@ -192,8 +197,13 @@ module Streamly.Internal.Data.Fold -- * Unzipping , unzip -- These can be expressed using lmap/lmapM and unzip - -- , unzipWith - -- , unzipWithM + + -- XXX Why comment them? Maybe of the combinators that we currently have can + -- be expressed using some other combinators. + , unzipWith + , unzipWithM + , unzipWithFstM + , unzipWithMinM -- * Nested Folds -- , concatMap @@ -221,7 +231,9 @@ import Data.Semigroup (Semigroup((<>))) import Streamly.Internal.Data.Pipe.Types (Pipe (..), PipeState(..)) import Streamly.Internal.Data.Tuple.Strict (Tuple'(..), Tuple3'(..)) import Streamly.Internal.Data.Maybe.Strict (Maybe'(..), toMaybe) +import Streamly.Internal.Data.Either.Strict (Either'(..)) +import qualified Data.List as List import qualified Streamly.Internal.Data.Fold.Types as FL import qualified Streamly.Internal.Data.Pipe.Types as Pipe import qualified Data.Map.Strict as Map @@ -241,18 +253,21 @@ import Streamly.Internal.Data.Fold.Types -- Smart constructors ------------------------------------------------------------------------------ --- | Make a non terminating fold using a pure step function, a pure initial --- state and a pure state extraction function. +-- | Make an accumulating (non-terminating) fold using a pure step function, a +-- pure initial state and a pure state extraction function. -- -- /Internal/ -- {-# INLINE mkAccum #-} mkAccum :: Monad m => (s -> a -> s) -> s -> (s -> b) -> Fold m a b mkAccum step initial extract = - Fold (\s a -> partialM $ step s a) (return initial) (return . extract) + Fold + (\s a -> return $ Partial $ step s a) + (return initial) + (return . extract) --- | Make a non terminating fold using a pure step function and a pure initial --- state. The final state extracted is identical to the intermediate state. +-- | Similar to 'mkAccum' but the final state extracted is identical to the +-- intermediate state. -- -- /Internal/ -- @@ -260,8 +275,8 @@ mkAccum step initial extract = mkAccum_ :: Monad m => (b -> a -> b) -> b -> Fold m a b mkAccum_ step initial = mkAccum step initial id --- | Make a non terminating fold with an effectful step function and initial --- state, and a state extraction function. +-- | Make an accumulating (non-terminating) fold with an effectful step +-- function, an initial state, and a state extraction function. -- -- /Internal/ -- @@ -269,8 +284,8 @@ mkAccum_ step initial = mkAccum step initial id mkAccumM :: Functor m => (s -> a -> m s) -> m s -> (s -> m b) -> Fold m a b mkAccumM step = Fold (\s a -> Partial <$> step s a) --- | Make a non terminating fold with an effectful step function and initial --- state. The final state extracted is identical to the intermediate state. +-- | Similar to 'mkAccumM' but the final state extracted is identical to the +-- intermediate state. -- -- /Internal/ -- @@ -278,8 +293,8 @@ mkAccumM step = Fold (\s a -> Partial <$> step s a) mkAccumM_ :: Monad m => (b -> a -> m b) -> m b -> Fold m a b mkAccumM_ step initial = mkAccumM step initial return --- | Make a fold using a pure step function, a pure initial state and --- a pure state extraction function. +-- | Make a terminating fold using a pure step function, a pure initial state +-- and a pure state extraction function. -- -- /Internal/ -- @@ -288,8 +303,8 @@ mkFold :: Monad m => (s -> a -> Step s b) -> s -> (s -> b) -> Fold m a b mkFold step initial extract = Fold (\s a -> return $ step s a) (return initial) (return . extract) --- | Make a fold using a pure step function and a pure initial state. The --- final state extracted is identical to the intermediate state. +-- | Similar to 'mkFold' but the final state extracted is identical to the +-- intermediate state. -- -- /Internal/ -- @@ -297,8 +312,8 @@ mkFold step initial extract = mkFold_ :: Monad m => (b -> a -> Step b b) -> b -> Fold m a b mkFold_ step initial = mkFold step initial id --- | Make a fold with an effectful step function and initial state, and a state --- extraction function. +-- | Make a terminating fold with an effectful step function and initial state, +-- and a state extraction function. -- -- > mkFoldM = Fold -- @@ -310,8 +325,8 @@ mkFold_ step initial = mkFold step initial id mkFoldM :: (s -> a -> m (Step s b)) -> m s -> (s -> m b) -> Fold m a b mkFoldM = Fold --- | Make a fold with an effectful step function and initial state. The final --- state extracted is identical to the intermediate state. +-- | Similar to 'mkFoldM' but the final state extracted is identical to the +-- intermediate state. -- -- /Internal/ -- @@ -348,12 +363,16 @@ generally = hoist (return . runIdentity) {-# INLINE sequence #-} sequence :: Monad m => Fold m a (m b) -> Fold m a b sequence (Fold step initial extract) = Fold step' initial extract' - where + + where + step' s a = do res <- step s a case res of - Partial x -> partialM x + Partial x -> return $ Partial x Done b -> b >>= return . Done + Done1 b -> b >>= return . Done1 + extract' = join . extract -- | Map a monadic function on the output of a fold. @@ -380,27 +399,30 @@ transform (Pipe pstep1 pstep2 pinitial) (Fold fstep finitial fextract) = where initial = Tuple' pinitial <$> finitial + errorMsgOnDone1 = + "Only only accumulators or folds not returning Done1 are supported by this operation." + step (Tuple' ps fs) x = do r <- pstep1 ps x go fs r + -- XXX use SPEC? where - -- XXX use SPEC? + go acc (Pipe.Yield b (Consume ps')) = do acc' <- fstep acc b case acc' of - Partial s -> partialM $ Tuple' ps' s - Done b2 -> doneM b2 - + Partial s -> return $ Partial $ Tuple' ps' s + Done b2 -> return $ Done b2 + Done1 _ -> error errorMsgOnDone1 go acc (Pipe.Yield b (Produce ps')) = do acc' <- fstep acc b r <- pstep2 ps' case acc' of Partial s -> go s r - Done b2 -> doneM b2 - - go acc (Pipe.Continue (Consume ps')) = partialM $ Tuple' ps' acc - + Done b2 -> return $ Done b2 + Done1 _ -> error errorMsgOnDone1 + go acc (Pipe.Continue (Consume ps')) = return $ Partial $ Tuple' ps' acc go acc (Pipe.Continue (Produce ps')) = do r <- pstep2 ps' go acc r @@ -418,9 +440,11 @@ transform (Pipe pstep1 pstep2 pinitial) (Fold fstep finitial fextract) = {-# INLINABLE _Fold1 #-} _Fold1 :: Monad m => (a -> a -> a) -> Fold m a (Maybe a) _Fold1 step = Fold step_ (return Nothing') (return . toMaybe) - where - step_ Nothing' a = partialM $ Just' a - step_ (Just' x) a = partialM $ Just' $ step x a + + where + + step_ Nothing' a = return $ Partial $ Just' a + step_ (Just' x) a = return $ Partial $ Just' $ step x a ------------------------------------------------------------------------------ -- Left folds @@ -439,9 +463,13 @@ _Fold1 step = Fold step_ (return Nothing') (return . toMaybe) {-# INLINABLE drain #-} drain :: Monad m => Fold m a () drain = Fold step begin done + where + begin = return () - step _ _ = FL.partialM () + + step _ _ = return $ FL.Partial () + done = return -- | @@ -480,7 +508,7 @@ last = _Fold1 (flip const) -- @since 0.7.0 {-# INLINE genericLength #-} genericLength :: (Monad m, Num b) => Fold m a b -genericLength = Fold (\n _ -> partialM $ n + 1) (return 0) return +genericLength = Fold (\n _ -> return $ Partial $ n + 1) (return 0) return -- | Determine the length of the input stream. -- @@ -500,7 +528,7 @@ length = genericLength -- @since 0.7.0 {-# INLINE sum #-} sum :: (Monad m, Num a) => Fold m a a -sum = Fold (\x a -> partialM $ x + a) (return 0) return +sum = Fold (\x a -> return $ Partial $ x + a) (return 0) return -- | Determine the product of all elements of a stream of numbers. Returns -- multiplicative identity (@1@) when the stream is empty. @@ -510,7 +538,7 @@ sum = Fold (\x a -> partialM $ x + a) (return 0) return -- @since 0.7.0 {-# INLINABLE product #-} product :: (Monad m, Num a) => Fold m a a -product = Fold (\x a -> partialM $ x * a) (return 1) return +product = Fold (\x a -> return $ Partial $ x * a) (return 1) return ------------------------------------------------------------------------------ -- To Summary (Maybe) @@ -523,10 +551,13 @@ product = Fold (\x a -> partialM $ x * a) (return 1) return {-# INLINABLE maximumBy #-} maximumBy :: Monad m => (a -> a -> Ordering) -> Fold m a (Maybe a) maximumBy cmp = _Fold1 max' - where - max' x y = case cmp x y of - GT -> x - _ -> y + + where + + max' x y = + case cmp x y of + GT -> x + _ -> y -- | -- @ @@ -548,10 +579,13 @@ maximum = _Fold1 max {-# INLINABLE minimumBy #-} minimumBy :: Monad m => (a -> a -> Ordering) -> Fold m a (Maybe a) minimumBy cmp = _Fold1 min' - where - min' x y = case cmp x y of - GT -> y - _ -> x + + where + + min' x y = + case cmp x y of + GT -> y + _ -> x -- | Determine the minimum element in a stream using the supplied comparison -- function. @@ -578,11 +612,16 @@ minimum = _Fold1 min {-# INLINABLE mean #-} mean :: (Monad m, Fractional a) => Fold m a a mean = Fold step (return begin) (return . done) - where + + where + begin = Tuple' 0 0 - step (Tuple' x n) y = return $ - let n' = n + 1 - in Partial $ Tuple' (x + (y - x) / n') n' + + step (Tuple' x n) y = + return + $ let n' = n + 1 + in Partial $ Tuple' (x + (y - x) / n') n' + done (Tuple' x _) = x -- | Compute a numerically stable (population) variance over all elements in @@ -592,15 +631,19 @@ mean = Fold step (return begin) (return . done) {-# INLINABLE variance #-} variance :: (Monad m, Fractional a) => Fold m a a variance = Fold step (return begin) (return . done) - where + + where + begin = Tuple3' 0 0 0 - step (Tuple3' n mean_ m2) x = partialM $ Tuple3' n' mean' m2' - where - n' = n + 1 - mean' = (n * mean_ + x) / (n + 1) - delta = x - mean_ - m2' = m2 + delta * delta * n / (n + 1) + step (Tuple3' n mean_ m2) x = return $ Partial $ Tuple3' n' mean' m2' + + where + + n' = n + 1 + mean' = (n * mean_ + x) / (n + 1) + delta = x - mean_ + m2' = m2 + delta * delta * n / (n + 1) done (Tuple3' n _ m2) = m2 / n @@ -627,10 +670,15 @@ stdDev = sqrt variance {-# INLINABLE rollingHashWithSalt #-} rollingHashWithSalt :: (Monad m, Enum a) => Int64 -> Fold m a Int64 rollingHashWithSalt salt = Fold step initial extract + where + k = 2891336453 :: Int64 + initial = return salt - step cksum a = partialM $ cksum * k + fromIntegral (fromEnum a) + + step cksum a = return $ Partial $ cksum * k + fromIntegral (fromEnum a) + extract = return -- | A default salt used in the implementation of 'rollingHash'. @@ -666,8 +714,8 @@ rollingHashFirstN n = ltake n rollingHash -- /Internal/ -- {-# INLINE sconcat #-} -sconcat :: (Monad m, Monoid a) => a -> Fold m a a -sconcat i = Fold (\x a -> partialM $ mappend x a) (return i) return +sconcat :: (Monad m, Semigroup a) => a -> Fold m a a +sconcat i = Fold (\x a -> return $ Partial $ x <> a) (return i) return -- | Fold an input stream consisting of monoidal elements using 'mappend' -- and 'mempty'. @@ -677,7 +725,7 @@ sconcat i = Fold (\x a -> partialM $ mappend x a) (return i) return -- @since 0.7.0 {-# INLINABLE mconcat #-} mconcat :: (Monad m, Monoid a) => Fold m a a -mconcat = Fold (\x a -> partialM $ mappend x a) (return mempty) return +mconcat = sconcat mempty -- | -- > foldMap f = lmap f mconcat @@ -708,9 +756,13 @@ foldMap f = lmap f mconcat {-# INLINABLE foldMapM #-} foldMapM :: (Monad m, Monoid b) => (a -> m b) -> Fold m a b foldMapM act = Fold step begin done + where + done = return + begin = return mempty + step m a = do m' <- act a return $! Partial $! mappend m m' @@ -729,7 +781,7 @@ foldMapM act = Fold step begin done -- id . (x1 :) . (x2 :) . (x3 :) . ... . (xn :) $ [] {-# INLINABLE toList #-} toList :: Monad m => Fold m a [a] -toList = Fold (\f x -> partialM $ f . (x :)) +toList = Fold (\f x -> return $ Partial $ f . (x :)) (return id) (return . ($ [])) @@ -759,10 +811,14 @@ drainWhile p = ltakeWhile p drain {-# INLINABLE genericIndex #-} genericIndex :: (Integral i, Monad m) => i -> Fold m a (Maybe a) genericIndex i = Fold step (return 0) (const (return Nothing)) - where - step j a = return $ if i == j - then Done $ Just a - else Partial (j + 1) + + where + + step j a = + return + $ if i == j + then Done $ Just a + else Partial (j + 1) -- | Lookup the element at the given index. -- @@ -791,10 +847,14 @@ head = _Fold1 const {-# INLINABLE find #-} find :: Monad m => (a -> Bool) -> Fold m a (Maybe a) find predicate = Fold step (return ()) (const (return Nothing)) - where - step _ a = return $ if predicate a - then Done (Just a) - else Partial () + + where + + step () a = + return + $ if predicate a + then Done (Just a) + else Partial () -- | In a stream of (key-value) pairs @(a, b)@, return the value @b@ of the -- first pair where the key equals the given value @a@. @@ -805,10 +865,14 @@ find predicate = Fold step (return ()) (const (return Nothing)) {-# INLINABLE lookup #-} lookup :: (Eq a, Monad m) => a -> Fold m (a,b) (Maybe b) lookup a0 = Fold step (return ()) (const (return Nothing)) - where - step _ (a,b) = return $ if a == a0 - then Done $ Just b - else Partial () + + where + + step () (a, b) = + return + $ if a == a0 + then Done $ Just b + else Partial () -- | Returns the first index that satisfies the given predicate. -- @@ -816,10 +880,14 @@ lookup a0 = Fold step (return ()) (const (return Nothing)) {-# INLINABLE findIndex #-} findIndex :: Monad m => (a -> Bool) -> Fold m a (Maybe Int) findIndex predicate = Fold step (return 0) (const (return Nothing)) - where - step i a = return $ if predicate a - then Done $ Just i - else Partial (i + 1) + + where + + step i a = + return + $ if predicate a + then Done $ Just i + else Partial (i + 1) -- | Returns the first index where a given value is found in the stream. -- @@ -841,7 +909,7 @@ elemIndex a = findIndex (a ==) -- @since 0.7.0 {-# INLINABLE null #-} null :: Monad m => Fold m a Bool -null = Fold (\_ _ -> doneM False) (return True) return +null = Fold (\() _ -> return $ Done False) (return ()) (\() -> return True) -- | -- > any p = lmap p or @@ -850,16 +918,19 @@ null = Fold (\_ _ -> doneM False) (return True) return -- | Returns 'True' if any of the elements of a stream satisfies a predicate. -- -- @since 0.7.0 -{-# INLINABLE any #-} +{-# INLINE any #-} any :: Monad m => (a -> Bool) -> Fold m a Bool -any predicate = - Fold - (\_ a -> - if predicate a - then return $ FL.Done True - else return $ FL.Partial False) - (return False) +any predicate = Fold step initial return + + where + + initial = return False + + step _ a = return + $ if predicate a + then Done True + else Partial False -- | Return 'True' if the given element is present in the stream. -- @@ -879,14 +950,17 @@ elem a = any (a ==) -- @since 0.7.0 {-# INLINABLE all #-} all :: Monad m => (a -> Bool) -> Fold m a Bool -all predicate = - Fold - (\_ a -> - if predicate a - then return $ FL.Partial True - else return $ FL.Done False) - (return True) +all predicate = Fold step initial return + + where + + initial = return True + + step _ a = return + $ if predicate a + then Partial True + else Done False -- | Returns 'True' if the given element is not present in the stream. -- @@ -928,6 +1002,7 @@ or = any (== True) ------------------------------------------------------------------------------ -- Binary APIs ------------------------------------------------------------------------------ + -- -- XXX These would just be applicative compositions of terminating folds. @@ -967,20 +1042,7 @@ splitAt -> Fold m a b -> Fold m a c -> Fold m a (b, c) -splitAt n (Fold stepL initialL extractL) (Fold stepR initialR extractR) = - Fold step initial extract - where - initial = Tuple3' n <$> liftInitialM initialL <*> liftInitialM initialR - - step (Tuple3' i xL xR) input = - if i > 0 - then liftStep stepL xL input >>= (\a -> partialM $ Tuple3' (i - 1) a xR) - else do - b <- liftStep stepR xR input - case b of - Partial _ -> partialM $ Tuple3' i xL b - Done x -> fmap Done $ (,) <$> liftExtract extractL xL <*> return x - extract (Tuple3' _ a b) = (,) <$> liftExtract extractL a <*> liftExtract extractR b +splitAt n fld1 fld2 = splitWith (,) (ltake n fld1) fld2 ------------------------------------------------------------------------------ -- Element Aware APIs @@ -990,62 +1052,136 @@ splitAt n (Fold stepL initialL extractL) (Fold stepR initialR extractR) = -- Binary APIs ------------------------------------------------------------------------------ +-- XXX Minimal haddock documentation for now, I don't want to include foldMany +-- in the documentation yet +-- | Stops the fold at an infixed separator element, dropping the separator. +-- +-- The first part is the result of the fold and the second part is rest of the +-- stream. +-- +-- @ +-- "--.--" => "--" "--" +-- "--." => "--" "" +-- ".--" => "" "--" +-- @ +-- +-- * Stops - when the predicate succeeds. +-- +-- /Internal/ +{-# INLINE sliceSepBy #-} +sliceSepBy :: Monad m => (a -> Bool) -> Fold m a b -> Fold m a b +sliceSepBy predicate (Fold fstep finitial fextract) = Fold step initial fextract + + where + + initial = finitial + + step s a = + if not (predicate a) + then fstep s a + else Done <$> fextract s + +-- XXX Minimal haddock documentation for now, I don't want to include foldMany +-- in the documentation yet +-- | Like 'sliceSepBy' but does not drop the seperator element. +-- +-- The first part is the result of the fold and the second part is rest of the +-- stream. +-- +-- @ +-- "--.--" => "--." "--" +-- "--." => "--." "" +-- ".--" => "." "--" +-- @ +-- +-- * Stops - when the predicate succeeds. +-- +-- /Internal/ +{-# INLINE sliceSepWith #-} +sliceSepWith :: Monad m => (a -> Bool) -> Fold m a b -> Fold m a b +sliceSepWith predicate (Fold fstep finitial fextract) = + Fold step initial fextract + + where + + initial = finitial + + step s a = + if not (predicate a) + then fstep s a + else do + res <- fstep s a + case res of + Partial sres -> Done <$> fextract sres + Done bres -> return $ Done bres + Done1 bres -> return $ Done bres + +-- XXX Minimal haddock documentation for now, I don't want to include foldMany +-- in the documentation yet +-- | Stops the fold at the last infixed separator element dropping all the +-- separators. +-- +-- The first part is the result of the fold and the second part is rest of the +-- stream. +-- +-- @ +-- "--...--" => "--" "--" +-- "--..." => "--" "" +-- "...--" => "" "--" +-- @ +-- +-- * Stops - when the predicate fails after the first success. +-- +-- /Internal/ +{-# INLINE wordBy #-} +wordBy :: Monad m => (a -> Bool) -> Fold m a b -> Fold m a b +wordBy predicate (Fold fstep finitial fextract) = Fold step initial extract + + where + + initial = Left' <$> finitial + + step (Left' s) a = + if not (predicate a) + then do + res <- fstep s a + return + $ case res of + Partial sres -> Partial $ Left' sres + Done bres -> Done bres + Done1 bres -> Done1 bres + else do + ex <- fextract s + return $ Partial $ Right' ex + step (Right' s) a = + return + $ if not (predicate a) + then Done1 s + else Partial $ Right' s + + extract (Left' s) = fextract s + extract (Right' s) = return s + -- | Break the input stream into two groups, the first group takes the input as -- long as the predicate applied to the first element of the stream and next -- input element holds 'True', the second group takes the rest of the input. -- -- /Internal/ -- +{-# INLINE spanBy #-} spanBy :: Monad m => (a -> a -> Bool) -> Fold m a b -> Fold m a c -> Fold m a (b, c) -spanBy cmp (Fold stepL initialL extractL) (Fold stepR initialR extractR) = - Fold step initial extract - - where - - initial = - Tuple3' <$> liftInitialM initialL <*> liftInitialM initialR <*> - return (Tuple' Nothing True) - - step (Tuple3' (Done a) (Done b) _) _ = doneM (a, b) - step (Tuple3' a b (Tuple' (Just frst) isFirstG)) input = - if cmp frst input && isFirstG - then liftStep stepL a input >>= - (\a' -> - return $ - Partial $ Tuple3' a' b (Tuple' (Just frst) isFirstG)) - else liftStep stepR b input >>= - (\a' -> partialM $ Tuple3' a a' (Tuple' Nothing False)) - step (Tuple3' a b (Tuple' Nothing isFirstG)) input = - if isFirstG - then liftStep stepL a input >>= - (\a' -> - return $ - Partial $ Tuple3' a' b (Tuple' (Just input) isFirstG)) - else liftStep stepR b input >>= - (\a' -> partialM $ Tuple3' a a' (Tuple' Nothing False)) - - extract (Tuple3' a b _) = - (,) <$> liftExtract extractL a <*> liftExtract extractR b - -{- -spanBy cmp fld1 fld2 = - bind (ltakeWhileBy1 cmp fld1) $ \(ma, b) -> - case ma of - Nothing -> (,) b <$> fld2 - Just a -> (,) b <$> prependWith a fld2 --} - +spanBy cmp fld1 fld2 = splitWith (,) (groupBy cmp fld1) fld2 -- | @span p f1 f2@ composes folds @f1@ and @f2@ such that @f1@ consumes the -- input as long as the predicate @p@ is 'True'. @f2@ consumes the rest of the -- input. -- --- > let span_ p xs = S.fold (S.span p FL.toList FL.toList) $ S.fromList xs +-- > let span_ p xs = S.fold (FL.span p FL.toList FL.toList) $ S.fromList xs -- -- >>> span_ (< 1) [1,2,3] -- > ([],[1,2,3]) @@ -1067,34 +1203,7 @@ span -> Fold m a b -> Fold m a c -> Fold m a (b, c) -span p (Fold stepL initialL extractL) (Fold stepR initialR extractR) = - Fold step initial extract - - where - - initial = - Tuple3' <$> liftInitialM initialL <*> liftInitialM initialR <*> - return True - - step (Tuple3' (Done a) (Done b) _) _ = doneM (a, b) - step (Tuple3' a b isFirstG) input = - if isFirstG && p input - then liftStep stepL a input >>= - (\a' -> partialM $ Tuple3' a' b True) - else liftStep stepR b input >>= - (\a' -> partialM $ Tuple3' a a' False) - - extract (Tuple3' a b _) = - (,) <$> liftExtract extractL a <*> liftExtract extractR b - -{- -span predicate fld1 fld2 = - bind (ltakeWhile1 predicate fld1) $ \(ma, b) -> - case ma of - Nothing -> (,) b <$> fld2 - Just a -> (,) b <$> prependWith a fld2 --} - +span p fld1 fld2 = splitWith (,) (ltakeWhile p fld1) fld2 -- | -- > break p = span (not . p) @@ -1137,29 +1246,7 @@ spanByRolling -> Fold m a b -> Fold m a c -> Fold m a (b, c) -spanByRolling cmp (Fold stepL initialL extractL) (Fold stepR initialR extractR) = - Fold step initial extract - - where - - initial = - Tuple3' <$> liftInitialM initialL <*> liftInitialM initialR <*> - return Nothing - - step (Tuple3' (Done a) (Done b) _) _ = doneM (a, b) - - step (Tuple3' a b (Just frst)) input = - if cmp input frst - then liftStep stepL a input >>= - (\a' -> partialM $ Tuple3' a' b (Just input)) - else liftStep stepR b input >>= - (\b' -> partialM $ Tuple3' a b' (Just input)) - step (Tuple3' a b Nothing) input = - liftStep stepL a input >>= - (\a' -> partialM $ Tuple3' a' b (Just input)) - - extract (Tuple3' a b _) = - (,) <$> liftExtract extractL a <*> liftExtract extractR b +spanByRolling cmp fld1 fld2 = splitWith (,) (groupByRolling cmp fld1) fld2 ------------------------------------------------------------------------------ -- Binary splitting on a separator @@ -1198,27 +1285,19 @@ breakOn pat f m = undefined -- @since 0.7.0 {-# INLINE tee #-} tee :: Monad m => Fold m a b -> Fold m a c -> Fold m a (b,c) -tee f1 f2 = (,) <$> f1 <*> f2 +tee f1 f2 = teeWith (,) f1 f2 {-# INLINE foldNil #-} foldNil :: Monad m => Fold m a [b] foldNil = Fold step begin done where begin = return [] - step _ _ = partialM [] + step _ _ = return $ Partial [] done = return -- XXX How is the performance? {-# INLINE foldCons #-} foldCons :: Monad m => Fold m a b -> Fold m a [b] -> Fold m a [b] -foldCons (Fold stepL beginL doneL) (Fold stepR beginR doneR) = - Fold step begin done - - where - - begin = Tuple' <$> liftInitialM beginL <*> liftInitialM beginR - step (Tuple' (Done a) (Done b)) _ = doneM $ a:b - step (Tuple' xL xR) a = fmap Partial $ Tuple' <$> liftStep stepL xL a <*> liftStep stepR xR a - done (Tuple' xL xR) = (:) <$> liftExtract doneL xL <*> liftExtract doneR xR +foldCons f1 f2 = teeWith (:) f1 f2 -- XXX use "List" instead of "[]"?, use Array for output to scale it to a large -- number of consumers? For polymorphic case a vector could be helpful. For @@ -1246,28 +1325,71 @@ foldCons (Fold stepL beginL doneL) (Fold stepR beginR doneR) = distribute :: Monad m => [Fold m a b] -> Fold m a [b] distribute = foldr foldCons foldNil +-- It would be neater if we use a new type +{-# INLINE combineFoldState #-} +combineFoldState :: Step () () -> Step () () -> Step () () +combineFoldState (Partial ()) _ = Partial () +combineFoldState _ (Partial ()) = Partial () +combineFoldState (Done ()) (Done ()) = Done () +combineFoldState (Done ()) (Done1 ()) = Done () +combineFoldState (Done1 ()) (Done ()) = Done () +combineFoldState (Done1 ()) (Done1 ()) = Done1 () + +{-# INLINE toUnitState #-} +toUnitState :: Step s b -> Step () () +toUnitState (Partial _) = Partial () +toUnitState (Done _) = Done () +toUnitState (Done1 _) = Done1 () + + -- | Like 'distribute' but for folds that return (), this can be more efficient -- than 'distribute' as it does not need to maintain state. -- --- XXX Efficiently find when to stop? {-# INLINE distribute_ #-} distribute_ :: Monad m => [Fold m a ()] -> Fold m a () distribute_ fs = Fold step initial extract + where - initial = Prelude.mapM initialize fs - step ss a = do - -- XXX We can use foldM here instead and check if the number of Stops - -- are equal to the number of Folds - Prelude.mapM_ (\fld -> void $ runStep fld a) ss - partialM ss - extract ss = Prelude.mapM_ (\(Fold _ i e) -> i >>= \r -> e r) ss + + initial = Prelude.mapM (fmap Partial . initialize) fs + + step ss a = do + ss1 <- Prelude.mapM (flip runMaybeStep a) ss + let endRes = + List.foldl' + (\fb fa -> toUnitState fa `combineFoldState` fb) + (Done1 ()) + ss1 + return + $ case endRes of + Done () -> Done () + Done1 () -> Done1 () + Partial () -> Partial ss1 + + extract ss = Prelude.mapM_ runMaybeExtract ss + + runMaybeExtract (Partial (Fold _ i d)) = i >>= d + runMaybeExtract _ = return () + + runMaybeStep (Done1 ()) _ = return $ Done1 () + runMaybeStep (Done ()) _ = return $ Done1 () + runMaybeStep (Partial (Fold s i d)) a = do + ii <- i + res <- s ii a + return + $ case res of + Partial sres -> Partial (Fold s (return sres) d) + Done _ -> Done () + Done1 _ -> Done1 () ------------------------------------------------------------------------------ -- Partitioning ------------------------------------------------------------------------------ --- + -- | Partition the input over two folds using an 'Either' partitioning --- predicate. +-- predicate. This fold terminates when both the folds terminate. +-- +-- See 'partitionByFstM' and 'partitionByMinM'. -- -- @ -- @@ -1310,24 +1432,87 @@ distribute_ fs = Fold step initial extract partitionByM :: Monad m => (a -> m (Either b c)) -> Fold m b x -> Fold m c y -> Fold m a (x, y) partitionByM f (Fold stepL beginL doneL) (Fold stepR beginR doneR) = - Fold step begin done where - begin = Tuple' <$> liftInitialM beginL <*> liftInitialM beginR - step (Tuple' (Done x) (Done y)) _ = doneM (x, y) - step (Tuple' xL xR) a = do + begin = do + sL <- beginL + sR <- beginR + return $ RunBoth sL sR + + step (RunBoth sL sR) a = do + r <- f a + case r of + Left b -> do + res <- stepL sL b + return + $ case res of + Partial sres -> Partial $ RunBoth sres sR + Done bres -> Partial $ RunRight bres sR + Done1 bres -> Partial $ RunRight bres sR + Right c -> do + res <- stepR sR c + return + $ case res of + Partial sres -> Partial $ RunBoth sL sres + Done bres -> Partial $ RunLeft sL bres + Done1 bres -> Partial $ RunLeft sL bres + step (RunLeft sL bR) a = do r <- f a case r of - Left b -> fmap Partial $ Tuple' <$> liftStep stepL xL b <*> return xR - Right c -> fmap Partial $ Tuple' xL <$> liftStep stepR xR c - done (Tuple' xL xR) = (,) <$> liftExtract doneL xL <*> liftExtract doneR xR + Left b -> do + res <- stepL sL b + return + $ case res of + Partial sres -> Partial $ RunLeft sres bR + Done bres -> Done (bres, bR) + Done1 bres -> Done1 (bres, bR) + Right _ -> return $ Partial $ RunLeft sL bR + step (RunRight bL sR) a = do + r <- f a + case r of + Left _ -> return $ Partial $ RunRight bL sR + Right c -> do + res <- stepR sR c + return + $ case res of + Partial sres -> Partial $ RunRight bL sres + Done bres -> Done (bL, bres) + Done1 bres -> Done1 (bL, bres) + + done (RunBoth sL sR) = do + bL <- doneL sL + bR <- doneR sR + return (bL, bR) + done (RunLeft sL bR) = do + bL <- doneL sL + return (bL, bR) + done (RunRight bL sR) = do + bR <- doneR sR + return (bL, bR) + +-- | Similar to 'partitionByM' but terminates when the first fold terminates. +-- +-- /Unimplemented/ +-- +{-# INLINE partitionByFstM #-} +partitionByFstM :: -- Monad m => + (a -> m (Either b c)) -> Fold m b x -> Fold m c y -> Fold m a (x, y) +partitionByFstM = undefined + +-- | Similar to 'partitionByM' but terminates when any fold terminates. +-- +-- /Unimplemented/ +-- +{-# INLINE partitionByMinM #-} +partitionByMinM :: -- Monad m => + (a -> m (Either b c)) -> Fold m b x -> Fold m c y -> Fold m a (x, y) +partitionByMinM = undefined -- Note: we could use (a -> Bool) instead of (a -> Either b c), but the latter -- makes the signature clearer as to which case belongs to which fold. -- XXX need to check the performance in both cases. - -- | Same as 'partitionByM' but with a pure partition function. -- -- Count even and odd numbers in a stream: @@ -1379,6 +1564,7 @@ partition = partitionBy id -- This is the consumer side dual of the producer side 'mux' operation (XXX to -- be implemented). +-- XXX I have a few questions regarding this -- | Split the input stream based on a key field and fold each split using a -- specific fold collecting the results in a map from the keys to the results. -- Useful for cases like protocol handlers to handle different type of packets @@ -1394,7 +1580,6 @@ partition = partitionBy id -- @ -- -- @since 0.7.0 --- XXX Find an efficient way to Done. Check if all the folds have stopped. {-# INLINE demuxWith #-} demuxWith :: (Monad m, Ord k) => (a -> (k, a')) -> Map k (Fold m a' b) -> Fold m a (Map k b) @@ -1402,29 +1587,52 @@ demuxWith f kv = Fold step initial extract where - initial = return kv --- alterF is available only since containers version 0.5.8.2 -#if MIN_VERSION_containers(0,5,8) - step mp a = case f a of - (k, a') -> Partial <$> Map.alterF twiddle k mp - -- XXX should we raise an exception in Nothing case? - -- Ideally we should enforce that it is a total map over k so that look - -- up never fails - -- XXX we could use a monadic update function for a single lookup and - -- update in the map. - where - twiddle Nothing = pure Nothing - twiddle (Just fld) = Just <$> runStep fld a' -#else - step mp a = + initial = + Tuple' (Map.size kv) <$> Prelude.mapM (fmap Left . initialize) kv + + step (Tuple' n mp) a = let (k, a') = f a - in case Map.lookup k mp of - Nothing -> partialM mp - Just fld -> do - !r <- runStep fld a' - partialM $ Map.insert k r mp -#endif - extract = Prelude.mapM (\(Fold _ acc e) -> acc >>= e) + in case Map.lookup k mp of + Nothing -> return $ Partial $ Tuple' n mp + Just (Left (Fold stp ini dn)) -> do + !st <- ini + !res <- stp st a' + let n1 = n - 1 + return + $ case res of + Partial sres -> + Partial + $ Tuple' n + $ Map.insert + k + (Left (Fold stp (return sres) dn)) + mp + -- XXX Check for n - 1 == 0 here for Done & Done1? + -- XXX Treat the last completing fold differently? + Done bres -> + if n1 == 0 + then Done $ done $ Map.insert k (Right bres) mp + else Partial + $ Tuple' n1 + $ Map.insert k (Right bres) mp + Done1 bres -> + if n1 == 0 + then Done1 $ done $ Map.insert k (Right bres) mp + else Partial + $ Tuple' n1 + $ Map.insert k (Right bres) mp + Just (Right _) -> do + return $ Partial $ Tuple' n mp + + extract (Tuple' _ mp) = Prelude.mapM runEitherExtract mp + + runEitherExtract (Left (Fold _ i d)) = i >>= d + runEitherExtract (Right b) = return b + + done = Map.map runEitherDone + + runEitherDone (Left _) = error "Incomplete folds exist" + runEitherDone (Right b) = b -- | Fold a stream of key value pairs using a map of specific folds for each -- key into a map from keys to the results of fold outputs of the corresponding @@ -1443,6 +1651,9 @@ demux :: (Monad m, Ord k) => Map k (Fold m a b) -> Fold m (k, a) (Map k b) demux = demuxWith id +-- data DemuxState m s = DemuxP !m !s | DemuxD !m +data DemuxState m s = DemuxP Int s m | DemuxD Int m + {-# INLINE demuxWithDefault_ #-} demuxWithDefault_ :: (Monad m, Ord k) => (a -> (k, a')) -> Map k (Fold m a' b) -> Fold m (k, a') b -> Fold m a () @@ -1451,22 +1662,77 @@ demuxWithDefault_ f kv (Fold dstep dinitial dextract) = where - initial = do - mp <- Prelude.mapM initialize kv - dacc <- liftInitialM dinitial - return (Tuple' mp dacc) - step (Tuple' mp dacc) a - | (k, a') <- f a - = case Map.lookup k mp of - Nothing -> do - acc <- liftStep dstep dacc (k, a') - partialM $ Tuple' mp acc - Just (Fold step' acc _) -> do - _ <- acc >>= \x -> step' x a' - partialM $ Tuple' mp dacc - extract (Tuple' mp dacc) = do - void $ liftExtract dextract dacc - Prelude.mapM_ (\(Fold _ acc e) -> acc >>= e) mp + initial = + DemuxP (Map.size kv + 1) <$> dinitial <*> Prelude.mapM initialize kv + + step (DemuxP n dacc mp) a + | (k, a') <- f a = + case Map.lookup k mp of + Nothing -> do + res <- dstep dacc (k, a') + let n1 = n - 1 + return + $ case res of + Partial sres -> Partial $ DemuxP n sres mp + Done _ -> + if n1 == 0 + then Done () + else Partial $ DemuxD n1 mp + Done1 _ -> + if n1 == 0 + then Done1 () + else Partial $ DemuxD n1 mp + Just (Fold stp ini dn) -> do + !st <- ini + !res <- stp st a' + let n1 = n - 1 + return + $ case res of + Partial sres -> + Partial + $ DemuxP n dacc + $ Map.insert k (Fold stp (return sres) dn) mp + -- XXX Check for n - 1 == 0 here for Done & Done1? + -- XXX Treat the last completing fold differently? + Done _ -> + if n1 == 0 + then Done () + else Partial $ DemuxP n1 dacc $ Map.delete k mp + Done1 _ -> + if n1 == 0 + then Done1 () + else Partial $ DemuxP n1 dacc $ Map.delete k mp + -- XXX Reduce code duplication? + step (DemuxD n mp) a + | (k, a') <- f a = + case Map.lookup k mp of + Nothing -> return $ Partial $ DemuxD n mp + Just (Fold stp ini dn) -> do + !st <- ini + !res <- stp st a' + let n1 = n - 1 + return + $ case res of + Partial sres -> + Partial + $ DemuxD n + $ Map.insert k (Fold stp (return sres) dn) mp + -- XXX Check for n - 1 == 0 here for Done & Done1? + -- XXX Treat the last completing fold differently? + Done _ -> + if n1 == 0 + then Done () + else Partial $ DemuxD n1 $ Map.delete k mp + Done1 _ -> + if n1 == 0 + then Done1 () + else Partial $ DemuxD n1 $ Map.delete k mp + + extract (DemuxP _ dacc mp) = do + void $ dextract dacc + Prelude.mapM_ (\(Fold _ i d) -> i >>= d) mp + extract (DemuxD _ mp) = do + Prelude.mapM_ (\(Fold _ i d) -> i >>= d) mp -- | Split the input stream based on a key field and fold each split using a -- specific fold without collecting the results. Useful for cases like protocol @@ -1494,18 +1760,35 @@ demuxWith_ f kv = Fold step initial extract where - initial = Prelude.mapM initialize kv - step mp a - -- XXX should we raise an exception in Nothing case? - -- Ideally we should enforce that it is a total map over k so that look - -- up never fails - | (k, a') <- f a - = case Map.lookup k mp of - Nothing -> partialM mp - Just (Fold step' acc _) -> do - _ <- acc >>= \x -> step' x a' - partialM mp - extract = Prelude.mapM_ (\(Fold _ acc e) -> acc >>= e) + initial = Tuple' (Map.size kv) <$> Prelude.mapM initialize kv + + step (Tuple' n mp) a + | (k, a') <- f a = + case Map.lookup k mp of + Nothing -> return $ Partial $ Tuple' n mp + Just (Fold stp ini dn) -> do + !st <- ini + !res <- stp st a' + let n1 = n - 1 + return + $ case res of + Partial sres -> + Partial + $ Tuple' n + $ Map.insert k (Fold stp (return sres) dn) mp + -- XXX Check for n - 1 == 0 here for Done & Done1? + -- XXX Treat the last completing fold differently? + Done _ -> + if n1 == 0 + then Done () + else Partial $ Tuple' n1 $ Map.delete k mp + Done1 _ -> + if n1 == 0 + then Done1 () + else Partial $ Tuple' n1 $ Map.delete k mp + + extract (Tuple' _ mp) = do + Prelude.mapM_ (\(Fold _ i d) -> i >>= d) mp -- | Given a stream of key value pairs and a map from keys to folds, fold the -- values for each key using the corresponding folds, discarding the outputs. @@ -1551,17 +1834,41 @@ classifyWith f (Fold step initial extract) = Fold step' initial' extract' where initial' = return Map.empty + step' kv a = - let k = f a - in case Map.lookup k kv of + case Map.lookup k kv of Nothing -> do x <- initial r <- step x a - partialM $ Map.insert k r kv + return + $ Partial + $ flip (Map.insert k) kv + $ case r of + Partial sr -> Left' sr + Done br -> Right' br + Done1 br -> Right' br Just x -> do - r <- liftStep step x a - partialM $ Map.insert k r kv - extract' = Prelude.mapM (liftExtract extract) + case x of + Left' s -> do + r <- step s a + return + $ Partial + $ flip (Map.insert k) kv + $ case r of + Partial sr -> Left' sr + Done br -> Right' br + Done1 br -> Right' br + Right' _ -> return $ Partial kv + + where + + k = f a + + extract' = + Prelude.mapM + (\case + Left' s -> extract s + Right' b -> return b) -- | Given an input stream of key value pairs and a fold for values, fold all -- the values belonging to each key. Useful for map/reduce, bucketizing the @@ -1586,7 +1893,7 @@ classify fld = classifyWith fst (lmap snd fld) ------------------------------------------------------------------------------ -- Unzipping ------------------------------------------------------------------------------ --- + -- | Like 'unzipWith' but with a monadic splitter function. -- -- @since 0.7.0 @@ -1598,12 +1905,70 @@ unzipWithM f (Fold stepL beginL doneL) (Fold stepR beginR doneR) = where - step (Tuple' (Done l) (Done r)) _ = doneM (l, r) - step (Tuple' xL xR) a = do - (b,c) <- f a - fmap Partial $ Tuple' <$> liftStep stepL xL b <*> liftStep stepR xR c - begin = Tuple' <$> liftInitialM beginL <*> liftInitialM beginR - done (Tuple' xL xR) = (,) <$> liftExtract doneL xL <*> liftExtract doneR xR + begin = RunBoth <$> beginL <*> beginR + + step (RunBoth sL sR) a = do + (b, c) <- f a + resL <- stepL sL b + resR <- stepR sR c + case resL of + Partial sresL -> + return + $ Partial + $ case resR of + Partial sresR -> RunBoth sresL sresR + Done bresR -> RunLeft sresL bresR + Done1 bresR -> RunLeft sresL bresR + Done bresL -> + return + $ case resR of + Partial sresR -> Partial $ RunRight bresL sresR + Done bresR -> Done (bresL, bresR) + Done1 bresR -> Done (bresL, bresR) + Done1 bresL -> + return + $ case resR of + Partial sresR -> Partial $ RunRight bresL sresR + Done bresR -> Done (bresL, bresR) + Done1 bresR -> Done1 (bresL, bresR) + step (RunLeft sL bR) a = do + (b, _) <- f a + resL <- stepL sL b + return + $ case resL of + Partial sresL -> Partial $ RunLeft sresL bR + Done bresL -> Done (bresL, bR) + Done1 bresL -> Done1 (bresL, bR) + step (RunRight bL sR) a = do + (_, c) <- f a + resR <- stepR sR c + return + $ case resR of + Partial sresR -> Partial $ RunRight bL sresR + Done bresR -> Done (bL, bresR) + Done1 bresR -> Done1 (bL, bresR) + + done (RunBoth sL sR) = (,) <$> doneL sL <*> doneR sR + done (RunLeft sL bR) = (,) <$> doneL sL <*> return bR + done (RunRight bL sR) = (,) bL <$> doneR sR + +-- | Similar to 'unzipWithM' but terminates when the first fold terminates. +-- +-- /Unimplemented/ +-- +{-# INLINE unzipWithFstM #-} +unzipWithFstM :: -- Monad m => + (a -> m (b,c)) -> Fold m b x -> Fold m c y -> Fold m a (x,y) +unzipWithFstM = undefined + +-- | Similar to 'unzipWithM' but terminates when any fold terminates. +-- +-- /Unimplemented/ +-- +{-# INLINE unzipWithMinM #-} +unzipWithMinM :: -- Monad m => + (a -> m (b,c)) -> Fold m b x -> Fold m c y -> Fold m a (x,y) +unzipWithMinM = undefined -- | Split elements in the input stream into two parts using a pure splitter -- function, direct each part to a different fold and zip the results. @@ -1666,39 +2031,42 @@ lchunksInRange low high (Fold step1 initial1 extract1) {-# INLINE toParallelSVar #-} toParallelSVar :: MonadIO m => SVar t m a -> Maybe WorkerInfo -> Fold m a () toParallelSVar svar winfo = Fold step initial extract + where initial = return () - step _ x = liftIO $ do - -- XXX we can have a separate fold for unlimited buffer case to avoid a - -- branch in the step here. - decrementBufferLimit svar - void $ send svar (ChildYield x) - return $ FL.Partial () + step () x = + liftIO $ do + decrementBufferLimit svar + void $ send svar (ChildYield x) + return $ FL.Partial () + -- XXX we can have a separate fold for unlimited buffer case to avoid a + -- branch in the step here. - extract () = liftIO $ - sendStop svar winfo + extract () = liftIO $ sendStop svar winfo {-# INLINE toParallelSVarLimited #-} toParallelSVarLimited :: MonadIO m => SVar t m a -> Maybe WorkerInfo -> Fold m a () toParallelSVarLimited svar winfo = Fold step initial extract + where initial = return True - step True x = liftIO $ do - yieldLimitOk <- decrementYieldLimit svar - if yieldLimitOk - then do - decrementBufferLimit svar - void $ send svar (ChildYield x) - return $ FL.Partial True - else do - cleanupSVarFromWorker svar - sendStop svar winfo - return $ FL.Done () + step True x = + liftIO $ do + yieldLimitOk <- decrementYieldLimit svar + if yieldLimitOk + then do + decrementBufferLimit svar + void $ send svar (ChildYield x) + return $ FL.Partial True + else do + cleanupSVarFromWorker svar + sendStop svar winfo + return $ FL.Done () step False _ = return $ FL.Done () extract True = liftIO $ sendStop svar winfo diff --git a/src/Streamly/Internal/Data/Fold/Types.hs b/src/Streamly/Internal/Data/Fold/Types.hs index 5c31d8521b..c1f85e393d 100644 --- a/src/Streamly/Internal/Data/Fold/Types.hs +++ b/src/Streamly/Internal/Data/Fold/Types.hs @@ -119,12 +119,6 @@ module Streamly.Internal.Data.Fold.Types ( Step (..) - , liftStep - , liftExtract - , liftInitial - , liftInitialM - , partialM - , doneM , Fold (..) , Fold2 (..) @@ -140,8 +134,15 @@ module Streamly.Internal.Data.Fold.Types , ltake , ltakeWhile + , distributiveAp + , teeWith + , teeWithFst + , teeWithMin , splitWith , many + , groupBy + , groupByRolling + , takeByTime , lsessionsOf , lchunksOf , lchunksOf2 @@ -149,17 +150,17 @@ module Streamly.Internal.Data.Fold.Types , duplicate , initialize , runStep - , concatMap + + , GenericRunner(..) -- Is used in multiple step functions ) where -import Data.Bifunctor +import Data.Bifunctor (Bifunctor(..)) import Control.Applicative (liftA2) import Control.Concurrent (threadDelay, forkIO, killThread) -import Control.Concurrent.MVar (MVar, newMVar, takeMVar, putMVar) +import Control.Concurrent.MVar (MVar, newMVar, swapMVar, readMVar) import Control.Exception (SomeException(..), catch, mask) import Control.Monad (void) -import Control.Monad.Catch (throwM) import Control.Monad.IO.Class (MonadIO(..)) import Control.Monad.Trans.Control (control) import Data.Maybe (isJust, fromJust) @@ -176,20 +177,26 @@ import Prelude hiding (concatMap) ------------------------------------------------------------------------------ -- {-# ANN type Step Fuse #-} -data Step s b = Partial !s | Done !b +data Step s b + = Partial !s + | Done !b + | Done1 !b instance Bifunctor Step where {-# INLINE bimap #-} bimap f _ (Partial a) = Partial (f a) bimap _ g (Done b) = Done (g b) + bimap _ g (Done1 b) = Done (g b) {-# INLINE first #-} first f (Partial a) = Partial (f a) first _ (Done x) = Done x + first _ (Done1 x) = Done1 x {-# INLINE second #-} - second f (Done a) = Done (f a) second _ (Partial x) = Partial x + second f (Done a) = Done (f a) + second f (Done1 a) = Done1 (f a) instance Functor (Step s) where {-# INLINE fmap #-} @@ -209,32 +216,6 @@ data Fold m a b = -- | @Fold @ @ step @ @ initial @ @ extract@ forall s. Fold (s -> a -> m (Step s b)) (m s) (s -> m b) -{-# INLINE liftStep #-} -liftStep :: Monad m => (s -> a -> m (Step s b)) -> Step s b -> a -> m (Step s b) -liftStep step (Partial s) a = step s a -liftStep _ x _ = return x - -{-# INLINE liftExtract #-} -liftExtract :: Monad m => (s -> m b) -> Step s b -> m b -liftExtract _ (Done b) = return b -liftExtract done (Partial s) = done s - -{-# INLINE liftInitial #-} -liftInitial :: s -> Step s b -liftInitial = Partial - -{-# INLINE liftInitialM #-} -liftInitialM :: Monad m => m s -> m (Step s b) -liftInitialM = fmap Partial - -{-# INLINE partialM #-} -partialM :: Monad m => s -> m (Step s b) -partialM = return . Partial - -{-# INLINE doneM #-} -doneM :: Monad m => b -> m (Step s b) -doneM = return . Done - -- | Experimental type to provide a side input to the fold for generating the -- initial state. For example, if we have to fold chunks of a stream and write -- each chunk to a different file, then we can generate the file name using a @@ -250,16 +231,18 @@ simplify (Fold2 step inject extract) c = Fold (\x a -> Partial <$> step x a) (inject c) extract -- | Maps a function on the output of the fold (the type @b@). -instance Monad m => Functor (Fold m a) where +instance Functor m => Functor (Fold m a) where {-# INLINE fmap #-} - fmap f (Fold step start done) = Fold step' start done' + fmap f (Fold step1 initial extract) = Fold step initial (fmap2 f extract) + where - step' x a = do - res <- step x a - case res of - Partial s -> partialM s - Done b -> doneM (f b) - done' x = fmap f $! done x + + step s b = fmap2 f (step1 s b) + fmap2 g = fmap (fmap g) + +-- {-# ANN type Step Fuse #-} +-- data SeqFoldState sl f sr = SeqFoldL !sl | SeqFoldR !f !sr +data SeqFoldState sl f sr = SeqFoldL sl | SeqFoldR f sr -- | Sequential fold application. Apply two folds sequentially to an input -- stream. The input is provided to the first fold, when it is done the @@ -268,30 +251,143 @@ instance Monad m => Functor (Fold m a) where -- -- Note: This is a folding dual of appending streams using -- 'Streamly.Prelude.serial', it splits the streams using two folds and zips --- the results. +-- the results. This has the same caveats as ParseD's @splitWith@ -- --- /Unimplemented/ +-- /Internal/ -- {-# INLINE splitWith #-} -splitWith :: -- Monad m => +splitWith :: Monad m => (a -> b -> c) -> Fold m x a -> Fold m x b -> Fold m x c -splitWith = undefined +splitWith func (Fold stepL initialL extractL) (Fold stepR initialR extractR) = + Fold step initial extract + + where + + initial = SeqFoldL <$> initialL + + step (SeqFoldL st) a = do + r <- stepL st a + case r of + Partial s -> return $ Partial (SeqFoldL s) + Done b -> Partial <$> (SeqFoldR (func b) <$> initialR) + Done1 b -> do + ir <- initialR + step (SeqFoldR (func b) ir) a + step (SeqFoldR f st) a = do + r <- stepR st a + return + $ case r of + Partial s -> Partial (SeqFoldR f s) + Done b -> Done (f b) + Done1 b -> Done1 (f b) + + extract (SeqFoldR f sR) = fmap f (extractR sR) + extract (SeqFoldL sL) = do + rL <- extractL sL + sR <- initialR + rR <- extractR sR + return $ func rL rR + +-- {-# ANN type GenericRunner Fuse #-} +data GenericRunner sL sR bL bR + = RunBoth !sL !sR + | RunLeft !sL !bR + | RunRight !bL !sR -- | The fold resulting from '<*>' distributes its input to both the argument -- folds and combines their output using the supplied function. instance Monad m => Applicative (Fold m a) where {-# INLINE pure #-} pure b = Fold (\() _ -> pure $ Done b) (pure ()) (\() -> pure b) + -- XXX deprecate this? {-# INLINE (<*>) #-} - (Fold stepL beginL doneL) <*> (Fold stepR beginR doneR) = - let combine (Done dL) (Done dR) = Done $ dL dR - combine sl sr = Partial $ Tuple' sl sr - step (Tuple' xL xR) a = - combine <$> liftStep stepL xL a <*> liftStep stepR xR a - begin = Tuple' <$> liftInitialM beginL <*> liftInitialM beginR - done (Tuple' xL xR) = liftExtract doneL xL <*> liftExtract doneR xR - in Fold step begin done + (<*>) = distributiveAp + +-- | @teeWith f f1 f2@ distributes its input to both @f1@ and @f2@ until both +-- of them terminate and combines their output using @f@. +-- +-- /Internal/ +-- +{-# INLINE teeWith #-} +teeWith :: Monad m => (b -> c -> d) -> Fold m a b -> Fold m a c -> Fold m a d +teeWith f (Fold stepL beginL doneL) (Fold stepR beginR doneR) = + Fold step begin done + + where + + begin = do + sL <- beginL + sR <- beginR + return $ RunBoth sL sR + + step (RunBoth sL sR) a = do + resL <- stepL sL a + resR <- stepR sR a + case resL of + Partial sL1 -> + return + $ Partial + $ case resR of + Partial sR1 -> RunBoth sL1 sR1 + Done bR -> RunLeft sL1 bR + Done1 bR -> RunLeft sL1 bR + Done bL -> + return + $ case resR of + Partial sR1 -> Partial $ RunRight bL sR1 + Done bR -> Done $ f bL bR + Done1 bR -> Done $ f bL bR + Done1 bL -> + return + $ case resR of + Partial sR1 -> Partial $ RunRight bL sR1 + Done bR -> Done $ f bL bR + Done1 bR -> Done1 $ f bL bR + step (RunLeft sL bR) a = do + resL <- stepL sL a + return + $ case resL of + Partial sL1 -> Partial $ RunLeft sL1 bR + Done bL -> Done $ f bL bR + Done1 bL -> Done1 $ f bL bR + step (RunRight bL sR) a = do + resR <- stepR sR a + return + $ case resR of + Partial sR1 -> Partial $ RunRight bL sR1 + Done bR -> Done $ f bL bR + Done1 bR -> Done1 $ f bL bR + + done (RunBoth sL sR) = do + bL <- doneL sL + bR <- doneR sR + return $ f bL bR + done (RunLeft sL bR) = do + bL <- doneL sL + return $ f bL bR + done (RunRight bL sR) = do + bR <- doneR sR + return $ f bL bR + +-- | Like 'teeWith' but terminates when the first fold terminates. +-- +-- /Unimplemented/ +-- +{-# INLINE teeWithFst #-} +teeWithFst :: (b -> c -> d) -> Fold m a b -> Fold m a c -> Fold m a d +teeWithFst = undefined +-- | Like 'teeWith' but terminates when any fold terminates. +-- +-- /Unimplemented/ +-- +{-# INLINE teeWithMin #-} +teeWithMin :: (b -> c -> d) -> Fold m a b -> Fold m a c -> Fold m a d +teeWithMin = undefined + +{-# INLINE distributiveAp #-} +distributiveAp :: Monad m => Fold m a (b -> c) -> Fold m a b -> Fold m a c +distributiveAp = teeWith ($) -- | Combines the outputs of the folds (the type @b@) using their 'Semigroup' -- instances. @@ -416,7 +512,7 @@ instance (Monad m, Floating b) => Floating (Fold m a b) where -- xn : ... : x2 : x1 : [] {-# INLINABLE toListRevF #-} toListRevF :: Monad m => Fold m a [a] -toListRevF = Fold (\xs x -> partialM $ x:xs) (return []) return +toListRevF = Fold (\xs x -> return $ Partial $ x:xs) (return []) return -- | @(lmap f fold)@ maps the function @f@ on the input of the fold. -- @@ -427,7 +523,7 @@ toListRevF = Fold (\xs x -> partialM $ x:xs) (return []) return {-# INLINABLE lmap #-} lmap :: (a -> b) -> Fold m b r -> Fold m a r lmap f (Fold step begin done) = Fold step' begin done - where + where step' x a = step x (f a) -- | @(lmapM f fold)@ maps the monadic function @f@ on the input of the fold. @@ -436,7 +532,7 @@ lmap f (Fold step begin done) = Fold step' begin done {-# INLINABLE lmapM #-} lmapM :: Monad m => (a -> m b) -> Fold m b r -> Fold m a r lmapM f (Fold step begin done) = Fold step' begin done - where + where step' x a = f a >>= step x ------------------------------------------------------------------------------ @@ -452,8 +548,8 @@ lmapM f (Fold step begin done) = Fold step' begin done {-# INLINABLE lfilter #-} lfilter :: Monad m => (a -> Bool) -> Fold m a r -> Fold m a r lfilter f (Fold step begin done) = Fold step' begin done - where - step' x a = if f a then step x a else partialM x + where + step' x a = if f a then step x a else return $ Partial x -- | Like 'lfilter' but with a monadic predicate. -- @@ -461,10 +557,10 @@ lfilter f (Fold step begin done) = Fold step' begin done {-# INLINABLE lfilterM #-} lfilterM :: Monad m => (a -> m Bool) -> Fold m a r -> Fold m a r lfilterM f (Fold step begin done) = Fold step' begin done - where + where step' x a = do use <- f a - if use then step x a else partialM x + if use then step x a else return $ Partial x -- | Transform a fold from a pure input to a 'Maybe' input, consuming only -- 'Just' values. @@ -476,37 +572,47 @@ lcatMaybes = lfilter isJust . lmap fromJust -- Parsing ------------------------------------------------------------------------------ --- XXX These should become terminating folds. --- -- | Take first @n@ elements from the stream and discard the rest. -- -- @since 0.7.0 {-# INLINE ltake #-} ltake :: Monad m => Int -> Fold m a b -> Fold m a b -ltake n (Fold step initial done) = Fold step' initial' done' +ltake n (Fold fstep finitial fextract) = Fold step initial extract + where - initial' = fmap (Tuple' 0) initial - step' (Tuple' i r) a = - if i < n - then do - res <- step r a + + initial = Tuple' 0 <$> finitial + + step (Tuple' i r) a + | i < n = do + res <- fstep r a case res of - Partial s -> partialM $ Tuple' (i + 1) s - Done b -> doneM b - else Done <$> done r - done' (Tuple' _ r) = done r + Partial sres -> do + let i1 = i + 1 + s1 = Tuple' i1 sres + if i1 < n + then return $ Partial s1 + else Done <$> fextract sres + Done bres -> return $ Done bres + Done1 bres -> return $ Done1 bres + | otherwise = Done1 <$> fextract r + + extract (Tuple' _ r) = fextract r -- | Takes elements from the input as long as the predicate succeeds. -- -- @since 0.7.0 -{-# INLINABLE ltakeWhile #-} +{-# INLINE ltakeWhile #-} ltakeWhile :: Monad m => (a -> Bool) -> Fold m a b -> Fold m a b -ltakeWhile predicate (Fold step initial done) = Fold step' initial done +ltakeWhile predicate (Fold fstep finitial fextract) = + Fold step finitial fextract + where - step' r a = + + step s a = if predicate a - then step r a - else Done <$> done r + then fstep s a + else Done1 <$> fextract s ------------------------------------------------------------------------------ -- Nesting @@ -529,12 +635,23 @@ ltakeWhile predicate (Fold step initial done) = Fold step' initial done duplicate :: Monad m => Fold m a b -> Fold m a (Fold m a b) duplicate (Fold step begin done) = Fold step' begin (\x -> pure (Fold step (pure x) done)) + where - step' x a = do - res <- step x a - case res of - Partial s -> pure $ Partial s - Done _ -> pure $ Done $ Fold step (pure x) done + + step' x a = do + res <- step x a + -- XXX Discuss about initial element + case res of + Partial s -> pure $ Partial s + Done b -> + return + $ Done + $ Fold (\_ _ -> return $ Done1 b) (return x) (\_ -> return b) + Done1 b -> + return + $ Done1 + $ Fold (\_ _ -> return $ Done1 b) (return x) (\_ -> return b) + -- | Run the initialization effect of a fold. The returned fold would use the -- value returned by this effect as its initial value. @@ -554,16 +671,10 @@ runStep (Fold step initial extract) a = do r <- step i a case r of Partial s -> return $ Fold step (return s) extract - Done b -> return $ Fold (\_ _ -> doneM b) (return i) (\_ -> return b) - --- | Map a 'Fold' returning function on the result of a 'Fold'. --- --- /Unimplemented/ --- -{-# INLINE concatMap #-} -concatMap :: -- Monad m => - (b -> Fold m a c) -> Fold m a b -> Fold m a c -concatMap = undefined + Done b -> + return $ Fold (\_ _ -> return $ Done1 b) (return i) (\_ -> return b) + Done1 b -> + return $ Fold (\_ _ -> return $ Done1 b) (return i) (\_ -> return b) ------------------------------------------------------------------------------ -- Parsing @@ -589,22 +700,30 @@ many (Fold fstep finitial fextract) (Fold step1 initial1 extract1) = where initial = do - ps <- initial1 -- parse state - fs <- finitial -- fold state + ps <- initial1 + fs <- finitial pure (Tuple' ps fs) {-# INLINE step #-} step (Tuple' st fs) a = do r <- step1 st a case r of - Partial s -> - return $ Partial (Tuple' s fs) + Partial s -> return $ Partial (Tuple' s fs) Done b -> do + s <- initial1 + fs1 <- fstep fs b + return + $ case fs1 of + Partial s1 -> Partial (Tuple' s s1) + Done b1 -> Done b1 + Done1 b1 -> Done b1 + Done1 b -> do s <- initial1 fs1 <- fstep fs b case fs1 of - Partial s1 -> return $ Partial (Tuple' s s1) - Done b1 -> return $ Done b1 + Partial s1 -> step (Tuple' s s1) a + Done b1 -> return $ Done1 b1 + Done1 b1 -> return $ Done1 b1 extract (Tuple' s fs) = do b <- extract1 s @@ -612,6 +731,67 @@ many (Fold fstep finitial fextract) (Fold step1 initial1 extract1) = case acc of Partial s1 -> fextract s1 Done x -> return x + Done1 x -> return x + +data GroupByState a s = GroupByN !s | GroupByJ !a !s + +{-# INLINE groupBy #-} +groupBy :: Monad m => (a -> a -> Bool) -> Fold m a b -> Fold m a b +groupBy cmp (Fold fstep finitial fextract) = Fold step initial extract + + where + + initial = GroupByN <$> finitial + + step (GroupByN s) a = do + res <- fstep s a + return $ + case res of + Done bres -> Done bres + Done1 bres -> Done1 bres + Partial sres -> Partial (GroupByJ a sres) + step (GroupByJ a0 s) a = + if cmp a0 a + then do + res <- fstep s a + return $ + case res of + Done bres -> Done bres + Done1 bres -> Done1 bres + Partial sres -> Partial (GroupByJ a0 sres) + else Done1 <$> fextract s + + extract (GroupByN s) = fextract s + extract (GroupByJ _ s) = fextract s + +{-# INLINE groupByRolling #-} +groupByRolling :: Monad m => (a -> a -> Bool) -> Fold m a b -> Fold m a b +groupByRolling cmp (Fold fstep finitial fextract) = Fold step initial extract + + where + + initial = GroupByN <$> finitial + + step (GroupByN s) a = do + res <- fstep s a + return $ + case res of + Done bres -> Done bres + Done1 bres -> Done1 bres + Partial sres -> Partial (GroupByJ a sres) + step (GroupByJ a0 s) a = + if cmp a0 a + then do + res <- fstep s a + return $ + case res of + Done bres -> Done bres + Done1 bres -> Done1 bres + Partial sres -> Partial (GroupByJ a sres) + else Done1 <$> fextract s + + extract (GroupByN s) = fextract s + extract (GroupByJ _ s) = fextract s -- | For every n input items, apply the first fold and supply the result to the -- next fold. @@ -627,101 +807,77 @@ lchunksOf2 n (Fold step1 initial1 extract1) (Fold2 step2 inject2 extract2) = where - inject' x = Tuple3' 0 <$> liftInitialM initial1 <*> inject2 x + inject' x = Tuple3' 0 <$> initial1 <*> inject2 x + step' (Tuple3' i r1 r2) a = if i < n then do - res <- liftStep step1 r1 a - return $ Tuple3' (i + 1) res r2 + res <- step1 r1 a + case res of + Partial sres -> return $ Tuple3' (i + 1) sres r2 + Done b -> do + s <- initial1 + r21 <- step2 r2 b + return $ Tuple3' 0 s r21 + Done1 b -> do + s <- initial1 + r21 <- step2 r2 b + step' (Tuple3' 0 s r21) a else do - res <- liftExtract extract1 r1 + res <- extract1 r1 acc2 <- step2 r2 res - i1 <- initial1 - acc1 <- step1 i1 a - return $ Tuple3' 1 acc1 acc2 + return $ Tuple3' 0 i1 acc2 + extract' (Tuple3' _ r1 r2) = do - res <- liftExtract extract1 r1 + res <- extract1 r1 acc2 <- step2 r2 res extract2 acc2 --- | Group the input stream into windows of n second each and then fold each --- group using the provided fold function. --- --- For example, we can copy and distribute a stream to multiple folds where --- each fold can group the input differently e.g. by one second, one minute and --- one hour windows respectively and fold each resulting stream of folds. --- --- @ --- --- -----Fold m a b----|-Fold n a c-|-Fold n a c-|-...-|----Fold m a c --- --- @ --- XXX Should we check for mv2 at each step? -{-# INLINE lsessionsOf #-} -lsessionsOf :: MonadAsync m => Double -> Fold m a b -> Fold m b c -> Fold m a c -lsessionsOf n (Fold step1 initial1 extract1) (Fold step2 initial2 extract2) = - Fold step' initial' extract' +-- XXX zip the streams with timestamps +{-# INLINE takeByTime #-} +takeByTime :: MonadAsync m => Double -> Fold m a b -> Fold m a b +takeByTime n (Fold step initial done) = Fold step' initial' done' where - -- XXX MVar may be expensive we need a cheaper synch mechanism here initial' = do - i1 <- liftInitialM initial1 - i2 <- liftInitialM initial2 - mv1 <- liftIO $ newMVar i1 - mv2 <- liftIO $ newMVar (Right i2) - t <- control $ \run -> - mask $ \restore -> do - tid <- forkIO $ catch (restore $ void $ run (timerThread mv1 mv2)) - (handleChildException mv2) - run (return tid) - return $ Tuple3' t (Partial mv1) mv2 - step' acc@(Tuple3' t (Partial mv1) mv2) a = do - r1 <- liftIO $ takeMVar mv1 - res <- liftStep step1 r1 a - liftIO $ putMVar mv1 res - case res of - Partial _ -> partialM acc - Done _ -> partialM $ Tuple3' t (Done mv1) mv2 - step' acc@(Tuple3' _ (Done _) _) _ = partialM acc - extract' (Tuple3' tid _ mv2) = do - r2 <- liftIO $ takeMVar mv2 - liftIO $ killThread tid - case r2 of - Left e -> throwM e - Right x -> liftExtract extract2 x - - timerThread mv1 mv2 = do + s <- initial + mv <- liftIO $ newMVar False + t <- + control $ \run -> + mask $ \restore -> do + tid <- + forkIO + $ catch + (restore $ void $ run (timerThread mv)) + (handleChildException mv) + run (return tid) + return $ Tuple3' s mv t + + step' st@(Tuple3' s mv t) a = do + val <- liftIO $ readMVar mv + if val + then Done1 <$> done' st + else do + res <- step s a + return + $ case res of + Partial sres -> Partial $ Tuple3' sres mv t + Done bres -> Done bres + Done1 bres -> Done1 bres + + done' (Tuple3' s _ t) = liftIO (killThread t) >> done s + -- XXX thread should be killed at cleanup + + timerThread mv = do liftIO $ threadDelay (round $ n * 1000000) + -- Use IORef + CAS? instead of MVar since its a Bool? + liftIO $ void $ swapMVar mv True - r1 <- liftIO $ takeMVar mv1 - i1 <- liftInitialM initial1 - liftIO $ putMVar mv1 i1 + handleChildException :: MVar Bool -> SomeException -> IO () + handleChildException mv _ = void $ swapMVar mv True - res1 <- liftExtract extract1 r1 - r2 <- liftIO $ takeMVar mv2 - case r2 of - Left _ -> liftIO $ putMVar mv2 r2 - Right x -> do - res <- liftStep step2 x res1 - case res of - Partial _ -> do - liftIO $ putMVar mv2 $ Right res - timerThread mv1 mv2 - Done _ -> liftIO $ putMVar mv2 $ Right res - - handleChildException :: - MVar (Either SomeException a) -> SomeException -> IO () - handleChildException mv2 e = do - r2 <- takeMVar mv2 - let r = case r2 of - Left _ -> r2 - Right _ -> Left e - putMVar mv2 r - -{- {-# INLINE lsessionsOf #-} lsessionsOf :: MonadAsync m => Double -> Fold m a b -> Fold m b c -> Fold m a c lsessionsOf n split collect = many collect (takeByTime n split) --} diff --git a/src/Streamly/Internal/Data/Parser.hs b/src/Streamly/Internal/Data/Parser.hs index 537aecc292..0a01d7e0ae 100644 --- a/src/Streamly/Internal/Data/Parser.hs +++ b/src/Streamly/Internal/Data/Parser.hs @@ -992,6 +992,8 @@ many f p = K.toParserK $ D.many f (K.fromParserK p) -- succeeds, stop when it fails. This parser fails if not even one result is -- collected. -- +-- @some fld parser = many (takeGE 1 fld) parser@ +-- -- Compare with 'Control.Applicative.some'. -- -- /Internal/ diff --git a/src/Streamly/Internal/Data/Parser/ParserD.hs b/src/Streamly/Internal/Data/Parser/ParserD.hs index 785f2511e9..83db426325 100644 --- a/src/Streamly/Internal/Data/Parser/ParserD.hs +++ b/src/Streamly/Internal/Data/Parser/ParserD.hs @@ -160,11 +160,11 @@ where import Control.Exception (assert) import Control.Monad.Catch (MonadCatch, MonadThrow(..)) -import Streamly.Internal.Data.Fold.Types - (Fold(..), liftInitialM, liftStep, liftExtract) +import Streamly.Internal.Data.Fold.Types (Fold(..)) import Streamly.Internal.Data.Tuple.Strict (Tuple'(..)) import qualified Streamly.Internal.Data.Fold.Types as FL +import qualified Streamly.Internal.Data.Fold as FL import Prelude hiding (any, all, take, takeWhile, sequence, concatMap, maybe, either) @@ -190,6 +190,7 @@ fromFold (Fold fstep finitial fextract) = Parser step finitial fextract case res of FL.Partial s1 -> return $ Partial 0 s1 FL.Done b -> return $ Done 0 b + FL.Done1 b -> return $ Done 1 b ------------------------------------------------------------------------------- @@ -198,23 +199,11 @@ fromFold (Fold fstep finitial fextract) = Parser step finitial fextract -- {-# INLINE any #-} any :: Monad m => (a -> Bool) -> Parser m a Bool -any predicate = Parser step initial return - - where - - initial = return False - - step s a = return (if s || predicate a then Done 0 True else Partial 0 False) +any predicate = fromFold $ FL.any predicate {-# INLINABLE all #-} all :: Monad m => (a -> Bool) -> Parser m a Bool -all predicate = Parser step initial return - - where - - initial = return True - - step s a = return (if s && predicate a then Partial 0 True else Done 0 False) +all predicate = fromFold $ FL.all predicate ------------------------------------------------------------------------------- -- Failing Parsers @@ -310,65 +299,47 @@ either parser = Parser step initial extract ------------------------------------------------------------------------------- -- Taking elements ------------------------------------------------------------------------------- --- + +-- This is takeLE -- | See 'Streamly.Internal.Data.Parser.take'. -- -- /Internal/ -- {-# INLINE take #-} take :: Monad m => Int -> Fold m a b -> Parser m a b -take n (Fold fstep finitial fextract) = Parser step initial extract - - where - - initial = Tuple' 0 <$> liftInitialM finitial - - step (Tuple' i r) a - | i < n = do - res <- liftStep fstep r a - let i1 = i + 1 - s1 = Tuple' i1 res - if i1 < n - then return $ Partial 0 s1 - else Done 0 <$> liftExtract fextract res - | otherwise = Done 1 <$> liftExtract fextract r - - extract (Tuple' _ r) = liftExtract fextract r +take n fld = fromFold $ FL.ltake n fld +-- XXX We are purposefully ignoring the input here -- | See 'Streamly.Internal.Data.Parser.takeEQ'. -- -- /Internal/ -- {-# INLINE takeEQ #-} takeEQ :: MonadThrow m => Int -> Fold m a b -> Parser m a b -takeEQ cnt (Fold fstep finitial fextract) = Parser step initial extract +takeEQ cnt (Fold step initial extract) = Parser step' initial' extract' where n = max cnt 0 + initial' = Tuple' 0 <$> initial - initial = Tuple' 0 <$> liftInitialM finitial - - step (Tuple' i r) a - | i < n = do - res <- liftStep fstep r a - let i1 = i + 1 - s1 = Tuple' i1 res - if i1 < n - then return (Continue 0 s1) - else Done 0 <$> liftExtract fextract res - | otherwise = Done 1 <$> liftExtract fextract r + step' (Tuple' i r) a + | i < n = do + res <- step r a + return + $ case res of + FL.Partial s -> Continue 0 $ Tuple' (i + 1) s + FL.Done _ -> Error $ err (i + 1) + FL.Done1 _ -> Error $ err (i + 1) + | otherwise = Done 1 <$> extract r - extract (Tuple' i r) = - if n == i - then liftExtract fextract r - else throwM $ ParseError err + extract' (Tuple' i r) + | i == n = extract r + | otherwise = throwM $ ParseError $ err i - where + err i = + "takeEQ: Expecting exactly " ++ show n ++ " elements, got " ++ show i - err = - "takeEQ: Expecting exactly " ++ show n - ++ " elements, got " ++ show i -- | See 'Streamly.Internal.Data.Parser.takeGE'. -- @@ -376,34 +347,37 @@ takeEQ cnt (Fold fstep finitial fextract) = Parser step initial extract -- {-# INLINE takeGE #-} takeGE :: MonadThrow m => Int -> Fold m a b -> Parser m a b -takeGE cnt (Fold fstep finitial fextract) = Parser step initial extract +takeGE cnt (Fold step initial extract) = Parser step' initial' extract' + where n = max cnt 0 + initial' = Tuple' 0 <$> initial - initial = Tuple' 0 <$> liftInitialM finitial - - step (Tuple' i r) a = do - res <- liftStep fstep r a - let i1 = i + 1 - s1 = Tuple' i1 res - return $ - if i1 < n - then Continue 0 s1 - else Partial 0 s1 - - extract (Tuple' i r) = liftExtract fextract r >>= f - - where - - err = - "takeGE: Expecting at least " ++ show n - ++ " elements, got only " ++ show i + step' (Tuple' i r) a + | i < n = do + res <- step r a + return + $ case res of + FL.Partial s -> Continue 0 $ Tuple' (i + 1) s + FL.Done _ -> Error $ err (i + 1) + FL.Done1 _ -> Error $ err (i + 1) + | otherwise = do + res <- step r a + return + $ case res of + FL.Partial s -> Partial 0 $ Tuple' (i + 1) s + FL.Done b -> Done 0 b + FL.Done1 b -> Done 1 b + + extract' (Tuple' i b) + | i >= n = extract b + | otherwise = throwM $ ParseError $ err i + + err i = + "takeGE: Expecting at least " + ++ show n ++ " elements, got only " ++ show i - f x = - if i >= n - then return x - else throwM $ ParseError err -- | See 'Streamly.Internal.Data.Parser.takeWhile'. -- @@ -411,17 +385,7 @@ takeGE cnt (Fold fstep finitial fextract) = Parser step initial extract -- {-# INLINE takeWhile #-} takeWhile :: Monad m => (a -> Bool) -> Fold m a b -> Parser m a b -takeWhile predicate (Fold fstep finitial fextract) = - Parser step initial (liftExtract fextract) - - where - - initial = liftInitialM finitial - - step s a = - if predicate a - then Partial 0 <$> liftStep fstep s a - else Done 1 <$> liftExtract fextract s +takeWhile predicate fld = fromFold (FL.ltakeWhile predicate fld) -- | See 'Streamly.Internal.Data.Parser.takeWhile1'. -- @@ -440,20 +404,29 @@ takeWhile1 predicate (Fold fstep finitial fextract) = if predicate a then do s <- finitial - r <- fstep s a - return $ Partial 0 (Just r) - else return $ Error "takeWhile1: empty" + sr <- fstep s a + return + $ case sr of + FL.Partial r -> Partial 0 (Just r) + FL.Done b -> Done 0 b + FL.Done1 _ -> Error err + else return $ Error err step (Just s) a = if predicate a then do - r <- liftStep fstep s a - return $ Partial 0 (Just r) + sr <- fstep s a + case sr of + FL.Partial r -> return $ Partial 0 (Just r) + FL.Done b -> return $ Done 0 b + FL.Done1 b -> return $ Done 1 b else do - b <- liftExtract fextract s + b <- fextract s return $ Done 1 b - extract Nothing = throwM $ ParseError "takeWhile1: end of input" - extract (Just s) = liftExtract fextract s + extract Nothing = throwM $ ParseError err + extract (Just s) = fextract s + + err = "takeWhile1: end of input" -- | See 'Streamly.Internal.Data.Parser.sliceSepBy'. -- @@ -461,16 +434,7 @@ takeWhile1 predicate (Fold fstep finitial fextract) = -- {-# INLINABLE sliceSepBy #-} sliceSepBy :: Monad m => (a -> Bool) -> Fold m a b -> Parser m a b -sliceSepBy predicate (Fold fstep finitial fextract) = - Parser step initial (liftExtract fextract) - - where - - initial = liftInitialM finitial - step s a = - if not (predicate a) - then Partial 0 <$> liftStep fstep s a - else Done 0 <$> liftExtract fextract s +sliceSepBy predicate fld = fromFold $ FL.sliceSepBy predicate fld -- | See 'Streamly.Internal.Data.Parser.sliceEndWith'. -- @@ -499,25 +463,7 @@ sliceBeginWith = undefined {-# INLINABLE sliceSepByMax #-} sliceSepByMax :: Monad m => (a -> Bool) -> Int -> Fold m a b -> Parser m a b -sliceSepByMax predicate cnt (Fold fstep finitial fextract) = - Parser step initial extract - - where - - initial = Tuple' 0 <$> liftInitialM finitial - - step (Tuple' i r) a - | not (predicate a) = - if i < cnt - then do - res <- liftStep fstep r a - let i1 = i + 1 - s1 = Tuple' i1 res - return $ Partial 0 s1 - else Done 1 <$> liftExtract fextract r - | otherwise = Done 0 <$> liftExtract fextract r - - extract (Tuple' _ r) = liftExtract fextract r +sliceSepByMax p n = sliceSepBy p . FL.ltake n -- | See 'Streamly.Internal.Data.Parser.wordBy'. -- @@ -555,18 +501,24 @@ eqBy cmp str = Parser step initial extract initial = return str step [] _ = return $ Done 0 () - step [x] a = return $ - if x `cmp` a - then Done 0 () - else Error "eqBy: failed, yet to match the last element" - step (x:xs) a = return $ - if x `cmp` a - then Continue 0 xs - else Error $ - "eqBy: failed, yet to match " ++ show (length xs + 1) ++ " elements" - - extract xs = throwM $ ParseError $ - "eqBy: end of input, yet to match " ++ show (length xs) ++ " elements" + step [x] a = + return + $ if x `cmp` a + then Done 0 () + else Error "eqBy: failed, yet to match the last element" + step (x:xs) a = + return + $ if x `cmp` a + then Continue 0 xs + else Error + $ "eqBy: failed, yet to match " + ++ show (length xs + 1) ++ " elements" + + extract xs = + throwM + $ ParseError + $ "eqBy: end of input, yet to match " + ++ show (length xs) ++ " elements" ------------------------------------------------------------------------------- -- nested parsers @@ -578,8 +530,7 @@ eqBy cmp str = Parser step initial extract -- {-# INLINE lookAhead #-} lookAhead :: MonadThrow m => Parser m a b -> Parser m a b -lookAhead (Parser step1 initial1 _) = - Parser step initial extract +lookAhead (Parser step1 initial1 _) = Parser step initial extract where @@ -588,17 +539,20 @@ lookAhead (Parser step1 initial1 _) = step (Tuple' cnt st) a = do r <- step1 st a let cnt1 = cnt + 1 - return $ case r of - Partial n s -> Continue n (Tuple' (cnt1 - n) s) - Continue n s -> Continue n (Tuple' (cnt1 - n) s) - Done _ b -> Done cnt1 b - Error err -> Error err + return + $ case r of + Partial n s -> Continue n (Tuple' (cnt1 - n) s) + Continue n s -> Continue n (Tuple' (cnt1 - n) s) + Done _ b -> Done cnt1 b + Error err -> Error err -- XXX returning an error let's us backtrack. To implement it in a way so -- that it terminates on eof without an error then we need a way to -- backtrack on eof, that will require extract to return 'Step' type. - extract (Tuple' n _) = throwM $ ParseError $ - "lookAhead: end of input after consuming " ++ show n ++ " elements" + extract (Tuple' n _) = + throwM + $ ParseError + $ "lookAhead: end of input after consuming " ++ show n ++ " elements" ------------------------------------------------------------------------------- -- Interleaving @@ -689,7 +643,9 @@ count :: count n = countBetween n n -- count n f p = many (takeEQ n f) p -data ManyTillState fs sr sl = ManyTillR Int fs sr | ManyTillL fs sl +data ManyTillState fs sr sl + = ManyTillR Int fs sr + | ManyTillL Int fs sl -- | See 'Streamly.Internal.Data.Parser.manyTill'. -- @@ -705,9 +661,7 @@ manyTill (Fold fstep finitial fextract) where - initial = do - fs <- liftInitialM finitial - ManyTillR 0 fs <$> initialR + initial = ManyTillR 0 <$> finitial <*> initialR step (ManyTillR cnt fs st) a = do r <- stepR st a @@ -717,22 +671,34 @@ manyTill (Fold fstep finitial fextract) assert (cnt + 1 - n >= 0) (return ()) return $ Continue n (ManyTillR (cnt + 1 - n) fs s) Done n _ -> do - b <- liftExtract fextract fs + b <- fextract fs return $ Done n b Error _ -> do rR <- initialL - return $ Continue (cnt + 1) (ManyTillL fs rR) - - step (ManyTillL fs st) a = do + return $ Continue (cnt + 1) (ManyTillL 0 fs rR) + step (ManyTillL cnt fs st) a = do r <- stepL st a case r of - Partial n s -> return $ Partial n (ManyTillL fs s) - Continue n s -> return $ Continue n (ManyTillL fs s) + Partial n s -> return $ Partial n (ManyTillL 0 fs s) + Continue n s -> do + assert (cnt + 1 - n >= 0) (return ()) + return $ Continue n (ManyTillL (cnt + 1 - n) fs s) Done n b -> do - fs1 <- liftStep fstep fs b - l <- initialR - return $ Partial n (ManyTillR 0 fs1 l) + sfs1 <- fstep fs b + case sfs1 of + FL.Partial fs1 -> do + l <- initialR + return $ Partial n (ManyTillR 0 fs1 l) + FL.Done fb -> return $ Done n fb + FL.Done1 fb -> do + assert (cnt + 1 - n >= 0) (return ()) + return $ Done (cnt + 1) fb Error err -> return $ Error err - extract (ManyTillL fs sR) = extractL sR >>= liftStep fstep fs >>= liftExtract fextract - extract (ManyTillR _ fs _) = liftExtract fextract fs + extract (ManyTillL _ fs sR) = do + res <- extractL sR >>= fstep fs + case res of + FL.Partial sres -> fextract sres + FL.Done bres -> return bres + FL.Done1 bres -> return bres + extract (ManyTillR _ fs _) = fextract fs diff --git a/src/Streamly/Internal/Data/Parser/ParserD/Types.hs b/src/Streamly/Internal/Data/Parser/ParserD/Types.hs index 3eab7d4f0c..a75c794c29 100644 --- a/src/Streamly/Internal/Data/Parser/ParserD/Types.hs +++ b/src/Streamly/Internal/Data/Parser/ParserD/Types.hs @@ -134,10 +134,11 @@ import Control.Exception (assert, Exception(..)) import Control.Monad (MonadPlus(..)) import Control.Monad.Catch (MonadCatch, try, throwM, MonadThrow) import Fusion.Plugin.Types (Fuse(..)) -import Streamly.Internal.Data.Fold.Types (liftInitialM, liftStep, liftExtract) import Streamly.Internal.Data.Fold (Fold(..), toList) import Streamly.Internal.Data.Tuple.Strict (Tuple3'(..)) +import qualified Streamly.Internal.Data.Fold as FL + import Prelude hiding (concatMap) -- | The return type of a 'Parser' step. @@ -429,6 +430,7 @@ alt (Parser stepL initialL extractL) (Parser stepR initialR extractR) = extract (AltParseR sR) = extractR sR extract (AltParseL _ sL) = extractL sL +-- XXX We are ignoring the Error? -- | See documentation of 'Streamly.Internal.Data.Parser.many'. -- -- /Internal/ @@ -436,13 +438,13 @@ alt (Parser stepL initialL extractL) (Parser stepR initialR extractR) = {-# INLINE splitMany #-} splitMany :: MonadCatch m => Fold m b c -> Parser m a b -> Parser m a c splitMany (Fold fstep finitial fextract) (Parser step1 initial1 extract1) = - Parser step initial extract + Parser step initial extract where initial = do ps <- initial1 -- parse state - fs <- liftInitialM finitial -- fold state + fs <- finitial -- fold state pure (Tuple3' ps (0 :: Int) fs) {-# INLINE step #-} @@ -458,19 +460,30 @@ splitMany (Fold fstep finitial fextract) (Parser step1 initial1 extract1) = return $ Continue n (Tuple3' s (cnt1 - n) fs) Done n b -> do s <- initial1 - fs1 <- liftStep fstep fs b - return $ Partial n (Tuple3' s 0 fs1) + fs1 <- fstep fs b + return + $ case fs1 of + FL.Partial s1 -> Partial n (Tuple3' s 0 s1) + FL.Done b1 -> Done n b1 + FL.Done1 b1 -> Done cnt1 b1 Error _ -> do - xs <- liftExtract fextract fs + xs <- fextract fs return $ Done cnt1 xs - -- XXX The "try" may impact performance if this parser is used as a scan + extract (Tuple3' s _ fs) = do r <- try $ extract1 s case r of - Left (_ :: ParseError) -> liftExtract fextract fs - Right b -> liftStep fstep fs b >>= liftExtract fextract - + Left (_ :: ParseError) -> fextract fs + Right b -> do + fs1 <- fstep fs b + case fs1 of + FL.Partial s1 -> fextract s1 + FL.Done b1 -> return b1 + FL.Done1 b1 -> return b1 + +-- XXX Unwrap Either into their own constructors? +-- XXX I think haskell automatically does this though. Need to check. -- | See documentation of 'Streamly.Internal.Data.Parser.some'. -- -- /Internal/ @@ -484,19 +497,23 @@ splitSome (Fold fstep finitial fextract) (Parser step1 initial1 extract1) = initial = do ps <- initial1 -- parse state - fs <- liftInitialM finitial -- fold state + fs <- finitial -- fold state pure (Tuple3' ps (0 :: Int) (Left fs)) {-# INLINE step #-} step (Tuple3' st _ (Left fs)) a = do r <- step1 st a case r of - Partial n s -> return $ Continue n (Tuple3' s 0 (Left fs)) - Continue n s -> return $ Continue n (Tuple3' s 0 (Left fs)) + Partial n s -> return $ Continue n (Tuple3' s undefined (Left fs)) + Continue n s -> return $ Continue n (Tuple3' s undefined (Left fs)) Done n b -> do s <- initial1 - fs1 <- liftStep fstep fs b - return $ Partial n (Tuple3' s 0 (Right fs1)) + fs1 <- fstep fs b + return + $ case fs1 of + FL.Partial s1 -> Partial n (Tuple3' s 0 (Right s1)) + FL.Done b1 -> Done n b1 + FL.Done1 b1 -> Done n b1 Error err -> return $ Error err step (Tuple3' st cnt (Right fs)) a = do r <- step1 st a @@ -510,17 +527,32 @@ splitSome (Fold fstep finitial fextract) (Parser step1 initial1 extract1) = return $ Continue n (Tuple3' s (cnt1 - n) (Right fs)) Done n b -> do s <- initial1 - fs1 <- liftStep fstep fs b - return $ Partial n (Tuple3' s 0 (Right fs1)) - Error _ -> Done cnt1 <$> liftExtract fextract fs - + fs1 <- fstep fs b + return + $ case fs1 of + FL.Partial s1 -> Partial n (Tuple3' s 0 (Right s1)) + FL.Done b1 -> Done n b1 + FL.Done1 b1 -> Done cnt1 b1 + Error _ -> Done cnt1 <$> fextract fs -- XXX The "try" may impact performance if this parser is used as a scan - extract (Tuple3' s _ (Left fs)) = extract1 s >>= liftStep fstep fs >>= liftExtract fextract + + extract (Tuple3' s _ (Left fs)) = do + b <- extract1 s + fs1 <- fstep fs b + case fs1 of + FL.Partial s1 -> fextract s1 + FL.Done b1 -> return b1 + FL.Done1 b1 -> return b1 extract (Tuple3' s _ (Right fs)) = do r <- try $ extract1 s case r of - Left (_ :: ParseError) -> liftExtract fextract fs - Right b -> liftStep fstep fs b >>= liftExtract fextract + Left (_ :: ParseError) -> fextract fs + Right b -> do + fs1 <- fstep fs b + case fs1 of + FL.Partial s1 -> fextract s1 + FL.Done b1 -> return b1 + FL.Done1 b1 -> return b1 -- | See 'Streamly.Internal.Data.Parser.die'. -- diff --git a/src/Streamly/Internal/Data/SmallArray.hs b/src/Streamly/Internal/Data/SmallArray.hs index c1f895ccfc..d72ae853ff 100644 --- a/src/Streamly/Internal/Data/SmallArray.hs +++ b/src/Streamly/Internal/Data/SmallArray.hs @@ -107,15 +107,19 @@ foldr f z arr = runIdentity $ D.foldr f z $ toStreamD arr {-# INLINE_NORMAL writeN #-} writeN :: MonadIO m => Int -> Fold m a (SmallArray a) writeN limit = Fold step initial extract - where + + where + initial = do marr <- liftIO $ newSmallArray limit bottomElement return (Tuple' marr 0) - step (Tuple' marr i) x - | i == limit = fmap FL.Done $ liftIO $ freezeSmallArray marr 0 i + + step st@(Tuple' marr i) x + | i == limit = FL.Done <$> extract st | otherwise = do liftIO $ writeSmallArray marr i x - FL.partialM (Tuple' marr (i + 1)) + return $ FL.Partial (Tuple' marr (i + 1)) + extract (Tuple' marr len) = liftIO $ freezeSmallArray marr 0 len {-# INLINE_NORMAL fromStreamDN #-} @@ -165,7 +169,7 @@ toStreamRev = D.fromStreamD . toStreamDRev {-# INLINE fold #-} fold :: Monad m => Fold m a b -> SmallArray a -> m b -fold f arr = D.runFold f (toStreamD arr) +fold f arr = D.foldOnce f (toStreamD arr) {-# INLINE streamFold #-} streamFold :: Monad m => (SerialT m a -> m b) -> SmallArray a -> m b diff --git a/src/Streamly/Internal/Data/Stream/IsStream.hs b/src/Streamly/Internal/Data/Stream/IsStream.hs index 8e0a52852d..1035cb84e0 100644 --- a/src/Streamly/Internal/Data/Stream/IsStream.hs +++ b/src/Streamly/Internal/Data/Stream/IsStream.hs @@ -1415,7 +1415,7 @@ foldlM' step begin m = S.foldlM' step begin $ toStreamS m -- @since 0.7.0 {-# INLINE fold #-} fold :: Monad m => Fold m a b -> SerialT m a -> m b -fold = P.runFold +fold = P.foldOnce ------------------------------------------------------------------------------ -- Running a sink @@ -2346,14 +2346,14 @@ scanl1' step m = fromStreamD $ D.scanl1' step $ toStreamD m -- @since 0.7.0 {-# INLINE scan #-} scan :: (IsStream t, Monad m) => Fold m a b -> t m a -> t m b -scan = P.scanFold +scan = P.scanOnce -- | Postscan a stream using the given monadic fold. -- -- @since 0.7.0 {-# INLINE postscan #-} postscan :: (IsStream t, Monad m) => Fold m a b -> t m a -> t m b -postscan = P.postscanFold +postscan = P.postscanOnce ------------------------------------------------------------------------------ -- Stateful Transformations @@ -4055,7 +4055,7 @@ splitWithSuffix :: (IsStream t, Monad m) => (a -> Bool) -> Fold m a b -> t m a -> t m b splitWithSuffix predicate f m = - D.fromStreamD $ D.splitSuffixBy' predicate f (D.toStreamD m) + D.fromStreamD $ D.splitSuffixWith predicate f (D.toStreamD m) ------------------------------------------------------------------------------ -- Split on a delimiter sequence @@ -4652,14 +4652,16 @@ data SessionState t m k a b = SessionState #undef Type +-- XXX Perhaps we should use an "Event a" type to represent timestamped data. +-- XXX I've replaced it with the most natural implementation. Check logic. -- | @classifySessionsBy tick timeout idle pred f stream@ groups timestamped -- events in an input event stream into sessions based on a session key. Each --- element in the stream is an event consisting of a triple @(session key, +-- element in the input stream is an event consisting of a triple @(session key, -- sesssion data, timestamp)@. @session key@ is a key that uniquely identifies --- the session. All the events belonging to a session are folded using the --- fold @f@ until the fold returns a 'Left' result or a timeout has occurred. --- The session key and the result of the fold are emitted in the output stream --- when the session is purged. +-- the session. All the events belonging to a session are folded using the fold +-- @f@ until the fold terminates or a timeout has occurred. The session key and +-- the result of the fold are emitted in the output stream when the session is +-- purged. -- -- When @idle@ is 'False', @timeout@ is the maximum lifetime of a session in -- seconds, measured from the @timestamp@ of the first event in that session. @@ -4681,35 +4683,33 @@ data SessionState t m k a b = SessionState -- -- /Internal/ -- - --- XXX Perhaps we should use an "Event a" type to represent timestamped data. {-# INLINABLE classifySessionsBy #-} -classifySessionsBy - :: (IsStream t, MonadAsync m, Ord k) - => Double -- ^ timer tick in seconds - -> Double -- ^ session timeout in seconds - -> Bool -- ^ reset the timeout when an event is received +classifySessionsBy :: + (IsStream t, MonadAsync m, Ord k) + => Double -- ^ timer tick in seconds + -> Double -- ^ session timeout in seconds + -> Bool -- ^ reset the timeout when an event is received -> (Int -> m Bool) -- ^ predicate to eject sessions based on session count - -> Fold m a (Either b b) -- ^ Fold to be applied to session events + -> Fold m a b -- ^ Fold to be applied to session events -> t m (k, a, AbsTime) -- ^ session key, data, timestamp -> t m (k, b) -- ^ session key, fold result -classifySessionsBy tick tmout reset ejectPred - (Fold step initial extract) str = - concatMap sessionOutputStream $ - scanlMAfter' sstep (return szero) flush stream +classifySessionsBy tick tmout reset ejectPred (Fold step initial extract) str = + concatMap sessionOutputStream + $ scanlMAfter' sstep (return szero) flush stream where timeoutMs = toRelTime (round (tmout * 1000) :: MilliSecond64) tickMs = toRelTime (round (tick * 1000) :: MilliSecond64) - szero = SessionState - { sessionCurTime = toAbsTime (0 :: MilliSecond64) - , sessionEventTime = toAbsTime (0 :: MilliSecond64) - , sessionCount = 0 - , sessionTimerHeap = H.empty - , sessionKeyValueMap = Map.empty - , sessionOutputStream = K.nil - } + szero = + SessionState + { sessionCurTime = toAbsTime (0 :: MilliSecond64) + , sessionEventTime = toAbsTime (0 :: MilliSecond64) + , sessionCount = 0 + , sessionTimerHeap = H.empty + , sessionKeyValueMap = Map.empty + , sessionOutputStream = K.nil + } -- We can eject sessions based on the current session count to limit -- memory consumption. There are two possible strategies: @@ -4720,9 +4720,8 @@ classifySessionsBy tick tmout reset ejectPred -- old ones. -- -- We use the first strategy as of now. - -- Got a new stream input element - sstep session@SessionState{..} (Just (key, value, timestamp)) = do + sstep session@SessionState {..} (Just (key, value, timestamp)) = do -- XXX we should use a heap in pinned memory to scale it to a large -- size -- @@ -4736,95 +4735,91 @@ classifySessionsBy tick tmout reset ejectPred -- better performance. -- let curTime = max sessionEventTime timestamp - accumulate v = do - old <- case v of - Nothing -> FL.liftInitialM initial - Just (Tuple' _ acc) -> return acc - new <- FL.liftStep step old value - return $ Tuple' timestamp new mOld = Map.lookup key sessionKeyValueMap - - acc@(Tuple' _ fres) <- accumulate mOld - res <- FL.liftExtract extract fres - case res of - Left x -> do + old <- + case mOld of + Nothing -> initial + Just (Tuple' _ acc) -> return acc + res <- step old value + let onTerminate x = do -- deleting a key from the heap is expensive, so we never -- delete a key from heap, we just purge it from the Map and it -- gets purged from the heap on timeout. We just need an extra -- lookup in the Map when the key is purged from the heap, that -- should not be expensive. -- - let (mp, cnt) = case mOld of - Nothing -> (sessionKeyValueMap, sessionCount) - Just _ -> (Map.delete key sessionKeyValueMap - , sessionCount - 1) - return $ session - { sessionCurTime = curTime - , sessionEventTime = curTime - , sessionCount = cnt - , sessionKeyValueMap = mp - , sessionOutputStream = yield (key, x) - } - Right _ -> do - (hp1, mp1, out1, cnt1) <- do - let vars = (sessionTimerHeap, sessionKeyValueMap, - K.nil, sessionCount) + let (mp, cnt) = case mOld of - -- inserting new entry - Nothing -> do - -- Eject a session from heap and map is needed - eject <- ejectPred sessionCount - (hp, mp, out, cnt) <- - if eject - then ejectOne vars - else return vars - - -- Insert the new session in heap - let expiry = addToAbsTime timestamp timeoutMs - hp' = H.insert (Entry expiry key) hp - in return (hp', mp, out, cnt + 1) - -- updating old entry - Just _ -> return vars - + Nothing -> (sessionKeyValueMap, sessionCount) + Just _ -> + ( Map.delete key sessionKeyValueMap + , sessionCount - 1) + return + $ session + { sessionCurTime = curTime + , sessionEventTime = curTime + , sessionCount = cnt + , sessionKeyValueMap = mp + , sessionOutputStream = yield (key, x) + } + case res of + -- Although the fold did not consume the element, it is part of a + -- bigger obejct which was used. Check this behaviour? + FL.Done1 x -> onTerminate x + FL.Done x -> onTerminate x + FL.Partial new -> do + let acc = Tuple' timestamp new + (hp1, mp1, out1, cnt1) <- + do let vars = + ( sessionTimerHeap + , sessionKeyValueMap + , K.nil + , sessionCount) + case mOld of + -- inserting new entry + Nothing -> do + -- Eject a session from heap and map is needed + eject <- ejectPred sessionCount + (hp, mp, out, cnt) <- + if eject + then ejectOne vars + else return vars + -- Insert the new session in heap + let expiry = addToAbsTime timestamp timeoutMs + hp' = H.insert (Entry expiry key) hp + in return (hp', mp, out, cnt + 1) + -- updating old entry + Just _ -> return vars let mp2 = Map.insert key acc mp1 - return $ SessionState - { sessionCurTime = curTime - , sessionEventTime = curTime - , sessionCount = cnt1 - , sessionTimerHeap = hp1 - , sessionKeyValueMap = mp2 - , sessionOutputStream = out1 - } - + return + $ SessionState + { sessionCurTime = curTime + , sessionEventTime = curTime + , sessionCount = cnt1 + , sessionTimerHeap = hp1 + , sessionKeyValueMap = mp2 + , sessionOutputStream = out1 + } -- Got a timer tick event - sstep sessionState@SessionState{..} Nothing = + sstep sessionState@SessionState {..} Nothing = let curTime = addToAbsTime sessionCurTime tickMs - in ejectExpired sessionState curTime + in ejectExpired sessionState curTime - flush session@SessionState{..} = do + flush session@SessionState {..} = do (hp', mp', out, count) <- - ejectAll - ( sessionTimerHeap - , sessionKeyValueMap - , K.nil - , sessionCount - ) - return $ session - { sessionCount = count - , sessionTimerHeap = hp' - , sessionKeyValueMap = mp' - , sessionOutputStream = out - } - - fromEither e = - case e of - Left x -> x - Right x -> x + ejectAll (sessionTimerHeap, sessionKeyValueMap, K.nil, sessionCount) + return + $ session + { sessionCount = count + , sessionTimerHeap = hp' + , sessionKeyValueMap = mp' + , sessionOutputStream = out + } -- delete from map and output the fold accumulator ejectEntry hp mp out cnt acc key = do - sess <- FL.liftExtract extract acc - let out1 = (key, fromEither sess) `K.cons` out + sess <- extract acc + let out1 = (key, sess) `K.cons` out let mp1 = Map.delete key mp return (hp, mp1, out1, cnt - 1) @@ -4832,9 +4827,10 @@ classifySessionsBy tick tmout reset ejectPred let hres = H.uncons hp case hres of Just (Entry _ key, hp1) -> do - r <- case Map.lookup key mp of - Nothing -> return (hp1, mp, out, cnt) - Just (Tuple' _ acc) -> ejectEntry hp1 mp out cnt acc key + r <- + case Map.lookup key mp of + Nothing -> return (hp1, mp, out, cnt) + Just (Tuple' _ acc) -> ejectEntry hp1 mp out cnt acc key ejectAll r Nothing -> do assert (Map.null mp) (return ()) @@ -4850,24 +4846,24 @@ classifySessionsBy tick tmout reset ejectPred let expiry1 = addToAbsTime latestTS timeoutMs if not reset || expiry1 <= expiry then ejectEntry hp1 mp out cnt acc key - else - -- reset the session timeout and continue - let hp2 = H.insert (Entry expiry1 key) hp1 - in ejectOne (hp2, mp, out, cnt) + else -- reset the session timeout and continue + let hp2 = H.insert (Entry expiry1 key) hp1 + in ejectOne (hp2, mp, out, cnt) Nothing -> do assert (Map.null mp) (return ()) return (hp, mp, out, cnt) - ejectExpired session@SessionState{..} curTime = do + ejectExpired session@SessionState {..} curTime = do (hp', mp', out, count) <- ejectLoop sessionTimerHeap sessionKeyValueMap K.nil sessionCount - return $ session - { sessionCurTime = curTime - , sessionCount = count - , sessionTimerHeap = hp' - , sessionKeyValueMap = mp' - , sessionOutputStream = out - } + return + $ session + { sessionCurTime = curTime + , sessionCount = count + , sessionTimerHeap = hp' + , sessionKeyValueMap = mp' + , sessionOutputStream = out + } where @@ -4882,20 +4878,17 @@ classifySessionsBy tick tmout reset ejectPred r <- ejectPred cnt return (r, r) if eject - then - case Map.lookup key mp of - Nothing -> ejectLoop hp1 mp out cnt - Just (Tuple' latestTS acc) -> do - let expiry1 = addToAbsTime latestTS timeoutMs - if expiry1 <= curTime || not reset || force - then do - (hp2,mp1,out1,cnt1) <- - ejectEntry hp1 mp out cnt acc key - ejectLoop hp2 mp1 out1 cnt1 - else - -- reset the session timeout and continue - let hp2 = H.insert (Entry expiry1 key) hp1 - in ejectLoop hp2 mp out cnt + then case Map.lookup key mp of + Nothing -> ejectLoop hp1 mp out cnt + Just (Tuple' latestTS acc) -> do + let expiry1 = addToAbsTime latestTS timeoutMs + if expiry1 <= curTime || not reset || force + then do + (hp2, mp1, out1, cnt1) <- + ejectEntry hp1 mp out cnt acc key + ejectLoop hp2 mp1 out1 cnt1 + else let hp2 = H.insert (Entry expiry1 key) hp1 + in ejectLoop hp2 mp out cnt else return (hp, mp, out, cnt) Nothing -> do assert (Map.null mp) (return ()) @@ -4921,11 +4914,11 @@ classifySessionsBy tick tmout reset ejectPred -- /Internal/ -- {-# INLINABLE classifyKeepAliveSessions #-} -classifyKeepAliveSessions - :: (IsStream t, MonadAsync m, Ord k) - => Double -- ^ session inactive timeout +classifyKeepAliveSessions :: + (IsStream t, MonadAsync m, Ord k) + => Double -- ^ session inactive timeout -> (Int -> m Bool) -- ^ predicate to eject sessions on session count - -> Fold m a (Either b b) -- ^ Fold to be applied to session payload data + -> Fold m a b -- ^ Fold to be applied to session payload data -> t m (k, a, AbsTime) -- ^ session key, data, timestamp -> t m (k, b) classifyKeepAliveSessions tmout = @@ -4988,11 +4981,11 @@ classifyChunksOf wsize = classifyChunksBy wsize False -- /Internal/ -- {-# INLINABLE classifySessionsOf #-} -classifySessionsOf - :: (IsStream t, MonadAsync m, Ord k) - => Double -- ^ time window size +classifySessionsOf :: + (IsStream t, MonadAsync m, Ord k) + => Double -- ^ time window size -> (Int -> m Bool) -- ^ predicate to eject sessions on session count - -> Fold m a (Either b b) -- ^ Fold to be applied to session events + -> Fold m a b -- ^ Fold to be applied to session events -> t m (k, a, AbsTime) -- ^ session key, data, timestamp -> t m (k, b) classifySessionsOf interval = diff --git a/src/Streamly/Internal/Data/Stream/Prelude.hs b/src/Streamly/Internal/Data/Stream/Prelude.hs index 485fc378b7..cb6a7915e0 100644 --- a/src/Streamly/Internal/Data/Stream/Prelude.hs +++ b/src/Streamly/Internal/Data/Stream/Prelude.hs @@ -33,7 +33,7 @@ module Streamly.Internal.Data.Stream.Prelude , foldlx' , foldlMx' , foldl' - , runFold + , foldOnce , parselMx' -- Lazy left folds are useful only for reversing the stream @@ -44,8 +44,8 @@ module Streamly.Internal.Data.Stream.Prelude , scanlMx' , postscanlx' , postscanlMx' - , postscanFold - , scanFold + , postscanOnce + , scanOnce -- * Zip style operations , eqBy @@ -202,9 +202,9 @@ foldlT :: (Monad m, IsStream t, Monad (s m), MonadTrans s) => (s m b -> a -> s m b) -> s m b -> t m a -> s m b foldlT f z s = S.foldlT f z (toStreamS s) -{-# INLINE runFold #-} -runFold :: (Monad m, IsStream t) => Fold m a b -> t m a -> m b -runFold fld m = S.runFold fld $ toStreamS m +{-# INLINE foldOnce #-} +foldOnce :: (Monad m, IsStream t) => Fold m a b -> t m a -> m b +foldOnce fld m = S.foldOnce fld $ toStreamS m ------------------------------------------------------------------------------ -- Scans @@ -232,16 +232,16 @@ scanlMx' :: (IsStream t, Monad m) scanlMx' step begin done m = D.fromStreamD $ D.scanlMx' step begin done $ D.toStreamD m -{-# INLINE_NORMAL postscanFold #-} -postscanFold :: (IsStream t, Monad m) +{-# INLINE_NORMAL postscanOnce #-} +postscanOnce :: (IsStream t, Monad m) => Fold m a b -> t m a -> t m b -postscanFold fld m = - D.fromStreamD $ D.postscanFold fld $ D.toStreamD m +postscanOnce fld m = + D.fromStreamD $ D.postscanOnce fld $ D.toStreamD m -{-# INLINE scanFold #-} -scanFold :: (IsStream t, Monad m) +{-# INLINE scanOnce #-} +scanOnce :: (IsStream t, Monad m) => Fold m a b -> t m a -> t m b -scanFold fld m = D.fromStreamD $ D.scanFold fld $ D.toStreamD m +scanOnce fld m = D.fromStreamD $ D.scanOnce fld $ D.toStreamD m -- scanl followed by map -- diff --git a/src/Streamly/Internal/Data/Stream/StreamD.hs b/src/Streamly/Internal/Data/Stream/StreamD.hs index e3a7a45792..e7566af6e4 100644 --- a/src/Streamly/Internal/Data/Stream/StreamD.hs +++ b/src/Streamly/Internal/Data/Stream/StreamD.hs @@ -102,7 +102,7 @@ module Streamly.Internal.Data.Stream.StreamD , foldlx' , foldlMx' - , runFold + , foldOnce , parselMx' , parseMany @@ -166,7 +166,7 @@ module Streamly.Internal.Data.Stream.StreamD , splitBy , splitSuffixBy , wordsBy - , splitSuffixBy' + , splitSuffixWith , splitOn , splitSuffixOn @@ -224,8 +224,8 @@ module Streamly.Internal.Data.Stream.StreamD , postscanlMx' , scanlMx' , scanlx' - , postscanFold - , scanFold + , postscanOnce + , scanOnce -- * Filtering , filter @@ -325,6 +325,7 @@ import Data.Word (Word32) import Foreign.Ptr (Ptr) import Foreign.Storable (Storable(..)) import GHC.Types (SPEC(..)) +import GHC.IO (unsafePerformIO) import System.Mem (performMajorGC) import Fusion.Plugin.Types (Fuse(..)) import Streamly.Internal.Data.IORef.Prim (Prim) @@ -332,14 +333,14 @@ import Streamly.Internal.Data.Time.Units (TimeUnit64, toRelTime64, diffAbsTime64, RelTime64) import Streamly.Internal.Data.Atomics (atomicModifyIORefCAS_) import Streamly.Internal.Data.Array.Storable.Foreign.Types (Array(..)) -import Streamly.Internal.Data.Fold.Types (Fold(..), liftStep, liftExtract, liftInitialM) +import Streamly.Internal.Data.Fold.Types (Fold(..)) import Streamly.Internal.Data.Parser (ParseError(..)) import Streamly.Internal.Data.Pipe.Types (Pipe(..), PipeState(..)) import Streamly.Internal.Data.Time.Clock (Clock(Monotonic), getTime) import Streamly.Internal.Data.Time.Units (MicroSecond64(..), fromAbsTime, toAbsTime, AbsTime) import Streamly.Internal.Data.Unfold.Types (Unfold(..)) -import Streamly.Internal.Data.Tuple.Strict (Tuple3'(..)) +import Streamly.Internal.Data.Tuple.Strict (Tuple'(..), Tuple3'(..)) import Streamly.Internal.Data.Stream.SVar (fromConsumer, pushToFold) import qualified Streamly.Internal.Data.IORef.Prim as Prim @@ -1507,42 +1508,10 @@ reverse' m = -- Grouping/Splitting ------------------------------------------------------------------------------ -{-# INLINE_NORMAL splitSuffixBy' #-} -splitSuffixBy' :: Monad m +{-# INLINE_NORMAL splitSuffixWith #-} +splitSuffixWith :: Monad m => (a -> Bool) -> Fold m a b -> Stream m a -> Stream m b -splitSuffixBy' predicate f (Stream step state) = - Stream (stepOuter f) (Just state) - - where - - {-# INLINE_LATE stepOuter #-} - stepOuter (Fold fstep initial done) gst (Just st) = do - res <- step (adaptState gst) st - case res of - Yield x s -> do - acc <- initial - acc' <- fstep acc x - if (predicate x) - then liftExtract done acc' >>= \val -> return $ Yield val (Just s) - else go SPEC s acc' - - Skip s -> return $ Skip $ Just s - Stop -> return Stop - - where - - go !_ stt !acc = do - res <- step (adaptState gst) stt - case res of - Yield x s -> do - acc' <- liftStep fstep acc x - if (predicate x) - then liftExtract done acc' >>= \val -> return $ Yield val (Just s) - else go SPEC s acc' - Skip s -> go SPEC s acc - Stop -> liftExtract done acc >>= \val -> return $ Yield val Nothing - - stepOuter _ _ Nothing = return Stop +splitSuffixWith predicate f = foldMany1 (FL.sliceSepWith predicate f) {-# INLINE_NORMAL groupsBy #-} groupsBy :: Monad m @@ -1550,57 +1519,7 @@ groupsBy :: Monad m -> Fold m a b -> Stream m a -> Stream m b -groupsBy cmp f (Stream step state) = Stream (stepOuter f) (Just state, Nothing) - - where - - {-# INLINE_LATE stepOuter #-} - stepOuter (Fold fstep initial done) gst (Just st, Nothing) = do - res <- step (adaptState gst) st - case res of - Yield x s -> do - acc <- initial - acc' <- fstep acc x - go SPEC x s acc' - - Skip s -> return $ Skip $ (Just s, Nothing) - Stop -> return Stop - - where - - go !_ prev stt !acc = do - res <- step (adaptState gst) stt - case res of - Yield x s -> do - if cmp x prev - then do - acc' <- liftStep fstep acc x - go SPEC prev s acc' - else liftExtract done acc >>= \r -> return $ Yield r (Just s, Just x) - Skip s -> go SPEC prev s acc - Stop -> liftExtract done acc >>= \r -> return $ Yield r (Nothing, Nothing) - - stepOuter (Fold fstep initial done) gst (Just st, Just prev) = do - acc <- initial - acc' <- fstep acc prev - go SPEC st acc' - - where - - -- XXX code duplicated from the previous equation - go !_ stt !acc = do - res <- step (adaptState gst) stt - case res of - Yield x s -> do - if cmp x prev - then do - acc' <- liftStep fstep acc x - go SPEC s acc' - else liftExtract done acc >>= \r -> return $ Yield r (Just s, Just x) - Skip s -> go SPEC s acc - Stop -> liftExtract done acc >>= \r -> return $ Yield r (Nothing, Nothing) - - stepOuter _ _ (Nothing,_) = return Stop +groupsBy cmp f = foldMany (FL.groupBy cmp f) {-# INLINE_NORMAL groupsRollingBy #-} groupsRollingBy :: Monad m @@ -1608,156 +1527,79 @@ groupsRollingBy :: Monad m -> Fold m a b -> Stream m a -> Stream m b -groupsRollingBy cmp f (Stream step state) = - Stream (stepOuter f) (Just state, Nothing) - where - - {-# INLINE_LATE stepOuter #-} - stepOuter (Fold fstep initial done) gst (Just st, Nothing) = do - res <- step (adaptState gst) st - case res of - Yield x s -> do - acc <- initial - acc' <- fstep acc x - go SPEC x s acc' - - Skip s -> return $ Skip $ (Just s, Nothing) - Stop -> return Stop - - where - go !_ prev stt !acc = do - res <- step (adaptState gst) stt - case res of - Yield x s -> do - if cmp prev x - then do - acc' <- liftStep fstep acc x - go SPEC x s acc' - else - liftExtract done acc >>= \r -> return $ Yield r (Just s, Just x) - Skip s -> go SPEC prev s acc - Stop -> liftExtract done acc >>= \r -> return $ Yield r (Nothing, Nothing) - - stepOuter (Fold fstep initial done) gst (Just st, Just prev') = do - acc <- initial - acc' <- fstep acc prev' - go SPEC prev' st acc' - - where - go !_ prevv stt !acc = do - res <- step (adaptState gst) stt - case res of - Yield x s -> do - if cmp prevv x - then do - acc' <- liftStep fstep acc x - go SPEC x s acc' - else liftExtract done acc >>= \r -> return $ Yield r (Just s, Just x) - Skip s -> go SPEC prevv s acc - Stop -> liftExtract done acc >>= \r -> return $ Yield r (Nothing, Nothing) - - stepOuter _ _ (Nothing, _) = return Stop +groupsRollingBy cmp f = foldMany (FL.groupByRolling cmp f) {-# INLINE_NORMAL splitBy #-} splitBy :: Monad m => (a -> Bool) -> Fold m a b -> Stream m a -> Stream m b -splitBy predicate f (Stream step state) = Stream (step' f) (Just state) - - where - - {-# INLINE_LATE step' #-} - step' (Fold fstep initial done) gst (Just st) = liftInitialM initial >>= go SPEC st - - where - - go !_ stt !acc = do - res <- step (adaptState gst) stt - case res of - Yield x s -> do - if predicate x - then liftExtract done acc >>= \r -> return $ Yield r (Just s) - else do - acc' <- liftStep fstep acc x - go SPEC s acc' - Skip s -> go SPEC s acc - Stop -> liftExtract done acc >>= \r -> return $ Yield r Nothing - - step' _ _ Nothing = return Stop +splitBy predicate f = foldMany (FL.sliceSepBy predicate f) -- XXX requires -funfolding-use-threshold=150 in lines-unlines benchmark {-# INLINE_NORMAL splitSuffixBy #-} splitSuffixBy :: Monad m => (a -> Bool) -> Fold m a b -> Stream m a -> Stream m b -splitSuffixBy predicate f (Stream step state) = Stream (step' f) (Just state) - - where - - {-# INLINE_LATE step' #-} - step' (Fold fstep initial done) gst (Just st) = do - res <- step (adaptState gst) st - case res of - Yield x s -> do - acc <- initial - if predicate x - then done acc >>= \val -> return $ Yield val (Just s) - else do - acc' <- fstep acc x - go SPEC s acc' - - Skip s -> return $ Skip $ Just s - Stop -> return Stop - - where - - go !_ stt !acc = do - res <- step (adaptState gst) stt - case res of - Yield x s -> do - if predicate x - then liftExtract done acc >>= \r -> return $ Yield r (Just s) - else do - acc' <- liftStep fstep acc x - go SPEC s acc' - Skip s -> go SPEC s acc - Stop -> liftExtract done acc >>= \r -> return $ Yield r Nothing +splitSuffixBy predicate f = foldMany1 (FL.sliceSepBy predicate f) - step' _ _ Nothing = return Stop +data WordsByState fs s a b + = WordYield !b !(WordsByState fs s a b) + | WordBegin !s + | WordBeginWith !s !a + | WordFold !fs !s {-# INLINE_NORMAL wordsBy #-} wordsBy :: Monad m => (a -> Bool) -> Fold m a b -> Stream m a -> Stream m b -wordsBy predicate f (Stream step state) = Stream (stepOuter f) (Just state) +-- This has a hard time fusing even with simple pipeline. +-- wordsBy predicate f = foldMany (FL.sliceSepTill predicate f) +-- XXX Check this +wordsBy predicate (Fold fstep initial done) (Stream step state) = + Stream step1 (WordBegin state) where - {-# INLINE_LATE stepOuter #-} - stepOuter (Fold fstep initial done) gst (Just st) = do + {-# INLINE_LATE step1 #-} + step1 gst (WordBegin st) = do res <- step (adaptState gst) st case res of Yield x s -> do if predicate x - then return $ Skip (Just s) + then return $ Skip (WordBegin s) else do - acc <- initial - acc' <- fstep acc x - go SPEC s acc' - - Skip s -> return $ Skip $ Just s - Stop -> return Stop - - where - - go !_ stt !acc = do - res <- step (adaptState gst) stt - case res of - Yield x s -> do - if predicate x - then liftExtract done acc >>= \r -> return $ Yield r (Just s) - else do - acc' <- liftStep fstep acc x - go SPEC s acc' - Skip s -> go SPEC s acc - Stop -> liftExtract done acc >>= \r -> return $ Yield r Nothing - - stepOuter _ _ Nothing = return Stop + -- step1 gst (WordBeginWith s x) + ini <- initial + wordFoldWith ini s x + Skip s -> return $ Skip $ WordBegin s + Stop -> return Stop + step1 gst (WordFold fs st) = do + res <- step (adaptState gst) st + case res of + Yield x s -> do + if predicate x + then do + bres <- done fs + return $ Skip $ WordYield bres (WordBegin s) + else wordFoldWith fs s x + Skip s -> return $ Skip $ WordFold fs s + Stop -> return Stop + step1 _ (WordYield bres ns) = return $ Yield bres ns + step1 _ (WordBeginWith s x) = do + -- XXX We dont need to check for predicate here, we already know "x" + -- does not satisfy the predicate. + ini <- initial + wordFoldWith ini s x + + {-# INLINE wordFoldWith #-} + wordFoldWith fs s x = do + -- XXX We dont need to check for predicate here, we already know "x" + -- does not satisfy the predicate. + fs1 <- fstep fs x + return + $ Skip + $ case fs1 of + FL.Partial sres -> WordFold sres s + FL.Done bres -> WordYield bres (WordBegin s) + -- XXX This will lead to an infinite loop most of the time. But + -- it may terminate as it is an effectful step function. If + -- possible, we should somehow warn the user. + FL.Done1 bres -> WordYield bres (WordBeginWith s x) -- String search algorithms: -- http://www-igm.univ-mlv.fr/~lecroq/string/index.html @@ -1777,14 +1619,22 @@ data SplitOptions = SplitOptions } -} -data SplitOnState s a = +data SplitOnState fs s wrd a x = GO_START | GO_EMPTY_PAT s - | GO_SINGLE_PAT s a + | GO_SINGLE_PAT fs s + | GO_SINGLE_PAT_WITH s a | GO_SHORT_PAT s + | GO_SHORT_PAT_WITH s wrd a + | GO_SHORT_PAT_DRAIN Int fs s wrd | GO_KARP_RABIN s !(RB.Ring a) !(Ptr a) | GO_DONE + +-- XXX Can this be written as smaller folds and be used with foldMany/foldMany1. +-- The logic is basically the same thing, the only catch being that we have rely +-- on GHC for simplification. We can compare the performance and choose +-- accordingly. {-# INLINE_NORMAL splitOn #-} splitOn :: forall m a b. (MonadIO m, Storable a, Enum a, Eq a) @@ -1792,7 +1642,8 @@ splitOn -> Fold m a b -> Stream m a -> Stream m b -splitOn patArr (Fold fstep initial done) (Stream step state) = +splitOn patArr (Fold fstep initial done) (Stream step state) = undefined +{- Stream stepOuter GO_START where @@ -1800,39 +1651,118 @@ splitOn patArr (Fold fstep initial done) (Stream step state) = patLen = A.length patArr maxIndex = patLen - 1 elemBits = sizeOf (undefined :: a) * 8 + -- Since the Array is immutable we can get away with using unsafePerformIO + sngPat = unsafePerformIO $ A.unsafeIndexIO patArr 0 {-# INLINE_LATE stepOuter #-} stepOuter _ GO_START = if patLen == 0 then return $ Skip $ GO_EMPTY_PAT state else if patLen == 1 - then do - r <- liftIO $ (A.unsafeIndexIO patArr 0) - return $ Skip $ GO_SINGLE_PAT state r - else if sizeOf (undefined :: a) * patLen - <= sizeOf (undefined :: Word) - then return $ Skip $ GO_SHORT_PAT state - else do - (rb, rhead) <- liftIO $ RB.new patLen - return $ Skip $ GO_KARP_RABIN state rb rhead - - stepOuter gst (GO_SINGLE_PAT stt pat) = liftInitialM initial >>= go SPEC stt + then do + acc <- initial + return $ Skip $ GO_SINGLE_PAT acc state + else if sizeOf (undefined :: a) * patLen + <= sizeOf (undefined :: Word) + then return $ Skip $ GO_SHORT_PAT state + else do + (rb, rhead) <- liftIO $ RB.new patLen + return $ Skip $ GO_KARP_RABIN state rb rhead + ----------------- + -- Single Pattern + ----------------- + -- XXX These functions cab be designed in a better way? Get rid of go and + -- have more info in the state but you would be relying on GHC for + -- simplification. + -- XXX Multiple Yield points + stepOuter _ (GO_SINGLE_PAT_WITH s x) = do + acc <- initial + acc' <- fstep acc x + case acc' of + FL.Partial sres -> return $ Skip $ (GO_SINGLE_PAT sres s) + FL.Done bres -> do + ini <- initial + return $ Yield bres (GO_SINGLE_PAT ini s) + FL.Done1 bres -> return $ Yield bres (GO_SINGLE_PAT_WITH s x) + stepOuter gst (GO_SINGLE_PAT fs stt) = go SPEC stt fs where + -- XXX Multiple Yield points go !_ st !acc = do res <- step (adaptState gst) st case res of Yield x s -> do - if pat == x + if sngPat == x then do - r <- liftExtract done acc - return $ Yield r (GO_SINGLE_PAT s pat) - else liftStep fstep acc x >>= go SPEC s + r <- done acc + ini <- initial + return $ Yield r (GO_SINGLE_PAT ini s) + else do + acc' <- fstep acc x + case acc' of + FL.Partial sres -> go SPEC s sres + FL.Done bres -> do + ini <- initial + return $ Yield bres (GO_SINGLE_PAT ini s) + FL.Done1 bres -> + return $ Yield bres (GO_SINGLE_PAT_WITH s x) Skip s -> go SPEC s acc - Stop -> liftExtract done acc >>= \r -> return $ Yield r GO_DONE + Stop -> done acc >>= \r -> return $ Yield r GO_DONE + --------------------------- + -- Short Pattern - Shift Or + --------------------------- + -- XXX I've adapted the code accordingly but in my opinion redesigning this + -- would be better. + stepOuter _ (GO_SHORT_PAT_WITH s wrd x) = do + acc <- initial + let wrd' = addToWord wrd x + old = (mask .&. wrd) `shiftR` (elemBits * (patLen - 1)) + acc' <- fstep acc (toEnum $ fromIntegral old) + case acc' of + FL.Partial sres -> + if wrd' .&. mask == patWord + then done sres >>= \r -> return $ Yield r (GO_SHORT_PAT s) + else return $ Skip $ GO_SHORT_PAT s + FL.Done bres -> return $ Yield bres (GO_SHORT_PAT s) + FL.Done1 bres -> return $ Yield bres (GO_SHORT_PAT_WITH s wrd x) - stepOuter gst (GO_SHORT_PAT stt) = liftInitialM initial >>= go0 SPEC 0 (0 :: Word) stt + where + + -- XXX Duplicated code + mask :: Word + mask = (1 `shiftL` (elemBits * patLen)) - 1 + + addToWord wd a = (wd `shiftL` elemBits) .|. fromIntegral (fromEnum a) + + patWord :: Word + patWord = mask .&. A.foldl' addToWord 0 patArr + stepOuter _ (GO_SHORT_PAT_DRAIN n0 ac s wd) = go wd n0 ac + + where + + -- XXX Duplicated code + mask :: Word + mask = (1 `shiftL` (elemBits * patLen)) - 1 + + -- Check if this is correct? + -- Consider n == 1, fromIntegral (mask .&. word) is considering more + -- than one element. We should change the mask to ignore the previous + -- elements. + go !wrd !n !acc + | n > 0 = do + let old = (mask .&. wrd) `shiftR` (elemBits * (n - 1)) + fres <- fstep acc (toEnum $ fromIntegral old) + case fres of + FL.Partial sres -> go wrd (n - 1) sres + FL.Done bres -> do + ini <- initial + return $ Yield bres (GO_SHORT_PAT_DRAIN n ini s wrd) + FL.Done1 bres -> do + ini <- initial + return $ GO_SHORT_PAT_DRAIN n ini s wrd + go _ _ acc = done acc >>= \r -> return (Yield r GO_DONE) + stepOuter gst (GO_SHORT_PAT stt) = initial >>= go0 SPEC 0 (0 :: Word) stt where @@ -1853,16 +1783,16 @@ splitOn patArr (Fold fstep initial done) (Stream step state) = then do if wrd' .&. mask == patWord then do - r <- liftExtract done acc + r <- done acc return $ Yield r (GO_SHORT_PAT s) else go1 SPEC wrd' s acc else go0 SPEC (idx + 1) wrd' s acc Skip s -> go0 SPEC idx wrd s acc - Stop -> do - acc' <- if idx /= 0 - then go2 wrd idx acc - else return acc - liftExtract done acc' >>= \r -> return $ Yield r GO_DONE + Stop -> + if idx /= 0 + then -- stepOuter gst (GO_SHORT_PAT_DRAIN patLen acc s wrd) + go2 wrd idx acc + else done acc >>= \r -> return (Yield r GO_DONE) {-# INLINE go1 #-} go1 !_ wrd st !acc = do @@ -1871,34 +1801,56 @@ splitOn patArr (Fold fstep initial done) (Stream step state) = Yield x s -> do let wrd' = addToWord wrd x old = (mask .&. wrd) `shiftR` (elemBits * (patLen - 1)) - acc' <- liftStep fstep acc (toEnum $ fromIntegral old) - if wrd' .&. mask == patWord - then liftExtract done acc' >>= \r -> return $ Yield r (GO_SHORT_PAT s) - else go1 SPEC wrd' s acc' + acc' <- fstep acc (toEnum $ fromIntegral old) + case acc' of + FL.Partial sres -> + if wrd' .&. mask == patWord + then done sres + >>= \r -> return $ Yield r (GO_SHORT_PAT s) + else go1 SPEC wrd' s sres + FL.Done bres -> return $ Yield bres (GO_SHORT_PAT s) + FL.Done1 bres -> + return $ Yield bres (GO_SHORT_PAT_WITH s wrd x) Skip s -> go1 SPEC wrd s acc - Stop -> do - acc' <- go2 wrd patLen acc - liftExtract done acc' >>= \r -> return $ Yield r GO_DONE - - go2 !wrd !n !acc | n > 0 = do - let old = (mask .&. wrd) `shiftR` (elemBits * (n - 1)) - liftStep fstep acc (toEnum $ fromIntegral old) >>= go2 wrd (n - 1) - go2 _ _ acc = return acc - - stepOuter gst (GO_KARP_RABIN stt rb rhead) = do - liftInitialM initial >>= go0 SPEC 0 rhead stt + Stop -> + -- stepOuter gst (GO_SHORT_PAT_DRAIN patLen acc s wrd) + go2 wrd patLen acc + + -- Eliminate this? + go2 !wrd !n !acc + | n > 0 = do + let old = (mask .&. wrd) `shiftR` (elemBits * (n - 1)) + fres <- fstep acc (toEnum $ fromIntegral old) + case fres of + FL.Partial sres -> go2 wrd (n - 1) sres + FL.Done bres -> return $ Yield bres GO_DONE + go2 _ _ acc = done acc >>= \r -> return (Yield r GO_DONE) + ------------------------------- + -- General Pattern - Karp Rabin + ------------------------------- + stepOuter gst (GO_KARP_RABIN stt rb rhead) = + initial >>= go0 SPEC 0 rhead stt where k = 2891336453 :: Word32 coeff = k ^ patLen + addCksum cksum a = cksum * k + fromIntegral (fromEnum a) + deltaCksum cksum old new = addCksum cksum new - coeff * fromIntegral (fromEnum old) -- XXX shall we use a random starting hash or 1 instead of 0? patHash = A.foldl' addCksum 0 patArr + -- XXX Have a terminating fold for folding a Ring to eliminate these + liftStep lstep (FL.Partial s) a = lstep s a + liftStep _ x _ = return x + + liftExtract _ (FL.Done b) = return b + liftExtract ldone (FL.Partial s) = ldone s + -- rh == ringHead go0 !_ !idx !rh st !acc = do res <- step (adaptState gst) st @@ -1915,9 +1867,14 @@ splitOn patArr (Fold fstep initial done) (Stream step state) = else go0 SPEC (idx + 1) rh' s acc Skip s -> go0 SPEC idx rh s acc Stop -> do - !acc' <- if idx /= 0 - then RB.unsafeFoldRingM rh (liftStep fstep) acc rb - else return acc + !acc' <- + if idx /= 0 + then RB.unsafeFoldRingM + rh + (liftStep fstep) + (FL.Partial acc) + rb + else return (FL.Partial acc) liftExtract done acc' >>= \r -> return $ Yield r GO_DONE -- XXX Theoretically this code can do 4 times faster if GHC generates @@ -1932,39 +1889,74 @@ splitOn patArr (Fold fstep initial done) (Stream step state) = Yield x s -> do old <- liftIO $ peek rh let cksum' = deltaCksum cksum old x - acc' <- liftStep fstep acc old + acc' <- fstep acc old + case acc' of + FL.Partial sres -> + if (cksum' == patHash) + then do + rh' <- liftIO (RB.unsafeInsert rb rh x) + go2 SPEC cksum' rh' s sres + else do + rh' <- liftIO (RB.unsafeInsert rb rh x) + go1 SPEC cksum' rh' s sres + FL.Done bres -> + if (cksum' == patHash) + then do + rh' <- liftIO (RB.unsafeInsert rb rh x) + go2' SPEC cksum' rh' s bres + else do + rh' <- liftIO (RB.unsafeInsert rb rh x) + go1' SPEC cksum' rh' s bres + Skip s -> go1 SPEC cksum rh s acc + Stop -> do + acc' <- + RB.unsafeFoldRingFullM + rh + (liftStep fstep) + (FL.Partial acc) + rb + liftExtract done acc' >>= \r -> return $ Yield r GO_DONE + go1' !_ !cksum !rh st !acc = do + res <- step (adaptState gst) st + case res of + Yield x s -> do + old <- liftIO $ peek rh + let cksum' = deltaCksum cksum old x if (cksum' == patHash) then do - rh' <- liftIO (RB.unsafeInsert rb rh x) - go2 SPEC cksum' rh' s acc' + return $ Yield acc (GO_KARP_RABIN s rb rhead) else do rh' <- liftIO (RB.unsafeInsert rb rh x) - go1 SPEC cksum' rh' s acc' - Skip s -> go1 SPEC cksum rh s acc - Stop -> do - acc' <- RB.unsafeFoldRingFullM rh (liftStep fstep) acc rb - liftExtract done acc' >>= \r -> return $ Yield r GO_DONE + go1' SPEC cksum' rh' s acc + Skip s -> go1' SPEC cksum rh s acc + Stop -> return $ Yield acc GO_DONE go2 !_ !cksum' !rh' s !acc' = do if RB.unsafeEqArray rb rh' patArr then do - r <- liftExtract done acc' + r <- done acc' return $ Yield r (GO_KARP_RABIN s rb rhead) else go1 SPEC cksum' rh' s acc' + go2' !_ !cksum' !rh' s !acc' = do + if RB.unsafeEqArray rb rh' patArr + then return $ Yield acc' (GO_KARP_RABIN s rb rhead) + else go1' SPEC cksum' rh' s acc' stepOuter gst (GO_EMPTY_PAT st) = do res <- step (adaptState gst) st case res of Yield x s -> do acc <- initial acc' <- fstep acc x - liftExtract done acc' >>= \r -> return $ Yield r (GO_EMPTY_PAT s) + case acc' of + FL.Partial sres -> + done sres >>= \r -> return $ Yield r (GO_EMPTY_PAT s) + FL.Done bres -> return $ Yield bres (GO_EMPTY_PAT s) Skip s -> return $ Skip (GO_EMPTY_PAT s) Stop -> return Stop - stepOuter _ GO_DONE = return Stop - +-} {-# INLINE_NORMAL splitSuffixOn #-} splitSuffixOn :: forall m a b. (MonadIO m, Storable a, Enum a, Eq a) @@ -1974,7 +1966,8 @@ splitSuffixOn -> Stream m a -> Stream m b splitSuffixOn withSep patArr (Fold fstep initial done) - (Stream step state) = + (Stream step state) = undefined +{- Stream stepOuter GO_START where @@ -1982,15 +1975,14 @@ splitSuffixOn withSep patArr (Fold fstep initial done) patLen = A.length patArr maxIndex = patLen - 1 elemBits = sizeOf (undefined :: a) * 8 + sngPat = unsafePerformIO $ A.unsafeIndexIO patArr 0 {-# INLINE_LATE stepOuter #-} stepOuter _ GO_START = if patLen == 0 then return $ Skip $ GO_EMPTY_PAT state else if patLen == 1 - then do - r <- liftIO $ (A.unsafeIndexIO patArr 0) - return $ Skip $ GO_SINGLE_PAT state r + then return $ Skip $ GO_SINGLE_PAT state else if sizeOf (undefined :: a) * patLen <= sizeOf (undefined :: Word) then return $ Skip $ GO_SHORT_PAT state @@ -1998,7 +1990,7 @@ splitSuffixOn withSep patArr (Fold fstep initial done) (rb, rhead) <- liftIO $ RB.new patLen return $ Skip $ GO_KARP_RABIN state rb rhead - stepOuter gst (GO_SINGLE_PAT stt pat) = do + stepOuter gst (GO_SINGLE_PAT stt) = do -- This first part is the only difference between splitOn and -- splitSuffixOn. -- If the last element is a separator do not issue a blank segment. @@ -2006,29 +1998,61 @@ splitSuffixOn withSep patArr (Fold fstep initial done) case res of Yield x s -> do acc <- initial - if pat == x - then do - acc' <- if withSep then fstep acc x else FL.partialM acc - liftExtract done acc' >>= \r -> return $ Yield r (GO_SINGLE_PAT s pat) - else fstep acc x >>= go SPEC s - Skip s -> return $ Skip $ (GO_SINGLE_PAT s pat) + if sngPat == x + then if withSep + then do + fres <- fstep acc x + case fres of + FL.Partial sres -> + done sres >>= \r -> + return $ Yield r (GO_SINGLE_PAT s) + FL.Done bres -> + return $ Yield bres (GO_SINGLE_PAT s) + else done acc >>= \r -> + return $ Yield r (GO_SINGLE_PAT s) + else do + fres <- fstep acc x + case fres of + FL.Partial sres -> go SPEC s sres + FL.Done bres -> go' SPEC s bres + Skip s -> return $ Skip $ (GO_SINGLE_PAT s) Stop -> return Stop where - -- This is identical for splitOn and splitSuffixOn go !_ st !acc = do res <- step (adaptState gst) st case res of Yield x s -> do - if pat == x - then do - acc' <- if withSep then liftStep fstep acc x else return acc - r <- liftExtract done acc' - return $ Yield r (GO_SINGLE_PAT s pat) - else liftStep fstep acc x >>= go SPEC s + if sngPat == x + then if withSep + then do + fres <- fstep acc x + case fres of + FL.Partial sres -> + done sres >>= \r -> + return $ Yield r (GO_SINGLE_PAT s) + FL.Done bres -> + return $ Yield bres (GO_SINGLE_PAT s) + else done acc >>= \r -> + return $ Yield r (GO_SINGLE_PAT s) + else do + acc' <- fstep acc x + case acc' of + FL.Partial sres -> go SPEC s sres + FL.Done bres -> go' SPEC s bres Skip s -> go SPEC s acc - Stop -> liftExtract done acc >>= \r -> return $ Yield r GO_DONE + Stop -> done acc >>= \r -> return $ Yield r GO_DONE + + go' !_ st !acc = do + res <- step (adaptState gst) st + case res of + Yield x s -> do + if sngPat == x + then return $ Yield acc (GO_SINGLE_PAT s) + else go' SPEC s acc + Skip s -> go' SPEC s acc + Stop -> return $ Yield acc GO_DONE stepOuter gst (GO_SHORT_PAT stt) = do @@ -2044,13 +2068,30 @@ splitSuffixOn withSep patArr (Fold fstep initial done) Yield x s -> do acc <- initial let wrd' = addToWord wrd x - acc' <- if withSep then fstep acc x else FL.partialM acc - if idx == maxIndex + if withSep then do - if wrd' .&. mask == patWord - then liftExtract done acc' >>= \r -> return $ Yield r (GO_SHORT_PAT s) - else go0 SPEC (idx + 1) wrd' s acc' - else go0 SPEC (idx + 1) wrd' s acc' + fres <- fstep acc x + case fres of + FL.Partial sres -> + if idx == maxIndex + then do + if wrd' .&. mask == patWord + then done sres >>= \r -> return $ Yield r (GO_SHORT_PAT s) + else go0 SPEC (idx + 1) wrd' s sres + else go0 SPEC (idx + 1) wrd' s sres + FL.Done bres -> + if idx == maxIndex + then do + if wrd' .&. mask == patWord + then return $ Yield bres (GO_SHORT_PAT s) + else go0' SPEC (idx + 1) wrd' s bres + else go0' SPEC (idx + 1) wrd' s bres + else if idx == maxIndex + then do + if wrd' .&. mask == patWord + then done acc >>= \r -> return $ Yield r (GO_SHORT_PAT s) + else go0 SPEC (idx + 1) wrd' s acc + else go0 SPEC (idx + 1) wrd' s acc Skip s -> return $ Skip (GO_SHORT_PAT s) Stop -> return Stop @@ -2069,24 +2110,59 @@ splitSuffixOn withSep patArr (Fold fstep initial done) case res of Yield x s -> do let wrd' = addToWord wrd x - acc' <- if withSep then liftStep fstep acc x else return acc + -- XXX Eliminate common code + if withSep + then do + fres <- fstep acc x + case fres of + FL.Partial sres -> + if idx == maxIndex + then do + if wrd' .&. mask == patWord + then do + r <- done sres + return $ Yield r (GO_SHORT_PAT s) + else go1 SPEC wrd' s sres + else go0 SPEC (idx + 1) wrd' s sres + FL.Done bres -> + if idx == maxIndex + then do + if wrd' .&. mask == patWord + then return $ Yield bres (GO_SHORT_PAT s) + else go1' SPEC wrd' s bres + else go0' SPEC (idx + 1) wrd' s bres + else if idx == maxIndex + then do + if wrd' .&. mask == patWord + then do + r <- done acc + return $ Yield r (GO_SHORT_PAT s) + else go1 SPEC wrd' s acc + else go0 SPEC (idx + 1) wrd' s acc + Skip s -> go0 SPEC idx wrd s acc + Stop -> do + if (idx == maxIndex) && (wrd .&. mask == patWord) + then return Stop + else if idx /= 0 && not withSep + then go2 wrd idx acc + else done acc >>= \r -> return $ Yield r GO_DONE + + go0' !_ !idx wrd st !acc = do + res <- step (adaptState gst) st + case res of + Yield x s -> do + let wrd' = addToWord wrd x if idx == maxIndex then do if wrd' .&. mask == patWord - then do - r <- liftExtract done acc' - return $ Yield r (GO_SHORT_PAT s) - else go1 SPEC wrd' s acc' - else go0 SPEC (idx + 1) wrd' s acc' - Skip s -> go0 SPEC idx wrd s acc + then return $ Yield acc (GO_SHORT_PAT s) + else go1' SPEC wrd' s acc + else go0' SPEC (idx + 1) wrd' s acc + Skip s -> go0' SPEC idx wrd s acc Stop -> do if (idx == maxIndex) && (wrd .&. mask == patWord) then return Stop - else do - acc' <- if idx /= 0 && not withSep - then go2 wrd idx acc - else return acc - liftExtract done acc' >>= \r -> return $ Yield r GO_DONE + else return $ Yield acc GO_DONE {-# INLINE go1 #-} go1 !_ wrd st !acc = do @@ -2095,28 +2171,63 @@ splitSuffixOn withSep patArr (Fold fstep initial done) Yield x s -> do let wrd' = addToWord wrd x old = (mask .&. wrd) `shiftR` (elemBits * (patLen - 1)) - acc' <- if withSep - then liftStep fstep acc x - else liftStep fstep acc (toEnum $ fromIntegral old) - if wrd' .&. mask == patWord - then liftExtract done acc' >>= \r -> return $ Yield r (GO_SHORT_PAT s) - else go1 SPEC wrd' s acc' + if withSep + then do + fres <- fstep acc x + case fres of + FL.Partial sres -> + if wrd' .&. mask == patWord + then done sres >>= \r -> return $ Yield r (GO_SHORT_PAT s) + else go1 SPEC wrd' s sres + FL.Done bres -> + if wrd' .&. mask == patWord + then return $ Yield bres (GO_SHORT_PAT s) + else go1' SPEC wrd' s bres + else do + fres <- fstep acc (toEnum $ fromIntegral old) + case fres of + FL.Partial sres -> + if wrd' .&. mask == patWord + then done sres >>= \r -> return $ Yield r (GO_SHORT_PAT s) + else go1 SPEC wrd' s sres + FL.Done bres -> + if wrd' .&. mask == patWord + then return $ Yield bres (GO_SHORT_PAT s) + else go1' SPEC wrd' s bres Skip s -> go1 SPEC wrd s acc Stop -> -- If the last sequence is a separator do not issue a blank -- segment. if wrd .&. mask == patWord then return Stop - else do - acc' <- if withSep - then return acc - else go2 wrd patLen acc - liftExtract done acc' >>= \r -> return $ Yield r GO_DONE + else if withSep + then done acc >>= \r -> return $ Yield r GO_DONE + else go2 wrd patLen acc + + {-# INLINE go1' #-} + go1' !_ wrd st !acc = do + res <- step (adaptState gst) st + case res of + Yield x s -> do + let wrd' = addToWord wrd x + if wrd' .&. mask == patWord + then return $ Yield acc (GO_SHORT_PAT s) + else go1' SPEC wrd' s acc + Skip s -> go1' SPEC wrd s acc + Stop -> + -- If the last sequence is a separator do not issue a blank + -- segment. + if wrd .&. mask == patWord + then return Stop + else return $ Yield acc GO_DONE go2 !wrd !n !acc | n > 0 = do let old = (mask .&. wrd) `shiftR` (elemBits * (n - 1)) - liftStep fstep acc (toEnum $ fromIntegral old) >>= go2 wrd (n - 1) - go2 _ _ acc = return acc + fres <- fstep acc (toEnum $ fromIntegral old) + case fres of + FL.Partial sres -> go2 wrd (n - 1) sres + FL.Done bres -> return $ Yield bres GO_DONE + go2 _ _ acc = done acc >>= \r -> return (Yield r GO_DONE) stepOuter gst (GO_KARP_RABIN stt rb rhead) = do let idx = 0 @@ -2124,16 +2235,30 @@ splitSuffixOn withSep patArr (Fold fstep initial done) case res of Yield x s -> do acc <- initial - acc' <- if withSep then fstep acc x else FL.partialM acc rh' <- liftIO (RB.unsafeInsert rb rhead x) - if idx == maxIndex + let fold = RB.unsafeFoldRing (RB.ringBound rb) + let !ringHash = fold addCksum 0 rb + if withSep then do - let fold = RB.unsafeFoldRing (RB.ringBound rb) - let !ringHash = fold addCksum 0 rb - if ringHash == patHash - then go2 SPEC ringHash rh' s acc' - else go0 SPEC (idx + 1) rh' s acc' - else go0 SPEC (idx + 1) rh' s acc' + fres <- fstep acc x + case fres of + FL.Partial sres -> + if idx == maxIndex + then if ringHash == patHash + then go2 SPEC ringHash rh' s sres + else go0 SPEC (idx + 1) rh' s sres + else go0 SPEC (idx + 1) rh' s sres + FL.Done bres -> + if idx == maxIndex + then if ringHash == patHash + then go2' SPEC ringHash rh' s bres + else go0' SPEC (idx + 1) rh' s bres + else go0' SPEC (idx + 1) rh' s bres + else if idx == maxIndex + then if ringHash == patHash + then go2 SPEC ringHash rh' s acc + else go0 SPEC (idx + 1) rh' s acc + else go0 SPEC (idx + 1) rh' s acc Skip s -> return $ Skip (GO_KARP_RABIN s rb rhead) Stop -> return Stop @@ -2148,21 +2273,46 @@ splitSuffixOn withSep patArr (Fold fstep initial done) -- XXX shall we use a random starting hash or 1 instead of 0? patHash = A.foldl' addCksum 0 patArr + -- XXX Have a terminating fold for folding a Ring to eliminate these + liftStep lstep (FL.Partial s) a = lstep s a + liftStep _ x _ = return x + liftExtract _ (FL.Done b) = return b + liftExtract ldone (FL.Partial s) = ldone s + -- rh == ringHead go0 !_ !idx !rh st !acc = do res <- step (adaptState gst) st case res of Yield x s -> do - acc' <- if withSep then liftStep fstep acc x else return acc rh' <- liftIO (RB.unsafeInsert rb rh x) - if idx == maxIndex + let fold = RB.unsafeFoldRing (RB.ringBound rb) + let !ringHash = fold addCksum 0 rb + if withSep then do - let fold = RB.unsafeFoldRing (RB.ringBound rb) - let !ringHash = fold addCksum 0 rb - if ringHash == patHash - then go2 SPEC ringHash rh' s acc' - else go1 SPEC ringHash rh' s acc' - else go0 SPEC (idx + 1) rh' s acc' + fres <- fstep acc x + case fres of + FL.Partial sres -> + if idx == maxIndex + then do + if ringHash == patHash + then go2 SPEC ringHash rh' s sres + else go1 SPEC ringHash rh' s sres + else go0 SPEC (idx + 1) rh' s sres + + FL.Done bres -> + if idx == maxIndex + then do + if ringHash == patHash + then go2' SPEC ringHash rh' s bres + else go1' SPEC ringHash rh' s bres + else go0' SPEC (idx + 1) rh' s bres + + else if idx == maxIndex + then do + if ringHash == patHash + then go2 SPEC ringHash rh' s acc + else go1 SPEC ringHash rh' s acc + else go0 SPEC (idx + 1) rh' s acc Skip s -> go0 SPEC idx rh s acc Stop -> do -- do not issue a blank segment when we end at pattern @@ -2170,10 +2320,31 @@ splitSuffixOn withSep patArr (Fold fstep initial done) then return Stop else do !acc' <- if idx /= 0 && not withSep - then RB.unsafeFoldRingM rh (liftStep fstep) acc rb - else return acc + then RB.unsafeFoldRingM rh (liftStep fstep) (FL.Partial acc) rb + else return $ FL.Partial acc liftExtract done acc' >>= \r -> return $ Yield r GO_DONE + go0' !_ !idx !rh st !acc = do + res <- step (adaptState gst) st + case res of + Yield x s -> do + rh' <- liftIO (RB.unsafeInsert rb rh x) + let fold = RB.unsafeFoldRing (RB.ringBound rb) + let !ringHash = fold addCksum 0 rb + if idx == maxIndex + then do + if ringHash == patHash + then go2' SPEC ringHash rh' s acc + else go1' SPEC ringHash rh' s acc + else go0' SPEC (idx + 1) rh' s acc + Skip s -> go0' SPEC idx rh s acc + Stop -> do + -- do not issue a blank segment when we end at pattern + if (idx == maxIndex) && RB.unsafeEqArray rb rh patArr + then return Stop + -- Is this behaviour correct? + else return $ Yield acc GO_DONE + -- XXX Theoretically this code can do 4 times faster if GHC generates -- optimal code. If we use just "(cksum' == patHash)" condition it goes -- 4x faster, as soon as we add the "RB.unsafeEqArray rb v" condition @@ -2186,45 +2357,101 @@ splitSuffixOn withSep patArr (Fold fstep initial done) Yield x s -> do old <- liftIO $ peek rh let cksum' = deltaCksum cksum old x - acc' <- if withSep - then liftStep fstep acc x - else liftStep fstep acc old - - if (cksum' == patHash) + if withSep then do - rh' <- liftIO (RB.unsafeInsert rb rh x) - go2 SPEC cksum' rh' s acc' + fres <- fstep acc x + case fres of + FL.Partial sres -> + if (cksum' == patHash) + then do + rh' <- liftIO (RB.unsafeInsert rb rh x) + go2 SPEC cksum' rh' s sres + else do + rh' <- liftIO (RB.unsafeInsert rb rh x) + go1 SPEC cksum' rh' s sres + FL.Done bres -> + if (cksum' == patHash) + then do + rh' <- liftIO (RB.unsafeInsert rb rh x) + go2' SPEC cksum' rh' s bres + else do + rh' <- liftIO (RB.unsafeInsert rb rh x) + go1' SPEC cksum' rh' s bres else do - rh' <- liftIO (RB.unsafeInsert rb rh x) - go1 SPEC cksum' rh' s acc' + fres <- fstep acc old + case fres of + FL.Partial sres -> + if (cksum' == patHash) + then do + rh' <- liftIO (RB.unsafeInsert rb rh x) + go2 SPEC cksum' rh' s sres + else do + rh' <- liftIO (RB.unsafeInsert rb rh x) + go1 SPEC cksum' rh' s sres + FL.Done bres -> + if (cksum' == patHash) + then do + rh' <- liftIO (RB.unsafeInsert rb rh x) + go2' SPEC cksum' rh' s bres + else do + rh' <- liftIO (RB.unsafeInsert rb rh x) + go1' SPEC cksum' rh' s bres Skip s -> go1 SPEC cksum rh s acc Stop -> do if RB.unsafeEqArray rb rh patArr then return Stop else do acc' <- if withSep - then return acc - else RB.unsafeFoldRingFullM rh (liftStep fstep) acc rb + then return (FL.Partial acc) + else RB.unsafeFoldRingFullM rh (liftStep fstep) (FL.Partial acc) rb liftExtract done acc' >>= \r -> return $ Yield r GO_DONE + go1' !_ !cksum !rh st !acc = do + res <- step (adaptState gst) st + case res of + Yield x s -> do + old <- liftIO $ peek rh + let cksum' = deltaCksum cksum old x + if (cksum' == patHash) + then do + rh' <- liftIO (RB.unsafeInsert rb rh x) + go2' SPEC cksum' rh' s acc + else do + rh' <- liftIO (RB.unsafeInsert rb rh x) + go1' SPEC cksum' rh' s acc + Skip s -> go1' SPEC cksum rh s acc + Stop -> do + if RB.unsafeEqArray rb rh patArr + then return Stop + else return $ Yield acc GO_DONE + go2 !_ !cksum' !rh' s !acc' = do if RB.unsafeEqArray rb rh' patArr then do - r <- liftExtract done acc' + r <- done acc' return $ Yield r (GO_KARP_RABIN s rb rhead) else go1 SPEC cksum' rh' s acc' + go2' !_ !cksum' !rh' s !acc' = do + if RB.unsafeEqArray rb rh' patArr + then return $ Yield acc' (GO_KARP_RABIN s rb rhead) + else go1' SPEC cksum' rh' s acc' + stepOuter gst (GO_EMPTY_PAT st) = do res <- step (adaptState gst) st case res of Yield x s -> do acc <- initial acc' <- fstep acc x - liftExtract done acc' >>= \r -> return $ Yield r (GO_EMPTY_PAT s) + case acc' of + FL.Partial sres -> + done sres >>= \r -> return $ Yield r (GO_EMPTY_PAT s) + FL.Done bres -> return $ Yield bres (GO_EMPTY_PAT s) Skip s -> return $ Skip (GO_EMPTY_PAT s) Stop -> return Stop stepOuter _ GO_DONE = return Stop +-} data SplitState s arr = SplitInitial s @@ -3607,30 +3834,35 @@ scanlMx' :: Monad m scanlMx' fstep begin done s = (begin >>= \x -> x `seq` done x) `consM` postscanlMx' fstep begin done s -{-# INLINE_NORMAL postscanFold #-} -postscanFold :: Monad m +{-# INLINE_NORMAL postscanOnce #-} +postscanOnce :: Monad m => FL.Fold m a b -> Stream m a -> Stream m b -postscanFold (FL.Fold fstep begin done) (Stream step state) = do - Stream step' (state, liftInitialM begin) - where +postscanOnce (FL.Fold fstep begin done) (Stream step state) = + Stream step' (Tuple' state begin) + + where + {-# INLINE_LATE step' #-} - step' gst (st, acc) = do + step' gst (Tuple' st acc) = do r <- step (adaptState gst) st case r of Yield x s -> do old <- acc - y <- liftStep fstep old x - v <- liftExtract done y - v `seq` y `seq` return (Yield v (s, return y)) - Skip s -> return $ Skip (s, acc) - Stop -> return Stop - + y <- fstep old x + case y of + FL.Partial sres -> do + !v <- done sres + return (Yield v (Tuple' s (return sres))) + FL.Done _ -> return $ Stop + FL.Done1 _ -> return $ Stop + Skip s -> return $ Skip (Tuple' s acc) + Stop -> return Stop -{-# INLINE scanFold #-} -scanFold :: Monad m +{-# INLINE scanOnce #-} +scanOnce :: Monad m => FL.Fold m a b -> Stream m a -> Stream m b -scanFold fld@(FL.Fold _ begin done) s = - (begin >>= \x -> x `seq` done x) `consM` postscanFold fld s +scanOnce fld@(FL.Fold _ begin done) s = + (begin >>= \x -> x `seq` done x) `consM` postscanOnce fld s {-# INLINE scanlx' #-} scanlx' :: Monad m @@ -3811,59 +4043,87 @@ rollingMap f = rollingMapM (\x y -> return $ f x y) -- Tapping/Distributing ------------------------------------------------------------------------------ +data TapState sv st = TapInit | Tapping sv st | TapDone st + +-- XXX Multiple yield points {-# INLINE tap #-} tap :: Monad m => Fold m a b -> Stream m a -> Stream m a -tap (Fold fstep initial extract) (Stream step state) = Stream step' Nothing +tap (Fold fstep initial extract) (Stream step state) = Stream step' TapInit where - step' _ Nothing = do - r <- liftInitialM initial - return $ Skip (Just (r, state)) - - step' gst (Just (acc, st)) = acc `seq` do + step' _ TapInit = do + r <- initial + return $ Skip (Tapping r state) + step' gst (Tapping acc st) = + acc `seq` do + r <- step gst st + case r of + Yield x s -> do + acc' <- fstep acc x + return + $ case acc' of + FL.Partial sres -> Yield x (Tapping sres s) + FL.Done _ -> Yield x (TapDone s) + FL.Done1 _ -> Yield x (TapDone s) + Skip s -> return $ Skip (Tapping acc s) + Stop -> do + void $ extract acc + return $ Stop + step' gst (TapDone st) = do r <- step gst st - case r of - Yield x s -> do - acc' <- liftStep fstep acc x - return $ Yield x (Just (acc', s)) - Skip s -> return $ Skip (Just (acc, s)) - Stop -> do - void $ liftExtract extract acc - return $ Stop + return + $ case r of + Yield x s -> Yield x (TapDone s) + Skip s -> Skip (TapDone s) + Stop -> Stop + +data TapOffState fs s b n = TapOffInit | TapOffTapping fs s n | TapOffDone s +-- XXX Multiple yield points {-# INLINE_NORMAL tapOffsetEvery #-} tapOffsetEvery :: Monad m => Int -> Int -> Fold m a b -> Stream m a -> Stream m a tapOffsetEvery offset n (Fold fstep initial extract) (Stream step state) = - Stream step' Nothing + Stream step' TapOffInit where {-# INLINE_LATE step' #-} - step' _ Nothing = do - r <- liftInitialM initial - return $ Skip (Just (r, state, offset `mod` n)) - - step' gst (Just (acc, st, count)) | count <= 0 = do + step' _ TapOffInit = do + r <- initial + return $ Skip $ TapOffTapping r state (offset `mod` n) + step' gst (TapOffTapping acc st count) + | count <= 0 = do + r <- step gst st + case r of + Yield x s -> do + acc' <- fstep acc x + return + $ case acc' of + FL.Partial sres -> + Yield x (TapOffTapping sres s (n - 1)) + FL.Done _ -> Yield x (TapOffDone s) + FL.Done1 _ -> Yield x (TapOffDone s) + Skip s -> return $ Skip (TapOffTapping acc s count) + Stop -> do + void $ extract acc + return $ Stop + step' gst (TapOffTapping acc st count) = do r <- step gst st case r of - Yield x s -> do - !acc' <- liftStep fstep acc x - return $ Yield x (Just (acc', s, n - 1)) - Skip s -> return $ Skip (Just (acc, s, count)) - Stop -> do - void $ liftExtract extract acc + Yield x s -> return $ Yield x (TapOffTapping acc s (count - 1)) + Skip s -> return $ Skip (TapOffTapping acc s count) + Stop -> do + void $ extract acc return $ Stop - - step' gst (Just (acc, st, count)) = do + step' gst (TapOffDone st) = do r <- step gst st - case r of - Yield x s -> return $ Yield x (Just (acc, s, count - 1)) - Skip s -> return $ Skip (Just (acc, s, count)) - Stop -> do - void $ liftExtract extract acc - return $ Stop + return + $ case r of + Yield x s -> Yield x (TapOffDone s) + Skip s -> Skip (TapOffDone s) + Stop -> Stop {-# INLINE_NORMAL pollCounts #-} pollCounts @@ -3883,7 +4143,7 @@ pollCounts predicate transf fld (Stream step state) = Stream step' Nothing -- However, an Int on a 32-bit machine may overflow quickly. countVar <- liftIO $ Prim.newIORef (0 :: Int) tid <- forkManaged - $ void $ runFold fld + $ void $ foldOnce fld $ transf $ fromPrimIORef countVar return $ Skip (Just (countVar, tid, state)) @@ -4389,23 +4649,23 @@ the (Stream step state) = go state Skip s -> go' n s Stop -> return (Just n) -{-# INLINE runFold #-} -runFold :: (Monad m) => Fold m a b -> Stream m a -> m b -runFold (Fold fstep begin done) (Stream step state) = +{-# INLINE foldOnce #-} +foldOnce :: (Monad m) => Fold m a b -> Stream m a -> m b +foldOnce (Fold fstep begin done) (Stream step state) = begin >>= \x -> go SPEC x state where - -- XXX !acc? {-# INLINE_LATE go #-} - go !_ acc st = acc `seq` do + go !_ !fs st = do r <- step defState st case r of Yield x s -> do - acc' <- fstep acc x - case acc' of + res <- fstep fs x + case res of FL.Done b -> return b - FL.Partial acc'' -> go SPEC acc'' s - Skip s -> go SPEC acc s - Stop -> done acc + FL.Done1 b -> return b + FL.Partial sres -> go SPEC sres s + Skip s -> go SPEC fs s + Stop -> done fs ------------------------------------------------------------------------------- -- Concurrent application and fold @@ -4444,10 +4704,10 @@ toSVarParallel st sv xs = where {-# NOINLINE work #-} - work info = (runFold (FL.toParallelSVar sv info) xs) + work info = (foldOnce (FL.toParallelSVar sv info) xs) {-# NOINLINE workLim #-} - workLim info = runFold (FL.toParallelSVarLimited sv info) xs + workLim info = foldOnce (FL.toParallelSVarLimited sv info) xs {-# NOINLINE forkWithDiag #-} forkWithDiag = do @@ -4557,9 +4817,7 @@ newFoldSVar stt f = do where {-# NOINLINE work #-} - work sv = void $ runFold f $ fromProducer sv - -data TapState sv st = TapInit | Tapping sv st | TapDone st + work sv = void $ foldOnce f $ fromProducer sv {-# INLINE_NORMAL tapAsync #-} tapAsync :: MonadAsync m => Fold m a b -> Stream m a -> Stream m a @@ -4619,7 +4877,7 @@ lastN n where step (Tuple3' rb rh i) a = do rh1 <- liftIO $ RB.unsafeInsert rb rh a - FL.partialM $ Tuple3' rb rh1 (i + 1) + return $ FL.Partial $ Tuple3' rb rh1 (i + 1) initial = fmap (\(a, b) -> Tuple3' a b (0 :: Int)) $ liftIO $ RB.new n done (Tuple3' rb rh i) = do arr <- liftIO $ MA.newArray n diff --git a/src/Streamly/Internal/Data/Stream/StreamD/Type.hs b/src/Streamly/Internal/Data/Stream/StreamD/Type.hs index f2a6f17889..940fbe704d 100644 --- a/src/Streamly/Internal/Data/Stream/StreamD/Type.hs +++ b/src/Streamly/Internal/Data/Stream/StreamD/Type.hs @@ -46,6 +46,8 @@ module Streamly.Internal.Data.Stream.StreamD.Type , cmpBy , take , GroupState (..) -- for inspection testing + , foldMany + , foldMany1 , groupsOf , groupsOf2 ) @@ -584,10 +586,12 @@ take n (Stream step state) = n `seq` Stream step' (state, 0) ------------------------------------------------------------------------------ -- s = stream state, fs = fold state -data GroupState s fs b +{-# ANN type GroupState Fuse #-} +data GroupState s fs b a = GroupStart s + | GroupConsume s a | GroupBuffer s fs - | GroupYield b (GroupState s fs b) + | GroupYield b (GroupState s fs b a) | GroupFinish {-# INLINE_NORMAL foldMany #-} @@ -602,23 +606,84 @@ foldMany (Fold fstep initial extract) (Stream step state) = -- fs = fold state fs <- initial return $ Skip (GroupBuffer st fs) - + step' _ (GroupConsume st x) = do + fs <- initial + fs' <- fstep fs x + return + $ case fs' of + FL.Done b -> Skip (GroupYield b (GroupStart st)) + FL.Partial ps -> Skip (GroupBuffer st ps) + -- XXX This will lead to an infinite loop most of the time. But + -- it may terminate as it is an effectful step function. If + -- possible, we should somehow warn the user. + FL.Done1 b -> Skip (GroupYield b (GroupConsume st x)) step' gst (GroupBuffer st fs) = do r <- step (adaptState gst) st case r of Yield x s -> do fs' <- fstep fs x - return $ case fs' of - FL.Done b -> Skip (GroupYield b (GroupStart s)) - FL.Partial ps -> Skip (GroupBuffer s ps) - Skip s -> - return $ Skip (GroupBuffer s fs) + return + $ case fs' of + FL.Done b -> Skip (GroupYield b (GroupStart s)) + FL.Partial ps -> Skip (GroupBuffer s ps) + FL.Done1 b -> Skip (GroupYield b (GroupConsume s x)) + Skip s -> return $ Skip (GroupBuffer s fs) Stop -> do b <- extract fs return $ Skip (GroupYield b GroupFinish) - step' _ (GroupYield b next) = return $ Yield b next + step' _ GroupFinish = return Stop + + +{-# INLINE_NORMAL foldMany1 #-} +foldMany1 :: Monad m => Fold m a b -> Stream m a -> Stream m b +foldMany1 (Fold fstep initial extract) (Stream step state) = + Stream step' (GroupStart state) + where + + {-# INLINE_LATE step' #-} + step' gst (GroupStart st) = do + -- fs = fold state + r <- step (adaptState gst) st + case r of + Yield x s -> do + fs <- initial + fs' <- fstep fs x + return + $ case fs' of + FL.Done b -> Skip (GroupYield b (GroupStart s)) + -- XXX This will lead to an infinite loop most of the + -- time. But it may terminate as it is an effectful + -- step function. If possible, we should somehow warn + -- the user. + FL.Done1 b -> Skip (GroupYield b (GroupConsume s x)) + FL.Partial ps -> Skip (GroupBuffer s ps) + Skip s -> return $ Skip (GroupStart s) + Stop -> return $ Stop + step' _ (GroupConsume st x) = do + fs <- initial + fs' <- fstep fs x + return + $ case fs' of + FL.Done b -> Skip (GroupYield b (GroupStart st)) + FL.Done1 b -> Skip (GroupYield b (GroupConsume st x)) + FL.Partial ps -> Skip (GroupBuffer st ps) + step' gst (GroupBuffer st fs) = do + r <- step (adaptState gst) st + case r of + Yield x s -> do + fs' <- fstep fs x + return + $ case fs' of + FL.Done b -> Skip (GroupYield b (GroupStart s)) + FL.Partial ps -> Skip (GroupBuffer s ps) + FL.Done1 b -> Skip (GroupYield b (GroupConsume s x)) + Skip s -> return $ Skip (GroupBuffer s fs) + Stop -> do + b <- extract fs + return $ Skip (GroupYield b GroupFinish) + step' _ (GroupYield b next) = return $ Yield b next step' _ GroupFinish = return Stop {-# INLINE groupsOf #-} diff --git a/src/Streamly/Internal/Data/Stream/StreamK.hs b/src/Streamly/Internal/Data/Stream/StreamK.hs index eb826624f3..1aa1540650 100644 --- a/src/Streamly/Internal/Data/Stream/StreamK.hs +++ b/src/Streamly/Internal/Data/Stream/StreamK.hs @@ -88,7 +88,7 @@ module Streamly.Internal.Data.Stream.StreamK , foldlT , foldlx' , foldlMx' - , runFold + , foldOnce -- ** Specialized Folds , drain @@ -434,18 +434,24 @@ foldlMx' step begin done m = go begin m yieldk a r = acc >>= \b -> step b a >>= \x -> go (return x) r in foldStream defState yieldk single stop m1 -{-# INLINABLE runFold #-} -runFold :: (IsStream t, Monad m) => FL.Fold m a b -> t m a -> m b -runFold (FL.Fold step begin done) m = go begin m +{-# INLINABLE foldOnce #-} +foldOnce :: (IsStream t, Monad m) => FL.Fold m a b -> t m a -> m b +foldOnce (FL.Fold step begin done) m = go begin m where go !acc m1 = let stop = acc >>= done - single a = acc >>= \b -> step b a >>= FL.liftExtract done + single a = acc + >>= \b -> step b a + >>= \x -> case x of + FL.Partial s -> done s + FL.Done b1 -> return b1 + FL.Done1 b1 -> return b1 yieldk a r = acc >>= \b -> step b a >>= \x -> case x of FL.Partial s -> go (return s) r FL.Done b1 -> return b1 + FL.Done1 b1 -> return b1 in foldStream defState yieldk single stop m1 -- | Like 'foldl'' but with a monadic step function. diff --git a/src/Streamly/Internal/Data/Unfold.hs b/src/Streamly/Internal/Data/Unfold.hs index c633f65b56..53445eb2ea 100644 --- a/src/Streamly/Internal/Data/Unfold.hs +++ b/src/Streamly/Internal/Data/Unfold.hs @@ -265,6 +265,7 @@ fold (Unfold ustep inject) (Fold fstep initial extract) a = case acc' of FL.Partial acc'' -> go SPEC acc'' s FL.Done c -> return c + FL.Done1 c -> return c Skip s -> go SPEC acc s Stop -> extract acc diff --git a/src/Streamly/Internal/FileSystem/File.hs b/src/Streamly/Internal/FileSystem/File.hs index 9df512c7b0..d2134236b4 100644 --- a/src/Streamly/Internal/FileSystem/File.hs +++ b/src/Streamly/Internal/FileSystem/File.hs @@ -360,7 +360,7 @@ writeChunks path = Fold step initial extract return (fld, h) step (fld, h) x = do r <- FL.runStep fld x `MC.onException` liftIO (hClose h) - FL.partialM (r, h) + return $ FL.Partial (r, h) extract (Fold _ initial1 extract1, h) = do liftIO $ hClose h initial1 >>= extract1 diff --git a/src/Streamly/Internal/Network/Inet/TCP.hs b/src/Streamly/Internal/Network/Inet/TCP.hs index 1ebc0779dc..9437e9d320 100644 --- a/src/Streamly/Internal/Network/Inet/TCP.hs +++ b/src/Streamly/Internal/Network/Inet/TCP.hs @@ -360,7 +360,7 @@ writeChunks addr port = Fold step initial extract return (Tuple' fld skt) step (Tuple' fld skt) x = do r <- FL.runStep fld x `MC.onException` liftIO (Net.close skt) - FL.partialM (Tuple' r skt) + return $ FL.Partial (Tuple' r skt) extract (Tuple' (Fold _ initial1 extract1) skt) = do liftIO $ Net.close skt initial1 >>= extract1 diff --git a/src/Streamly/Internal/Unicode/Stream.hs b/src/Streamly/Internal/Unicode/Stream.hs index 4ef64b5f7b..02724d4646 100644 --- a/src/Streamly/Internal/Unicode/Stream.hs +++ b/src/Streamly/Internal/Unicode/Stream.hs @@ -160,7 +160,7 @@ utf8d = unsafePerformIO -- Aligning to cacheline makes a barely noticeable difference -- XXX currently alignment is not implemented for unmanaged allocation - $ D.runFold (A.writeNAlignedUnmanaged 64 (length decodeTable)) + $ D.foldOnce (A.writeNAlignedUnmanaged 64 (length decodeTable)) (D.fromList decodeTable) -- | Return element at the specified index without checking the bounds. From 5d26e5549bd9699a3928615a6af64148f9d3bfbd Mon Sep 17 00:00:00 2001 From: adithyaov Date: Sat, 5 Sep 2020 21:20:50 +0530 Subject: [PATCH 04/30] Add a few benchmarks to Fold --- benchmark/Streamly/Benchmark/Data/Fold.hs | 83 +++++++++++++++++------ benchmark/streamly-benchmarks.cabal | 1 + src/Streamly/Internal/Data/Fold.hs | 12 ++-- 3 files changed, 70 insertions(+), 26 deletions(-) diff --git a/benchmark/Streamly/Benchmark/Data/Fold.hs b/benchmark/Streamly/Benchmark/Data/Fold.hs index 2838870f52..528dd0577b 100644 --- a/benchmark/Streamly/Benchmark/Data/Fold.hs +++ b/benchmark/Streamly/Benchmark/Data/Fold.hs @@ -15,7 +15,10 @@ import Data.Monoid (Last(..), Sum(..)) import System.Random (randomRIO) import Prelude (IO, Int, Double, String, (>), (<*>), (<$>), (+), ($), (<=), Monad(..), (==), Maybe(..), (.), fromIntegral, - compare, (>=), concat, seq) + compare, (>=), concat, seq, mod, fst, snd, const) + +import qualified Prelude as P +import qualified Data.Map.Strict as Map import qualified Streamly.Prelude as S import qualified Streamly.Internal.Data.Fold as FL @@ -66,15 +69,15 @@ o_1_space_serial_elimination value = [ bgroup "serially" [ bgroup "elimination" [ benchIOSink value "drain" (S.fold FL.drain) - , benchIOSink value "drainN" (S.fold (IFL.drainN value)) + , benchIOSink value "drainN" (S.fold (FL.drainN value)) , benchIOSink value "drainWhileTrue" - (S.fold (IFL.drainWhile $ (<=) (value + 1))) + (S.fold (FL.drainWhile $ (<=) (value + 1))) , benchIOSink value "drainWhileFalse" - (S.fold (IFL.drainWhile $ (>=) (value + 1))) + (S.fold (FL.drainWhile $ (>=) (value + 1))) , benchIOSink value "sink" (S.fold $ Sink.toFold Sink.drain) , benchIOSink value "last" (S.fold FL.last) , benchIOSink value "lastN.1" (S.fold (IA.lastN 1)) @@ -149,12 +152,12 @@ o_1_space_serial_transformation :: Int -> [Benchmark] o_1_space_serial_transformation value = [ bgroup "serially" [ bgroup "transformation" - [ benchIOSink value "lmap" (S.fold (IFL.lmap (+ 1) FL.drain)) + [ benchIOSink value "lmap" (S.fold (FL.lmap (+ 1) FL.drain)) , benchIOSink value "pipe-mapM" (S.fold - (IFL.transform + (FL.transform (Pipe.mapM (\x -> return $ x + 1)) FL.drain)) ] @@ -163,22 +166,60 @@ o_1_space_serial_transformation value = o_1_space_serial_composition :: Int -> [Benchmark] o_1_space_serial_composition value = - [ bgroup "serially" - [ bgroup "composition" -- Applicative - [ benchIOSink - value - "all,any" - (S.fold - ((,) <$> FL.all (<= (value + 1)) <*> - FL.any (> (value + 1)))) - , benchIOSink - value - "sum,length" - (S.fold ((,) <$> FL.sum <*> FL.length)) - ] - ] + [ bgroup + "serially" + [ bgroup + "composition" + -- Applicative + [ benchIOSink + value + "<*> all any" + (S.fold + ((,) <$> FL.all (<= (value + 1)) + <*> FL.any (> (value + 1)))) + , benchIOSink + value + "<*> sum length" + (S.fold ((,) <$> FL.sum <*> FL.length)) + , benchIOSink + value + "distribute_ [sum, length]" + (S.fold + (FL.distribute_ + [const () <$> FL.sum, const () <$> FL.length])) + , benchIOSink + value + "demuxWith [sum, length]" + (S.fold (const () <$> FL.demuxWith demuxFunc demuxMap)) + , benchIOSink + value + "demuxWith_ [sum, length]" + (S.fold (const () <$> FL.demuxWith_ demuxFunc demuxMap)) + , benchIOSink + value + "demuxWithDefault_ [sum, length] sum" + (S.fold + (const () + <$> FL.demuxWithDefault_ + demuxFunc + demuxMap + (FL.lmap snd FL.sum))) + , benchIOSink + value + "classifyWith sum" + (S.fold + (const () + <$> FL.classifyWith (fst . demuxFunc) FL.sum)) + ] + ] ] + where + + demuxFunc x = (x `mod` 3, x) + + demuxMap = Map.fromList [(0, FL.sum), (1, FL.length)] + o_n_heap_serial :: Int -> [Benchmark] o_n_heap_serial value = [ bgroup "serially" @@ -188,7 +229,7 @@ o_n_heap_serial value = [ benchIOSink value "toStream" (S.fold IP.toStream) , benchIOSink value "toStreamRev" (S.fold IP.toStreamRev) , benchIOSink value "toList" (S.fold FL.toList) - , benchIOSink value "toListRevF" (S.fold IFL.toListRevF) + , benchIOSink value "toListRevF" (S.fold FL.toListRevF) -- Converting the stream to an array , benchIOSink value "lastN.Max" (S.fold (IA.lastN (value + 1))) , benchIOSink value "writeN" (S.fold (A.writeN value)) diff --git a/benchmark/streamly-benchmarks.cabal b/benchmark/streamly-benchmarks.cabal index 4e210b328e..bbae492256 100644 --- a/benchmark/streamly-benchmarks.cabal +++ b/benchmark/streamly-benchmarks.cabal @@ -246,6 +246,7 @@ benchmark Data.Fold type: exitcode-stdio-1.0 hs-source-dirs: Streamly/Benchmark/Data main-is: Fold.hs + build-depends: containers >= 0.5 && < 0.7 if impl(ghcjs) buildable: False else diff --git a/src/Streamly/Internal/Data/Fold.hs b/src/Streamly/Internal/Data/Fold.hs index 54bb1a252a..a6fccfdc15 100644 --- a/src/Streamly/Internal/Data/Fold.hs +++ b/src/Streamly/Internal/Data/Fold.hs @@ -183,16 +183,16 @@ module Streamly.Internal.Data.Fold -- * Demultiplexing , demux - -- , demuxWith + , demuxWith , demux_ , demuxDefault_ - -- , demuxWith_ + , demuxWith_ , demuxWithDefault_ -- * Classifying , classify - -- , classifyWith + , classifyWith -- * Unzipping , unzip @@ -1341,7 +1341,7 @@ toUnitState (Partial _) = Partial () toUnitState (Done _) = Done () toUnitState (Done1 _) = Done1 () - +-- XXX This is 2x times faster wiithout the terminating condition. -- | Like 'distribute' but for folds that return (), this can be more efficient -- than 'distribute' as it does not need to maintain state. -- @@ -1368,9 +1368,11 @@ distribute_ fs = Fold step initial extract extract ss = Prelude.mapM_ runMaybeExtract ss + {-# INLINE runMaybeExtract #-} runMaybeExtract (Partial (Fold _ i d)) = i >>= d runMaybeExtract _ = return () + {-# INLINE runMaybeStep #-} runMaybeStep (Done1 ()) _ = return $ Done1 () runMaybeStep (Done ()) _ = return $ Done1 () runMaybeStep (Partial (Fold s i d)) a = do @@ -1652,7 +1654,7 @@ demux :: (Monad m, Ord k) demux = demuxWith id -- data DemuxState m s = DemuxP !m !s | DemuxD !m -data DemuxState m s = DemuxP Int s m | DemuxD Int m +data DemuxState m s = DemuxP Int !s !m | DemuxD Int !m {-# INLINE demuxWithDefault_ #-} demuxWithDefault_ :: (Monad m, Ord k) From 159e6ff17d3e87cabacc606811dd09de7e0262c8 Mon Sep 17 00:00:00 2001 From: adithyaov Date: Sun, 6 Sep 2020 13:18:02 +0530 Subject: [PATCH 05/30] fixup! Add a few benchmarks to Fold --- benchmark/Streamly/Benchmark/Data/Fold.hs | 149 +++++++++++++++------- 1 file changed, 101 insertions(+), 48 deletions(-) diff --git a/benchmark/Streamly/Benchmark/Data/Fold.hs b/benchmark/Streamly/Benchmark/Data/Fold.hs index 528dd0577b..0de7840159 100644 --- a/benchmark/Streamly/Benchmark/Data/Fold.hs +++ b/benchmark/Streamly/Benchmark/Data/Fold.hs @@ -13,22 +13,25 @@ import Control.DeepSeq (NFData(..)) import Data.Monoid (Last(..), Sum(..)) import System.Random (randomRIO) -import Prelude (IO, Int, Double, String, (>), (<*>), (<$>), (+), ($), +import Prelude (IO, Int, Double, String, (>), (<$>), (+), ($), (<=), Monad(..), (==), Maybe(..), (.), fromIntegral, - compare, (>=), concat, seq, mod, fst, snd, const) + compare, (>=), concat, seq, mod, fst, snd, const, Bool, Ord(..), + div, Num(..)) + +import Data.Map.Strict (Map) +import Streamly.Internal.Data.Fold (Fold(..)) -import qualified Prelude as P import qualified Data.Map.Strict as Map import qualified Streamly.Prelude as S import qualified Streamly.Internal.Data.Fold as FL +import qualified Streamly.Internal.Data.Fold.Types as FL import qualified Streamly.Internal.Data.Pipe as Pipe import qualified Streamly.Internal.Data.Sink as Sink import qualified Streamly.Data.Array.Storable.Foreign as A import qualified Streamly.Internal.Data.Array.Storable.Foreign as IA -import qualified Streamly.Internal.Data.Fold as IFL import qualified Streamly.Internal.Data.Stream.IsStream as IP import Gauge @@ -57,8 +60,84 @@ benchIOSink => Int -> String -> (t IO Int -> IO b) -> Benchmark benchIOSink value name f = bench name $ nfIO $ randomRIO (1,1) >>= f . source value + +------------------------------------------------------------------------------- +-- Folds +------------------------------------------------------------------------------- + +{-# INLINE any #-} +any :: (Monad m, Ord a) => a -> SerialT m a -> m Bool +any value = IP.fold (FL.any (> value)) + +{-# INLINE all #-} +all :: (Monad m, Ord a) => a -> SerialT m a -> m Bool +all value = IP.fold (FL.all (<= value)) + +{-# INLINE take #-} +take :: Monad m => Int -> SerialT m a -> m () +take value = IP.fold (FL.ltake value FL.drain) + +{-# INLINE takeWhile #-} +takeWhile :: Monad m => Int -> SerialT m Int -> m () +takeWhile value = IP.fold (FL.ltakeWhile (<= value) FL.drain) + +{-# INLINE many #-} +many :: Monad m => SerialT m Int -> m Int +many = IP.fold (FL.many FL.length FL.sum) + +{-# INLINE splitAllAny #-} +splitAllAny :: Monad m => Int -> SerialT m Int -> m (Bool, Bool) +splitAllAny value = + IP.fold (FL.splitWith (,) (FL.all (<= (value `div` 2))) (FL.any (> value))) + +{-# INLINE teeAllAny #-} +teeAllAny :: (Monad m, Ord a) => a -> SerialT m a -> m (Bool, Bool) +teeAllAny value = + IP.fold (FL.teeWith (,) (FL.all (<= value)) (FL.any (> value))) + +{-# INLINE teeSumLength #-} +teeSumLength :: Monad m => SerialT m Int -> m (Int, Int) +teeSumLength = IP.fold (FL.teeWith (,) FL.sum FL.length) + +{-# INLINE distribute_ #-} +distribute_ :: Monad m => SerialT m Int -> m () +distribute_ = + IP.fold (FL.distribute_ [const () <$> FL.sum, const () <$> FL.length]) + +{-# INLINE demuxWith #-} +demuxWith :: + (Monad m, Ord k) + => (a -> (k, a')) + -> Map k (Fold m a' b) + -> SerialT m a + -> m (Map k b) +demuxWith f mp = S.fold (FL.demuxWith f mp) + +{-# INLINE demuxWith_ #-} +demuxWith_ :: + (Monad m, Ord k) + => (a -> (k, a')) + -> Map k (Fold m a' b) + -> SerialT m a + -> m () +demuxWith_ f mp = S.fold (FL.demuxWith_ f mp) + +{-# INLINE demuxWithDefault_ #-} +demuxWithDefault_ :: + (Monad m, Ord k, Num b) + => (a -> (k, b)) + -> Map k (Fold m b b) + -> SerialT m a + -> m () +demuxWithDefault_ f mp = S.fold (FL.demuxWithDefault_ f mp (FL.lmap snd FL.sum)) + +{-# INLINE classifyWith #-} +classifyWith :: + (Monad m, Ord k, Num a) => (a -> k) -> SerialT m a -> m (Map k a) +classifyWith f = S.fold (FL.classifyWith f FL.sum) + ------------------------------------------------------------------------------- --- Stream folds +-- Benchmarks ------------------------------------------------------------------------------- moduleName :: String @@ -133,8 +212,10 @@ o_1_space_serial_elimination value = , benchIOSink value "null" (S.fold FL.null) , benchIOSink value "elem" (S.fold (FL.elem (value + 1))) , benchIOSink value "notElem" (S.fold (FL.notElem (value + 1))) - , benchIOSink value "all" (S.fold (FL.all (<= (value + 1)))) - , benchIOSink value "any" (S.fold (FL.any (> (value + 1)))) + , benchIOSink value "all" $ all value + , benchIOSink value "any" $ any value + , benchIOSink value "take" $ take value + , benchIOSink value "takeWhile" $ takeWhile value , benchIOSink value "and" @@ -171,54 +252,26 @@ o_1_space_serial_composition value = [ bgroup "composition" -- Applicative - [ benchIOSink - value - "<*> all any" - (S.fold - ((,) <$> FL.all (<= (value + 1)) - <*> FL.any (> (value + 1)))) - , benchIOSink - value - "<*> sum length" - (S.fold ((,) <$> FL.sum <*> FL.length)) - , benchIOSink - value - "distribute_ [sum, length]" - (S.fold - (FL.distribute_ - [const () <$> FL.sum, const () <$> FL.length])) - , benchIOSink - value - "demuxWith [sum, length]" - (S.fold (const () <$> FL.demuxWith demuxFunc demuxMap)) - , benchIOSink - value - "demuxWith_ [sum, length]" - (S.fold (const () <$> FL.demuxWith_ demuxFunc demuxMap)) - , benchIOSink - value - "demuxWithDefault_ [sum, length] sum" - (S.fold - (const () - <$> FL.demuxWithDefault_ - demuxFunc - demuxMap - (FL.lmap snd FL.sum))) - , benchIOSink - value - "classifyWith sum" - (S.fold - (const () - <$> FL.classifyWith (fst . demuxFunc) FL.sum)) + [ benchIOSink value "tee (all, any)" $ teeAllAny value + , benchIOSink value "tee (sum, length)" $ teeSumLength + , benchIOSink value "distribute_ [sum, length]" $ distribute_ + , benchIOSink value "demuxWith [sum, length]" $ demuxWith fn mp + , benchIOSink value "demuxWith_ [sum, length]" + $ demuxWith_ fn mp + , benchIOSink value "demuxWithDefault_ [sum, length] sum" + $ demuxWithDefault_ fn mp + , benchIOSink value "classifyWith sum" $ classifyWith (fst . fn) + , benchIOSink value "many length sum" many + , benchIOSink value "split (all, any)" $ splitAllAny value ] ] ] where - demuxFunc x = (x `mod` 3, x) + fn x = (x `mod` 3, x) - demuxMap = Map.fromList [(0, FL.sum), (1, FL.length)] + mp = Map.fromList [(0, FL.sum), (1, FL.length)] o_n_heap_serial :: Int -> [Benchmark] o_n_heap_serial value = From 11934c176c3d86ead4991d78f7aae8edf1f0f3b8 Mon Sep 17 00:00:00 2001 From: adithyaov Date: Tue, 8 Sep 2020 23:51:32 +0530 Subject: [PATCH 06/30] Intermediate splitOn --- .../Benchmark/FileSystem/Handle/Read.hs | 4 +- src/Streamly/Internal/Data/Stream/StreamD.hs | 127 +++++++++--------- 2 files changed, 65 insertions(+), 66 deletions(-) diff --git a/benchmark/Streamly/Benchmark/FileSystem/Handle/Read.hs b/benchmark/Streamly/Benchmark/FileSystem/Handle/Read.hs index 5f886bd166..e20a6f01ee 100644 --- a/benchmark/Streamly/Benchmark/FileSystem/Handle/Read.hs +++ b/benchmark/Streamly/Benchmark/FileSystem/Handle/Read.hs @@ -472,8 +472,8 @@ o_1_space_reduce_read_split env = splitOnSuffix inh , mkBench "S.splitOnSeq \"\" FL.drain" env $ \inh _ -> splitOnSeq "" inh - , mkBench "S.splitOnSuffixSeq \"\" FL.drain" env $ \inh _ -> - splitOnSuffixSeq "" inh +-- , mkBench "S.splitOnSuffixSeq \"\" FL.drain" env $ \inh _ -> +-- splitOnSuffixSeq "" inh , mkBench "S.splitOnSeq \"\\n\" FL.drain" env $ \inh _ -> splitOnSeq "\n" inh , mkBench "S.splitOnSuffixSeq \"\\n\" FL.drain" env $ \inh _ -> diff --git a/src/Streamly/Internal/Data/Stream/StreamD.hs b/src/Streamly/Internal/Data/Stream/StreamD.hs index e7566af6e4..1c3624ad52 100644 --- a/src/Streamly/Internal/Data/Stream/StreamD.hs +++ b/src/Streamly/Internal/Data/Stream/StreamD.hs @@ -1619,18 +1619,16 @@ data SplitOptions = SplitOptions } -} -data SplitOnState fs s wrd a x = + +{-# ANN type SplitOnState Fuse #-} +data SplitOnState fs s a b = GO_START | GO_EMPTY_PAT s - | GO_SINGLE_PAT fs s - | GO_SINGLE_PAT_WITH s a - | GO_SHORT_PAT s - | GO_SHORT_PAT_WITH s wrd a - | GO_SHORT_PAT_DRAIN Int fs s wrd - | GO_KARP_RABIN s !(RB.Ring a) !(Ptr a) + | GO_SINGLE_PAT_NEXT fs s a + | GO_SINGLE_PAT_WITH fs s a a + | GO_YIELD b (SplitOnState fs s a b) | GO_DONE - -- XXX Can this be written as smaller folds and be used with foldMany/foldMany1. -- The logic is basically the same thing, the only catch being that we have rely -- on GHC for simplification. We can compare the performance and choose @@ -1642,8 +1640,7 @@ splitOn -> Fold m a b -> Stream m a -> Stream m b -splitOn patArr (Fold fstep initial done) (Stream step state) = undefined -{- +splitOn patArr (Fold fstep initial done) (Stream step state) = Stream stepOuter GO_START where @@ -1651,8 +1648,6 @@ splitOn patArr (Fold fstep initial done) (Stream step state) = undefined patLen = A.length patArr maxIndex = patLen - 1 elemBits = sizeOf (undefined :: a) * 8 - -- Since the Array is immutable we can get away with using unsafePerformIO - sngPat = unsafePerformIO $ A.unsafeIndexIO patArr 0 {-# INLINE_LATE stepOuter #-} stepOuter _ GO_START = @@ -1661,13 +1656,18 @@ splitOn patArr (Fold fstep initial done) (Stream step state) = undefined else if patLen == 1 then do acc <- initial - return $ Skip $ GO_SINGLE_PAT acc state - else if sizeOf (undefined :: a) * patLen + pat <- liftIO $ A.unsafeIndexIO patArr 0 + return $ Skip $ GO_SINGLE_PAT_NEXT acc state pat + else undefined + {- + if sizeOf (undefined :: a) * patLen <= sizeOf (undefined :: Word) then return $ Skip $ GO_SHORT_PAT state else do (rb, rhead) <- liftIO $ RB.new patLen return $ Skip $ GO_KARP_RABIN state rb rhead + -} + stepOuter _ (GO_YIELD x ns) = return $ Yield x ns ----------------- -- Single Pattern ----------------- @@ -1675,68 +1675,66 @@ splitOn patArr (Fold fstep initial done) (Stream step state) = undefined -- have more info in the state but you would be relying on GHC for -- simplification. -- XXX Multiple Yield points - stepOuter _ (GO_SINGLE_PAT_WITH s x) = do - acc <- initial - acc' <- fstep acc x - case acc' of - FL.Partial sres -> return $ Skip $ (GO_SINGLE_PAT sres s) - FL.Done bres -> do - ini <- initial - return $ Yield bres (GO_SINGLE_PAT ini s) - FL.Done1 bres -> return $ Yield bres (GO_SINGLE_PAT_WITH s x) - stepOuter gst (GO_SINGLE_PAT fs stt) = go SPEC stt fs - - where - - -- XXX Multiple Yield points - go !_ st !acc = do - res <- step (adaptState gst) st + stepOuter _ (GO_SINGLE_PAT_WITH fs s pat x) = + if pat == x + then do + r <- done fs + ini <- initial + return $ Skip $ GO_YIELD r (GO_SINGLE_PAT_NEXT ini s pat) + else do + res <- fstep fs x case res of - Yield x s -> do - if sngPat == x - then do - r <- done acc - ini <- initial - return $ Yield r (GO_SINGLE_PAT ini s) - else do - acc' <- fstep acc x - case acc' of - FL.Partial sres -> go SPEC s sres - FL.Done bres -> do - ini <- initial - return $ Yield bres (GO_SINGLE_PAT ini s) - FL.Done1 bres -> - return $ Yield bres (GO_SINGLE_PAT_WITH s x) - Skip s -> go SPEC s acc - Stop -> done acc >>= \r -> return $ Yield r GO_DONE + FL.Partial sres -> + return $ Skip $ (GO_SINGLE_PAT_NEXT sres s pat) + FL.Done bres -> do + ini <- initial + return $ Skip $ GO_YIELD bres (GO_SINGLE_PAT_NEXT ini s pat) + FL.Done1 bres -> do + ini <- initial + return + $ Skip $ GO_YIELD bres (GO_SINGLE_PAT_WITH ini s pat x) + stepOuter gst (GO_SINGLE_PAT_NEXT fs st pat) = do + res <- step (adaptState gst) st + case res of + Yield x s -> return $ Skip $ GO_SINGLE_PAT_WITH fs s pat x + Skip s -> return $ Skip $ GO_SINGLE_PAT_NEXT fs s pat + Stop -> done fs >>= \r -> return $ Skip $ GO_YIELD r GO_DONE + --------------------------- + -- Empty pattern + --------------------------- + stepOuter gst (GO_EMPTY_PAT st) = do + res <- step (adaptState gst) st + case res of + Yield x s -> do + acc <- initial + acc' <- fstep acc x + case acc' of + FL.Partial sres -> + done sres + >>= \r -> return $ Skip $ GO_YIELD r (GO_EMPTY_PAT s) + FL.Done bres -> + return $ Skip $ GO_YIELD bres (GO_EMPTY_PAT s) + Skip s -> return $ Skip (GO_EMPTY_PAT s) + Stop -> return Stop + stepOuter _ GO_DONE = return Stop +{- --------------------------- -- Short Pattern - Shift Or --------------------------- -- XXX I've adapted the code accordingly but in my opinion redesigning this -- would be better. - stepOuter _ (GO_SHORT_PAT_WITH s wrd x) = do - acc <- initial + addToWord wd a = (wd `shiftL` elemBits) .|. fromIntegral (fromEnum a) + stepOuter _ (GO_SHORT_PAT_WITH fs s mask patWord wrd x) = do let wrd' = addToWord wrd x old = (mask .&. wrd) `shiftR` (elemBits * (patLen - 1)) - acc' <- fstep acc (toEnum $ fromIntegral old) - case acc' of + res <- fstep fs (toEnum $ fromIntegral old) + case res of FL.Partial sres -> if wrd' .&. mask == patWord then done sres >>= \r -> return $ Yield r (GO_SHORT_PAT s) else return $ Skip $ GO_SHORT_PAT s FL.Done bres -> return $ Yield bres (GO_SHORT_PAT s) FL.Done1 bres -> return $ Yield bres (GO_SHORT_PAT_WITH s wrd x) - - where - - -- XXX Duplicated code - mask :: Word - mask = (1 `shiftL` (elemBits * patLen)) - 1 - - addToWord wd a = (wd `shiftL` elemBits) .|. fromIntegral (fromEnum a) - - patWord :: Word - patWord = mask .&. A.foldl' addToWord 0 patArr stepOuter _ (GO_SHORT_PAT_DRAIN n0 ac s wd) = go wd n0 ac where @@ -1759,8 +1757,8 @@ splitOn patArr (Fold fstep initial done) (Stream step state) = undefined ini <- initial return $ Yield bres (GO_SHORT_PAT_DRAIN n ini s wrd) FL.Done1 bres -> do - ini <- initial - return $ GO_SHORT_PAT_DRAIN n ini s wrd + ini <- initial + return $ GO_SHORT_PAT_DRAIN n ini s wrd go _ _ acc = done acc >>= \r -> return (Yield r GO_DONE) stepOuter gst (GO_SHORT_PAT stt) = initial >>= go0 SPEC 0 (0 :: Word) stt @@ -1957,6 +1955,7 @@ splitOn patArr (Fold fstep initial done) (Stream step state) = undefined Stop -> return Stop stepOuter _ GO_DONE = return Stop -} + {-# INLINE_NORMAL splitSuffixOn #-} splitSuffixOn :: forall m a b. (MonadIO m, Storable a, Enum a, Eq a) From b9e30f883749f54968c1504c9690d811d0811a35 Mon Sep 17 00:00:00 2001 From: adithyaov Date: Wed, 9 Sep 2020 06:07:05 +0530 Subject: [PATCH 07/30] Interim commit --- .../Benchmark/FileSystem/Handle/Read.hs | 8 +- src/Streamly/Internal/Data/Stream/StreamD.hs | 249 +++++++++--------- 2 files changed, 127 insertions(+), 130 deletions(-) diff --git a/benchmark/Streamly/Benchmark/FileSystem/Handle/Read.hs b/benchmark/Streamly/Benchmark/FileSystem/Handle/Read.hs index e20a6f01ee..7fc03cd56e 100644 --- a/benchmark/Streamly/Benchmark/FileSystem/Handle/Read.hs +++ b/benchmark/Streamly/Benchmark/FileSystem/Handle/Read.hs @@ -476,14 +476,14 @@ o_1_space_reduce_read_split env = -- splitOnSuffixSeq "" inh , mkBench "S.splitOnSeq \"\\n\" FL.drain" env $ \inh _ -> splitOnSeq "\n" inh - , mkBench "S.splitOnSuffixSeq \"\\n\" FL.drain" env $ \inh _ -> - splitOnSuffixSeq "\n" inh +-- , mkBench "S.splitOnSuffixSeq \"\\n\" FL.drain" env $ \inh _ -> +-- splitOnSuffixSeq "\n" inh , mkBench "S.splitOnSeq \"a\" FL.drain" env $ \inh _ -> splitOnSeq "a" inh , mkBench "S.splitOnSeq \"\\r\\n\" FL.drain" env $ \inh _ -> splitOnSeq "\r\n" inh - , mkBench "S.splitOnSuffixSeq \"\\r\\n\" FL.drain" env $ \inh _ -> - splitOnSuffixSeq "\r\n" inh +-- , mkBench "S.splitOnSuffixSeq \"\\r\\n\" FL.drain" env $ \inh _ -> +-- splitOnSuffixSeq "\r\n" inh , mkBench "S.splitOnSeq \"aa\" FL.drain" env $ \inh _ -> splitOnSeq "aa" inh , mkBench "S.splitOnSeq \"aaaa\" FL.drain" env $ \inh _ -> diff --git a/src/Streamly/Internal/Data/Stream/StreamD.hs b/src/Streamly/Internal/Data/Stream/StreamD.hs index 1c3624ad52..bb797d7c1d 100644 --- a/src/Streamly/Internal/Data/Stream/StreamD.hs +++ b/src/Streamly/Internal/Data/Stream/StreamD.hs @@ -325,7 +325,6 @@ import Data.Word (Word32) import Foreign.Ptr (Ptr) import Foreign.Storable (Storable(..)) import GHC.Types (SPEC(..)) -import GHC.IO (unsafePerformIO) import System.Mem (performMajorGC) import Fusion.Plugin.Types (Fuse(..)) import Streamly.Internal.Data.IORef.Prim (Prim) @@ -1619,20 +1618,26 @@ data SplitOptions = SplitOptions } -} - {-# ANN type SplitOnState Fuse #-} -data SplitOnState fs s a b = +data SplitOnState fs s a b w = GO_START | GO_EMPTY_PAT s - | GO_SINGLE_PAT_NEXT fs s a - | GO_SINGLE_PAT_WITH fs s a a - | GO_YIELD b (SplitOnState fs s a b) + | GO_EMPTY_PAT_WITH s a + | GO_SINGLE_PAT_NEXT !fs s a + | GO_SINGLE_PAT_WITH !fs s a a + | GO_SHORT_PAT_ACCUM Int s !w + | GO_SHORT_PAT_NEXT !fs s !w + | GO_SHORT_PAT_DRAIN Int !fs !w + | GO_YIELD b (SplitOnState fs s a b w) | GO_DONE -- XXX Can this be written as smaller folds and be used with foldMany/foldMany1. -- The logic is basically the same thing, the only catch being that we have rely -- on GHC for simplification. We can compare the performance and choose -- accordingly. +-- XXX This does not fuse. Even in origin/master this does not fuse. There is +-- about 15% degradation in performance. If internal recursion is used (go, go1 +-- etc.) there is about 1500% degradation in performance {-# INLINE_NORMAL splitOn #-} splitOn :: forall m a b. (MonadIO m, Storable a, Enum a, Eq a) @@ -1649,6 +1654,20 @@ splitOn patArr (Fold fstep initial done) (Stream step state) = maxIndex = patLen - 1 elemBits = sizeOf (undefined :: a) * 8 + -- XXX I initially put this in the state but I'd like to keep the state as + -- simple as possible to allow more fusion. Removing this from the state did + -- not change anything. I'll introduce them back once I figure out what's + -- wrong. + mask :: Word + mask = (1 `shiftL` (elemBits * patLen)) - 1 + + pat :: Word + -- XXX You dont need .&. here? + pat = mask .&. A.foldl' addToWord 0 patArr + + -- Used only if the pattern is short + addToWord wd a = (wd `shiftL` elemBits) .|. fromIntegral (fromEnum a) + {-# INLINE_LATE stepOuter #-} stepOuter _ GO_START = if patLen == 0 @@ -1658,16 +1677,37 @@ splitOn patArr (Fold fstep initial done) (Stream step state) = acc <- initial pat <- liftIO $ A.unsafeIndexIO patArr 0 return $ Skip $ GO_SINGLE_PAT_NEXT acc state pat - else undefined - {- - if sizeOf (undefined :: a) * patLen + else if sizeOf (undefined :: a) * patLen <= sizeOf (undefined :: Word) - then return $ Skip $ GO_SHORT_PAT state + then return $ Skip $ GO_SHORT_PAT_ACCUM 0 state (0 :: Word) else do + undefined + {- (rb, rhead) <- liftIO $ RB.new patLen return $ Skip $ GO_KARP_RABIN state rb rhead -} stepOuter _ (GO_YIELD x ns) = return $ Yield x ns + stepOuter _ GO_DONE = return Stop + --------------------------- + -- Empty pattern + --------------------------- + stepOuter _ (GO_EMPTY_PAT_WITH s x) = do + ini <- initial + res <- fstep ini x + case res of + FL.Partial sres -> do + r <- done sres + return $ Skip $ GO_YIELD r (GO_EMPTY_PAT s) + FL.Done bres -> return $ Skip $ GO_YIELD bres (GO_EMPTY_PAT s) + FL.Done1 bres -> + return $ Skip $ GO_YIELD bres (GO_EMPTY_PAT_WITH s x) + stepOuter gst (GO_EMPTY_PAT st) = do + res <- step (adaptState gst) st + return + $ case res of + Yield x s -> Skip $ GO_EMPTY_PAT_WITH s x + Skip s -> Skip $ GO_EMPTY_PAT s + Stop -> Stop ----------------- -- Single Pattern ----------------- @@ -1698,131 +1738,88 @@ splitOn patArr (Fold fstep initial done) (Stream step state) = case res of Yield x s -> return $ Skip $ GO_SINGLE_PAT_WITH fs s pat x Skip s -> return $ Skip $ GO_SINGLE_PAT_NEXT fs s pat - Stop -> done fs >>= \r -> return $ Skip $ GO_YIELD r GO_DONE + Stop -> do + r <- done fs + return $ Skip $ GO_YIELD r GO_DONE --------------------------- - -- Empty pattern + -- Short Pattern - Shift Or --------------------------- - stepOuter gst (GO_EMPTY_PAT st) = do + stepOuter gst (GO_SHORT_PAT_ACCUM idx st wrd) = do res <- step (adaptState gst) st case res of Yield x s -> do - acc <- initial - acc' <- fstep acc x - case acc' of - FL.Partial sres -> - done sres - >>= \r -> return $ Skip $ GO_YIELD r (GO_EMPTY_PAT s) - FL.Done bres -> - return $ Skip $ GO_YIELD bres (GO_EMPTY_PAT s) - Skip s -> return $ Skip (GO_EMPTY_PAT s) - Stop -> return Stop - stepOuter _ GO_DONE = return Stop -{- - --------------------------- - -- Short Pattern - Shift Or - --------------------------- - -- XXX I've adapted the code accordingly but in my opinion redesigning this - -- would be better. - addToWord wd a = (wd `shiftL` elemBits) .|. fromIntegral (fromEnum a) - stepOuter _ (GO_SHORT_PAT_WITH fs s mask patWord wrd x) = do - let wrd' = addToWord wrd x - old = (mask .&. wrd) `shiftR` (elemBits * (patLen - 1)) - res <- fstep fs (toEnum $ fromIntegral old) + let wrd1 = addToWord wrd x + if idx == maxIndex + then if wrd1 .&. mask == pat + then do + r <- initial >>= done + return + $ Skip + $ GO_YIELD r $ GO_SHORT_PAT_ACCUM 0 s (0 :: Word) + else do + ini <- initial + return $ Skip $ GO_SHORT_PAT_NEXT ini s wrd1 + else return $ Skip $ GO_SHORT_PAT_ACCUM (idx + 1) s wrd1 + Skip s -> return $ Skip $ GO_SHORT_PAT_ACCUM idx s wrd + Stop -> do + ini <- initial + if idx /= 0 + then return $ Skip $ GO_SHORT_PAT_DRAIN idx ini wrd + else do + r <- done ini + return $ Skip $ GO_YIELD r GO_DONE + stepOuter gst (GO_SHORT_PAT_NEXT fs st wrd) = do + res <- step (adaptState gst) st case res of - FL.Partial sres -> - if wrd' .&. mask == patWord - then done sres >>= \r -> return $ Yield r (GO_SHORT_PAT s) - else return $ Skip $ GO_SHORT_PAT s - FL.Done bres -> return $ Yield bres (GO_SHORT_PAT s) - FL.Done1 bres -> return $ Yield bres (GO_SHORT_PAT_WITH s wrd x) - stepOuter _ (GO_SHORT_PAT_DRAIN n0 ac s wd) = go wd n0 ac - - where - - -- XXX Duplicated code - mask :: Word - mask = (1 `shiftL` (elemBits * patLen)) - 1 - - -- Check if this is correct? - -- Consider n == 1, fromIntegral (mask .&. word) is considering more - -- than one element. We should change the mask to ignore the previous - -- elements. - go !wrd !n !acc - | n > 0 = do - let old = (mask .&. wrd) `shiftR` (elemBits * (n - 1)) - fres <- fstep acc (toEnum $ fromIntegral old) + Yield x s -> do + let wrd1 = addToWord wrd x + old = wrd `shiftR` (elemBits * (patLen - 1)) + fres <- fstep fs (toEnum $ fromIntegral old) case fres of - FL.Partial sres -> go wrd (n - 1) sres - FL.Done bres -> do - ini <- initial - return $ Yield bres (GO_SHORT_PAT_DRAIN n ini s wrd) - FL.Done1 bres -> do - ini <- initial - return $ GO_SHORT_PAT_DRAIN n ini s wrd - go _ _ acc = done acc >>= \r -> return (Yield r GO_DONE) - stepOuter gst (GO_SHORT_PAT stt) = initial >>= go0 SPEC 0 (0 :: Word) stt - - where - - mask :: Word - mask = (1 `shiftL` (elemBits * patLen)) - 1 - - addToWord wrd a = (wrd `shiftL` elemBits) .|. fromIntegral (fromEnum a) - - patWord :: Word - patWord = mask .&. A.foldl' addToWord 0 patArr - - go0 !_ !idx wrd st !acc = do - res <- step (adaptState gst) st - case res of - Yield x s -> do - let wrd' = addToWord wrd x - if idx == maxIndex - then do - if wrd' .&. mask == patWord + FL.Partial sres -> + if wrd1 .&. mask == pat then do - r <- done acc - return $ Yield r (GO_SHORT_PAT s) - else go1 SPEC wrd' s acc - else go0 SPEC (idx + 1) wrd' s acc - Skip s -> go0 SPEC idx wrd s acc - Stop -> - if idx /= 0 - then -- stepOuter gst (GO_SHORT_PAT_DRAIN patLen acc s wrd) - go2 wrd idx acc - else done acc >>= \r -> return (Yield r GO_DONE) + r <- done sres + return + $ Skip + $ GO_YIELD r $ GO_SHORT_PAT_ACCUM 0 s (0 :: Word) + else return $ Skip $ GO_SHORT_PAT_NEXT sres s wrd1 + -- If the fold terminates then the behaviour is same as + -- if the pattern is matched + FL.Done bres -> + return + $ Skip + $ GO_YIELD bres $ GO_SHORT_PAT_ACCUM 0 s (0 :: Word) + FL.Done1 bres -> + let wrd1 = addToWord (0 :: Word) x + in return + $ Skip + $ GO_YIELD bres $ GO_SHORT_PAT_ACCUM 0 s wrd1 + Skip s -> return $ Skip $ GO_SHORT_PAT_NEXT fs s wrd + Stop -> return $ Skip $ GO_SHORT_PAT_DRAIN patLen fs wrd + -- XXX Check if this is correct? Consider n == 1, fromIntegral (mask + -- .&. word) is considering more than one element. We should change the mask + -- to ignore the previous elements. + stepOuter _ (GO_SHORT_PAT_DRAIN 0 fs _) = do + r <- done fs + return $ Skip $ GO_YIELD r GO_DONE + stepOuter _ (GO_SHORT_PAT_DRAIN n fs wrd) = do + let old = wrd `shiftR` (elemBits * (n - 1)) + mask = (1 `shiftL` (elemBits * (n - 1))) - 1 :: Word + wrd1 = mask .&. wrd + fres <- fstep fs (toEnum $ fromIntegral old) + case fres of + FL.Partial sres -> + return $ Skip $ GO_SHORT_PAT_DRAIN (n - 1) sres wrd1 + FL.Done bres -> do + ini <- initial + return + $ Skip $ GO_YIELD bres $ GO_SHORT_PAT_DRAIN (n - 1) ini wrd1 + FL.Done1 bres -> do + ini <- initial + return $ Skip $ GO_YIELD bres $ GO_SHORT_PAT_DRAIN n ini wrd - {-# INLINE go1 #-} - go1 !_ wrd st !acc = do - res <- step (adaptState gst) st - case res of - Yield x s -> do - let wrd' = addToWord wrd x - old = (mask .&. wrd) `shiftR` (elemBits * (patLen - 1)) - acc' <- fstep acc (toEnum $ fromIntegral old) - case acc' of - FL.Partial sres -> - if wrd' .&. mask == patWord - then done sres - >>= \r -> return $ Yield r (GO_SHORT_PAT s) - else go1 SPEC wrd' s sres - FL.Done bres -> return $ Yield bres (GO_SHORT_PAT s) - FL.Done1 bres -> - return $ Yield bres (GO_SHORT_PAT_WITH s wrd x) - Skip s -> go1 SPEC wrd s acc - Stop -> - -- stepOuter gst (GO_SHORT_PAT_DRAIN patLen acc s wrd) - go2 wrd patLen acc - - -- Eliminate this? - go2 !wrd !n !acc - | n > 0 = do - let old = (mask .&. wrd) `shiftR` (elemBits * (n - 1)) - fres <- fstep acc (toEnum $ fromIntegral old) - case fres of - FL.Partial sres -> go2 wrd (n - 1) sres - FL.Done bres -> return $ Yield bres GO_DONE - go2 _ _ acc = done acc >>= \r -> return (Yield r GO_DONE) +{- ------------------------------- -- General Pattern - Karp Rabin ------------------------------- From f491349f57f9f19aefe7f50aa3c914ef69c7b6ee Mon Sep 17 00:00:00 2001 From: adithyaov Date: Wed, 9 Sep 2020 15:21:31 +0530 Subject: [PATCH 08/30] fixup! Interim commit --- src/Streamly/Internal/Data/Stream/StreamD.hs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/Streamly/Internal/Data/Stream/StreamD.hs b/src/Streamly/Internal/Data/Stream/StreamD.hs index bb797d7c1d..360c19bf58 100644 --- a/src/Streamly/Internal/Data/Stream/StreamD.hs +++ b/src/Streamly/Internal/Data/Stream/StreamD.hs @@ -1657,10 +1657,13 @@ splitOn patArr (Fold fstep initial done) (Stream step state) = -- XXX I initially put this in the state but I'd like to keep the state as -- simple as possible to allow more fusion. Removing this from the state did -- not change anything. I'll introduce them back once I figure out what's - -- wrong. + -- currently wrong. mask :: Word mask = (1 `shiftL` (elemBits * patLen)) - 1 - + -- XXX I initially put this in the state but I'd like to keep the state as + -- simple as possible to allow more fusion. Removing this from the state did + -- not change anything. I'll introduce them back once I figure out what's + -- currently wrong. pat :: Word -- XXX You dont need .&. here? pat = mask .&. A.foldl' addToWord 0 patArr From 7d26e53458958bafca63a5f34e6fa42a573f3e8c Mon Sep 17 00:00:00 2001 From: adithyaov Date: Thu, 10 Sep 2020 03:08:32 +0530 Subject: [PATCH 09/30] Expose advance and add moveBy in Ring --- src/Streamly/Memory/Ring.hs | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/Streamly/Memory/Ring.hs b/src/Streamly/Memory/Ring.hs index ba97a4eda6..1c1ff90fb6 100644 --- a/src/Streamly/Memory/Ring.hs +++ b/src/Streamly/Memory/Ring.hs @@ -12,6 +12,8 @@ module Streamly.Memory.Ring -- * Construction , new + , advance + , moveBy -- * Modification , unsafeInsert @@ -79,6 +81,23 @@ advance Ring{..} ringHead = then ptr else unsafeForeignPtrToPtr ringStart +-- | Move the ringHead by n items. The direction depends on the sign on whether +-- n is positive or negative. Wrap around if we hit the beginning or end of the +-- array. +{-# INLINE moveBy #-} +moveBy :: forall a. Storable a => Int -> Ring a -> Ptr a -> Ptr a +moveBy by Ring {..} ringHead = ringStartPtr `plusPtr` advanceFromHead + + where + + elemSize = sizeOf (undefined :: a) + ringStartPtr = unsafeForeignPtrToPtr ringStart + lenInBytes = ringBound `minusPtr` ringStartPtr + offInBytes = ringHead `minusPtr` ringStartPtr + len = assert (lenInBytes `mod` elemSize == 0) $ lenInBytes `div` elemSize + off = assert (offInBytes `mod` elemSize == 0) $ offInBytes `div` elemSize + advanceFromHead = (off + by `mod` len) * elemSize + -- | Insert an item at the head of the ring, when the ring is full this -- replaces the oldest item in the ring with the new item. This is unsafe -- beause ringHead supplied is not verified to be within the Ring. Also, From d4b97dba93c4539dd00c9123904131b0ff67accc Mon Sep 17 00:00:00 2001 From: adithyaov Date: Thu, 10 Sep 2020 03:10:22 +0530 Subject: [PATCH 10/30] fixup! fixup! Interim commit --- src/Streamly/Internal/Data/Stream/StreamD.hs | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/Streamly/Internal/Data/Stream/StreamD.hs b/src/Streamly/Internal/Data/Stream/StreamD.hs index 360c19bf58..f530ecb004 100644 --- a/src/Streamly/Internal/Data/Stream/StreamD.hs +++ b/src/Streamly/Internal/Data/Stream/StreamD.hs @@ -1775,11 +1775,13 @@ splitOn patArr (Fold fstep initial done) (Stream step state) = res <- step (adaptState gst) st case res of Yield x s -> do - let wrd1 = addToWord wrd x + let wrd1 = mask .&. addToWord wrd x old = wrd `shiftR` (elemBits * (patLen - 1)) fres <- fstep fs (toEnum $ fromIntegral old) case fres of FL.Partial sres -> + -- XXX Removing the then branch increases the + -- performance by 150% if wrd1 .&. mask == pat then do r <- done sres @@ -1794,10 +1796,10 @@ splitOn patArr (Fold fstep initial done) (Stream step state) = $ Skip $ GO_YIELD bres $ GO_SHORT_PAT_ACCUM 0 s (0 :: Word) FL.Done1 bres -> - let wrd1 = addToWord (0 :: Word) x - in return - $ Skip - $ GO_YIELD bres $ GO_SHORT_PAT_ACCUM 0 s wrd1 + return + $ Skip + $ GO_YIELD bres + $ GO_SHORT_PAT_ACCUM 1 s $ addToWord (0 :: Word) x Skip s -> return $ Skip $ GO_SHORT_PAT_NEXT fs s wrd Stop -> return $ Skip $ GO_SHORT_PAT_DRAIN patLen fs wrd -- XXX Check if this is correct? Consider n == 1, fromIntegral (mask From d787b1bb14f298b39ea358a4d225e89a05c2a0d1 Mon Sep 17 00:00:00 2001 From: adithyaov Date: Thu, 10 Sep 2020 03:11:28 +0530 Subject: [PATCH 11/30] splitOn Rabin Karp --- src/Streamly/Internal/Data/Stream/StreamD.hs | 235 ++++++++++++++++++- 1 file changed, 227 insertions(+), 8 deletions(-) diff --git a/src/Streamly/Internal/Data/Stream/StreamD.hs b/src/Streamly/Internal/Data/Stream/StreamD.hs index f530ecb004..da3cd77eb4 100644 --- a/src/Streamly/Internal/Data/Stream/StreamD.hs +++ b/src/Streamly/Internal/Data/Stream/StreamD.hs @@ -1619,7 +1619,7 @@ data SplitOptions = SplitOptions -} {-# ANN type SplitOnState Fuse #-} -data SplitOnState fs s a b w = +data SplitOnState fs s a b w rb rh ck = GO_START | GO_EMPTY_PAT s | GO_EMPTY_PAT_WITH s a @@ -1628,16 +1628,24 @@ data SplitOnState fs s a b w = | GO_SHORT_PAT_ACCUM Int s !w | GO_SHORT_PAT_NEXT !fs s !w | GO_SHORT_PAT_DRAIN Int !fs !w - | GO_YIELD b (SplitOnState fs s a b w) + | GO_KARP_RABIN_ACCUM Int s rb rh + | GO_KARP_RABIN_NEXT fs s rb rh ck + | GO_KARP_RABIN_DRAIN Int fs rb rh + | GO_YIELD b (SplitOnState fs s a b w rb rh ck) | GO_DONE -- XXX Can this be written as smaller folds and be used with foldMany/foldMany1. -- The logic is basically the same thing, the only catch being that we have rely -- on GHC for simplification. We can compare the performance and choose -- accordingly. --- XXX This does not fuse. Even in origin/master this does not fuse. There is --- about 15% degradation in performance. If internal recursion is used (go, go1 --- etc.) there is about 1500% degradation in performance + +-- XXX In the case for SINGLE_PAT This does not fuse. Even in origin/master this +-- does not fuse. There is about 15% degradation in performance. If internal +-- recursion is used (go, go1 etc.) there is about 1500% degradation in +-- performance. + +-- XXX In the case for SHORT_PAT it's pretty bad. Its about 10 times worse. With +-- the fusion-plugin it's about 5-6 times worse. {-# INLINE_NORMAL splitOn #-} splitOn :: forall m a b. (MonadIO m, Storable a, Enum a, Eq a) @@ -1655,11 +1663,12 @@ splitOn patArr (Fold fstep initial done) (Stream step state) = elemBits = sizeOf (undefined :: a) * 8 -- XXX I initially put this in the state but I'd like to keep the state as - -- simple as possible to allow more fusion. Removing this from the state did - -- not change anything. I'll introduce them back once I figure out what's - -- currently wrong. + -- simple as possible to allow more fusion (if that's how it + -- works). Removing this from the state did not change anything. I'll + -- introduce them back once I figure out what's currently wrong. mask :: Word mask = (1 `shiftL` (elemBits * patLen)) - 1 + -- XXX I initially put this in the state but I'd like to keep the state as -- simple as possible to allow more fusion. Removing this from the state did -- not change anything. I'll introduce them back once I figure out what's @@ -1671,6 +1680,18 @@ splitOn patArr (Fold fstep initial done) (Stream step state) = -- Used only if the pattern is short addToWord wd a = (wd `shiftL` elemBits) .|. fromIntegral (fromEnum a) + -- Used in Rabin-Karp + k = 2891336453 :: Word32 + coeff = k ^ patLen + + addCksum cksum a = cksum * k + fromIntegral (fromEnum a) + + deltaCksum cksum old new = + addCksum cksum new - coeff * fromIntegral (fromEnum old) + + -- XXX shall we use a random starting hash or 1 instead of 0? + patHash = A.foldl' addCksum 0 patArr + {-# INLINE_LATE stepOuter #-} stepOuter _ GO_START = if patLen == 0 @@ -1747,6 +1768,7 @@ splitOn patArr (Fold fstep initial done) (Stream step state) = --------------------------- -- Short Pattern - Shift Or --------------------------- + -- XXX mask can be omitted in most of the places? stepOuter gst (GO_SHORT_PAT_ACCUM idx st wrd) = do res <- step (adaptState gst) st case res of @@ -1823,6 +1845,203 @@ splitOn patArr (Fold fstep initial done) (Stream step state) = FL.Done1 bres -> do ini <- initial return $ Skip $ GO_YIELD bres $ GO_SHORT_PAT_DRAIN n ini wrd + ------------------------------- + -- General Pattern - Karp Rabin + ------------------------------- + stepOuter gst (GO_KARP_RABIN_ACCUM idx st rb rh) = do + res <- step (adaptState gst) st + case res of + Yield x s -> do + rh1 <- liftIO $ RB.unsafeInsert rb rh x + if idx == maxIndex + then do + let fold = RB.unsafeFoldRing (RB.ringBound rb) + let !ringHash = fold addCksum 0 rb + if ringHash == patHash + then do + r <- initial >>= done + return + $ Skip $ GO_YIELD r $ GO_KARP_RABIN_ACCUM 0 s rb rh1 + else do + ini <- initial + return $ Skip $ GO_KARP_RABIN_NEXT ini s rb rh1 ringHash + else return $ Skip $ GO_KARP_RABIN_ACCUM (idx + 1) s rb rh1 + Skip s -> return $ Skip $ GO_KARP_RABIN_ACCUM idx s rb rh + Stop -> do + ini <- initial + let rh1 = RB.moveBy (0 - idx) rb (rh :: Ptr a) + if idx /= 0 + then return $ Skip $ GO_KARP_RABIN_DRAIN idx ini rb rh1 + else do + r <- done ini + return $ Skip $ GO_YIELD r GO_DONE + -- XXX Theoretically this code can do 4 times faster if GHC generates + -- optimal code. If we use just "(cksum1 == patHash)" condition it goes 4x + -- faster, as soon as we add the "RB.unsafeEqArray rb v" condition the + -- generated code changes drastically and becomes 4x slower. Need to + -- investigate what is going on with GHC. + stepOuter gst (GO_KARP_RABIN_NEXT fs st rb rh cksum) = do + res <- step (adaptState gst) st + case res of + Yield x s -> do + old <- liftIO $ peek rh + let cksum1 = deltaCksum cksum old x + fres <- fstep fs old + case fres of + FL.Partial sres -> + if (cksum1 == patHash) + then do + r <- done sres + -- XXX It does not matter whether we give rh or rh1 + -- here. We restart anyway. + return + $ Skip + $ GO_YIELD r $ GO_KARP_RABIN_ACCUM 0 s rb rh + else do + rh1 <- liftIO (RB.unsafeInsert rb rh x) + return + $ Skip $ GO_KARP_RABIN_NEXT sres s rb rh1 cksum1 + FL.Done bres -> + return + $ Skip $ GO_YIELD bres $ GO_KARP_RABIN_ACCUM 0 s rb rh + FL.Done1 bres -> do + rh1 <- liftIO (RB.unsafeInsert rb rh x) + return + $ Skip + $ GO_YIELD bres $ GO_KARP_RABIN_ACCUM 1 s rb rh1 + Skip s -> return $ Skip $ GO_KARP_RABIN_NEXT fs s rb rh cksum + Stop -> return $ Skip $ GO_KARP_RABIN_DRAIN patLen fs rb rh + stepOuter _ (GO_KARP_RABIN_DRAIN 0 fs _ _) = do + r <- done fs + return $ Skip $ GO_YIELD r GO_DONE + stepOuter _ (GO_KARP_RABIN_DRAIN n fs rb rh) = do + old <- liftIO $ peek rh + let rh1 = RB.advance rb rh + fres <- fstep fs old + case fres of + FL.Partial sres -> + return $ Skip $ GO_KARP_RABIN_DRAIN (n - 1) sres rb rh1 + FL.Done bres -> do + ini <- initial + return + $ Skip + $ GO_YIELD bres $ GO_KARP_RABIN_DRAIN (n - 1) ini rb rh1 + FL.Done1 bres -> do + ini <- initial + return $ Skip $ GO_YIELD bres $ GO_KARP_RABIN_DRAIN n ini rb rh + +{- + stepOuter gst (GO_KARP_RABIN stt rb rhead) = + initial >>= go0 SPEC 0 rhead stt + + where + + -- rh == ringHead + go0 !_ !idx !rh st !acc = do + res <- step (adaptState gst) st + case res of + Yield x s -> do + rh' <- liftIO $ RB.unsafeInsert rb rh x + if idx == maxIndex + then do + let fold = RB.unsafeFoldRing (RB.ringBound rb) + let !ringHash = fold addCksum 0 rb + if ringHash == patHash + then go2 SPEC ringHash rh' s acc + else go1 SPEC ringHash rh' s acc + else go0 SPEC (idx + 1) rh' s acc + Skip s -> go0 SPEC idx rh s acc + Stop -> do + !acc' <- + if idx /= 0 + then RB.unsafeFoldRingM + rh + (liftStep fstep) + (FL.Partial acc) + rb + else return (FL.Partial acc) + liftExtract done acc' >>= \r -> return $ Yield r GO_DONE + + -- XXX Theoretically this code can do 4 times faster if GHC generates + -- optimal code. If we use just "(cksum' == patHash)" condition it goes + -- 4x faster, as soon as we add the "RB.unsafeEqArray rb v" condition + -- the generated code changes drastically and becomes 4x slower. Need + -- to investigate what is going on with GHC. + {-# INLINE go1 #-} + go1 !_ !cksum !rh st !acc = do + res <- step (adaptState gst) st + case res of + Yield x s -> do + old <- liftIO $ peek rh + let cksum' = deltaCksum cksum old x + acc' <- fstep acc old + case acc' of + FL.Partial sres -> + if (cksum' == patHash) + then do + rh' <- liftIO (RB.unsafeInsert rb rh x) + go2 SPEC cksum' rh' s sres + else do + rh' <- liftIO (RB.unsafeInsert rb rh x) + go1 SPEC cksum' rh' s sres + FL.Done bres -> + if (cksum' == patHash) + then do + rh' <- liftIO (RB.unsafeInsert rb rh x) + go2' SPEC cksum' rh' s bres + else do + rh' <- liftIO (RB.unsafeInsert rb rh x) + go1' SPEC cksum' rh' s bres + Skip s -> go1 SPEC cksum rh s acc + Stop -> do + acc' <- + RB.unsafeFoldRingFullM + rh + (liftStep fstep) + (FL.Partial acc) + rb + liftExtract done acc' >>= \r -> return $ Yield r GO_DONE + + go1' !_ !cksum !rh st !acc = do + res <- step (adaptState gst) st + case res of + Yield x s -> do + old <- liftIO $ peek rh + let cksum' = deltaCksum cksum old x + if (cksum' == patHash) + then do + return $ Yield acc (GO_KARP_RABIN s rb rhead) + else do + rh' <- liftIO (RB.unsafeInsert rb rh x) + go1' SPEC cksum' rh' s acc + Skip s -> go1' SPEC cksum rh s acc + Stop -> return $ Yield acc GO_DONE + + go2 !_ !cksum' !rh' s !acc' = do + if RB.unsafeEqArray rb rh' patArr + then do + r <- done acc' + return $ Yield r (GO_KARP_RABIN s rb rhead) + else go1 SPEC cksum' rh' s acc' + + go2' !_ !cksum' !rh' s !acc' = do + if RB.unsafeEqArray rb rh' patArr + then return $ Yield acc' (GO_KARP_RABIN s rb rhead) + else go1' SPEC cksum' rh' s acc' + stepOuter gst (GO_EMPTY_PAT st) = do + res <- step (adaptState gst) st + case res of + Yield x s -> do + acc <- initial + acc' <- fstep acc x + case acc' of + FL.Partial sres -> + done sres >>= \r -> return $ Yield r (GO_EMPTY_PAT s) + FL.Done bres -> return $ Yield bres (GO_EMPTY_PAT s) + Skip s -> return $ Skip (GO_EMPTY_PAT s) + Stop -> return Stop + stepOuter _ GO_DONE = return Stop +-} {- ------------------------------- From 05a01728bc60c77123c6b48adcea067bb80b6041 Mon Sep 17 00:00:00 2001 From: adithyaov Date: Thu, 10 Sep 2020 03:22:01 +0530 Subject: [PATCH 12/30] fixup! splitOn Rabin Karp --- src/Streamly/Internal/Data/Stream/StreamD.hs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/Streamly/Internal/Data/Stream/StreamD.hs b/src/Streamly/Internal/Data/Stream/StreamD.hs index da3cd77eb4..1b967e4a01 100644 --- a/src/Streamly/Internal/Data/Stream/StreamD.hs +++ b/src/Streamly/Internal/Data/Stream/StreamD.hs @@ -1705,11 +1705,8 @@ splitOn patArr (Fold fstep initial done) (Stream step state) = <= sizeOf (undefined :: Word) then return $ Skip $ GO_SHORT_PAT_ACCUM 0 state (0 :: Word) else do - undefined - {- (rb, rhead) <- liftIO $ RB.new patLen - return $ Skip $ GO_KARP_RABIN state rb rhead - -} + return $ Skip $ GO_KARP_RABIN_ACCUM 0 state rb rhead stepOuter _ (GO_YIELD x ns) = return $ Yield x ns stepOuter _ GO_DONE = return Stop --------------------------- From 1b0fc46836947228d68c8b8287ab5a5b6769e10c Mon Sep 17 00:00:00 2001 From: adithyaov Date: Thu, 10 Sep 2020 03:46:47 +0530 Subject: [PATCH 13/30] fixup! fixup! splitOn Rabin Karp --- src/Streamly/Internal/Data/Stream/StreamD.hs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/Streamly/Internal/Data/Stream/StreamD.hs b/src/Streamly/Internal/Data/Stream/StreamD.hs index 1b967e4a01..fc25422165 100644 --- a/src/Streamly/Internal/Data/Stream/StreamD.hs +++ b/src/Streamly/Internal/Data/Stream/StreamD.hs @@ -1645,7 +1645,10 @@ data SplitOnState fs s a b w rb rh ck = -- performance. -- XXX In the case for SHORT_PAT it's pretty bad. Its about 10 times worse. With --- the fusion-plugin it's about 5-6 times worse. +-- the fusion-plugin it's about 5 times worse. + +-- XXX In the case for KARP_RABIN its about 4 times worse with the fusion-plugin +-- and 7 times worse without. {-# INLINE_NORMAL splitOn #-} splitOn :: forall m a b. (MonadIO m, Storable a, Enum a, Eq a) From 5666f1fa85d711fd1d52be52ff2b8fa6c839ca90 Mon Sep 17 00:00:00 2001 From: adithyaov Date: Thu, 10 Sep 2020 23:04:59 +0530 Subject: [PATCH 14/30] Fix splitOn --- src/Streamly/Internal/Data/Stream/StreamD.hs | 130 ++++++++++--------- 1 file changed, 67 insertions(+), 63 deletions(-) diff --git a/src/Streamly/Internal/Data/Stream/StreamD.hs b/src/Streamly/Internal/Data/Stream/StreamD.hs index fc25422165..13d0d202e8 100644 --- a/src/Streamly/Internal/Data/Stream/StreamD.hs +++ b/src/Streamly/Internal/Data/Stream/StreamD.hs @@ -1627,10 +1627,12 @@ data SplitOnState fs s a b w rb rh ck = | GO_SINGLE_PAT_WITH !fs s a a | GO_SHORT_PAT_ACCUM Int s !w | GO_SHORT_PAT_NEXT !fs s !w + | GO_SHORT_PAT_NEXT_WITH !fs s a !w | GO_SHORT_PAT_DRAIN Int !fs !w | GO_KARP_RABIN_ACCUM Int s rb rh - | GO_KARP_RABIN_NEXT fs s rb rh ck - | GO_KARP_RABIN_DRAIN Int fs rb rh + | GO_KARP_RABIN_NEXT !fs s rb rh !ck + | GO_KARP_RABIN_NEXT_WITH !fs s a rb rh !ck + | GO_KARP_RABIN_DRAIN Int !fs rb rh | GO_YIELD b (SplitOnState fs s a b w rb rh ck) | GO_DONE @@ -1639,10 +1641,11 @@ data SplitOnState fs s a b w rb rh ck = -- on GHC for simplification. We can compare the performance and choose -- accordingly. --- XXX In the case for SINGLE_PAT This does not fuse. Even in origin/master this --- does not fuse. There is about 15% degradation in performance. If internal --- recursion is used (go, go1 etc.) there is about 1500% degradation in --- performance. +-- XXX This does not fuse. Even in origin/master this does not completely fuse. + +-- XXX In the case for SINGLE_PAT There is about 15% degradation in +-- performance. If internal recursion is used (go, go1 etc.) there is about +-- 1500% degradation in performance. -- XXX In the case for SHORT_PAT it's pretty bad. Its about 10 times worse. With -- the fusion-plugin it's about 5 times worse. @@ -1668,7 +1671,8 @@ splitOn patArr (Fold fstep initial done) (Stream step state) = -- XXX I initially put this in the state but I'd like to keep the state as -- simple as possible to allow more fusion (if that's how it -- works). Removing this from the state did not change anything. I'll - -- introduce them back once I figure out what's currently wrong. + -- introduce them back once I figure out what's currently wrong. Ideally, I + -- want them to be associated with the state. mask :: Word mask = (1 `shiftL` (elemBits * patLen)) - 1 @@ -1738,7 +1742,6 @@ splitOn patArr (Fold fstep initial done) (Stream step state) = -- XXX These functions cab be designed in a better way? Get rid of go and -- have more info in the state but you would be relying on GHC for -- simplification. - -- XXX Multiple Yield points stepOuter _ (GO_SINGLE_PAT_WITH fs s pat x) = if pat == x then do @@ -1793,35 +1796,34 @@ splitOn patArr (Fold fstep initial done) (Stream step state) = else do r <- done ini return $ Skip $ GO_YIELD r GO_DONE + stepOuter _ (GO_SHORT_PAT_NEXT_WITH fs s x wrd) = do + let wrd1 = mask .&. addToWord wrd x + old = wrd `shiftR` (elemBits * (patLen - 1)) + fres <- fstep fs (toEnum $ fromIntegral old) + case fres of + FL.Partial sres -> + -- XXX Removing the then branch increases the + -- performance by 150% + if wrd1 .&. mask == pat + then do + r <- done sres + return + $ Skip $ GO_YIELD r $ GO_SHORT_PAT_ACCUM 0 s (0 :: Word) + else return $ Skip $ GO_SHORT_PAT_NEXT sres s wrd1 + -- If the fold terminates then the behaviour is same as if + -- the pattern is matched. We should not ignore the + -- currently stored pattern though. + FL.Done bres -> do + ini <- initial + return $ Skip $ GO_YIELD bres $ GO_SHORT_PAT_NEXT ini s wrd1 + FL.Done1 bres -> do + ini <- initial + return + $ Skip $ GO_YIELD bres $ GO_SHORT_PAT_NEXT_WITH ini s x wrd stepOuter gst (GO_SHORT_PAT_NEXT fs st wrd) = do res <- step (adaptState gst) st case res of - Yield x s -> do - let wrd1 = mask .&. addToWord wrd x - old = wrd `shiftR` (elemBits * (patLen - 1)) - fres <- fstep fs (toEnum $ fromIntegral old) - case fres of - FL.Partial sres -> - -- XXX Removing the then branch increases the - -- performance by 150% - if wrd1 .&. mask == pat - then do - r <- done sres - return - $ Skip - $ GO_YIELD r $ GO_SHORT_PAT_ACCUM 0 s (0 :: Word) - else return $ Skip $ GO_SHORT_PAT_NEXT sres s wrd1 - -- If the fold terminates then the behaviour is same as - -- if the pattern is matched - FL.Done bres -> - return - $ Skip - $ GO_YIELD bres $ GO_SHORT_PAT_ACCUM 0 s (0 :: Word) - FL.Done1 bres -> - return - $ Skip - $ GO_YIELD bres - $ GO_SHORT_PAT_ACCUM 1 s $ addToWord (0 :: Word) x + Yield x s -> return $ Skip $ GO_SHORT_PAT_NEXT_WITH fs s x wrd Skip s -> return $ Skip $ GO_SHORT_PAT_NEXT fs s wrd Stop -> return $ Skip $ GO_SHORT_PAT_DRAIN patLen fs wrd -- XXX Check if this is correct? Consider n == 1, fromIntegral (mask @@ -1880,37 +1882,39 @@ splitOn patArr (Fold fstep initial done) (Stream step state) = -- faster, as soon as we add the "RB.unsafeEqArray rb v" condition the -- generated code changes drastically and becomes 4x slower. Need to -- investigate what is going on with GHC. + stepOuter _ (GO_KARP_RABIN_NEXT_WITH fs s x rb rh cksum) = do + old <- liftIO $ peek rh + let cksum1 = deltaCksum cksum old x + fres <- fstep fs old + case fres of + FL.Partial sres -> + if (cksum1 == patHash) + then do + r <- done sres + -- XXX It does not matter whether we give rh or rh1 + -- here. We restart anyway. + return $ Skip $ GO_YIELD r $ GO_KARP_RABIN_ACCUM 0 s rb rh + else do + rh1 <- liftIO (RB.unsafeInsert rb rh x) + return $ Skip $ GO_KARP_RABIN_NEXT sres s rb rh1 cksum1 + FL.Done bres -> do + rh1 <- liftIO (RB.unsafeInsert rb rh x) + ini <- initial + return + $ Skip + $ GO_YIELD bres $ GO_KARP_RABIN_NEXT ini s rb rh1 cksum1 + FL.Done1 bres -> do + ini <- initial + return + $ Skip + $ GO_YIELD bres $ GO_KARP_RABIN_NEXT_WITH ini s x rb rh cksum stepOuter gst (GO_KARP_RABIN_NEXT fs st rb rh cksum) = do res <- step (adaptState gst) st - case res of - Yield x s -> do - old <- liftIO $ peek rh - let cksum1 = deltaCksum cksum old x - fres <- fstep fs old - case fres of - FL.Partial sres -> - if (cksum1 == patHash) - then do - r <- done sres - -- XXX It does not matter whether we give rh or rh1 - -- here. We restart anyway. - return - $ Skip - $ GO_YIELD r $ GO_KARP_RABIN_ACCUM 0 s rb rh - else do - rh1 <- liftIO (RB.unsafeInsert rb rh x) - return - $ Skip $ GO_KARP_RABIN_NEXT sres s rb rh1 cksum1 - FL.Done bres -> - return - $ Skip $ GO_YIELD bres $ GO_KARP_RABIN_ACCUM 0 s rb rh - FL.Done1 bres -> do - rh1 <- liftIO (RB.unsafeInsert rb rh x) - return - $ Skip - $ GO_YIELD bres $ GO_KARP_RABIN_ACCUM 1 s rb rh1 - Skip s -> return $ Skip $ GO_KARP_RABIN_NEXT fs s rb rh cksum - Stop -> return $ Skip $ GO_KARP_RABIN_DRAIN patLen fs rb rh + return + $ case res of + Yield x s -> Skip $ GO_KARP_RABIN_NEXT_WITH fs s x rb rh cksum + Skip s -> Skip $ GO_KARP_RABIN_NEXT fs s rb rh cksum + Stop -> Skip $ GO_KARP_RABIN_DRAIN patLen fs rb rh stepOuter _ (GO_KARP_RABIN_DRAIN 0 fs _ _) = do r <- done fs return $ Skip $ GO_YIELD r GO_DONE From caeb5ae88b66c4b7b5e511ffcb3d03f0c5fd84ad Mon Sep 17 00:00:00 2001 From: adithyaov Date: Fri, 11 Sep 2020 07:20:21 +0530 Subject: [PATCH 15/30] splitSuffixOn --- src/Streamly/Internal/Data/Stream/StreamD.hs | 1092 ++++++------------ 1 file changed, 376 insertions(+), 716 deletions(-) diff --git a/src/Streamly/Internal/Data/Stream/StreamD.hs b/src/Streamly/Internal/Data/Stream/StreamD.hs index 13d0d202e8..bae0cfc69c 100644 --- a/src/Streamly/Internal/Data/Stream/StreamD.hs +++ b/src/Streamly/Internal/Data/Stream/StreamD.hs @@ -1618,21 +1618,26 @@ data SplitOptions = SplitOptions } -} +-- XXX Use a different type for SplitSuffixOn? +-- XXX We can merge *ACCUM and *ACCUM1 constructors. {-# ANN type SplitOnState Fuse #-} data SplitOnState fs s a b w rb rh ck = GO_START | GO_EMPTY_PAT s | GO_EMPTY_PAT_WITH s a + | GO_SINGLE_PAT_BEGIN s a -- Only for splitSuffixOn | GO_SINGLE_PAT_NEXT !fs s a | GO_SINGLE_PAT_WITH !fs s a a | GO_SHORT_PAT_ACCUM Int s !w | GO_SHORT_PAT_NEXT !fs s !w | GO_SHORT_PAT_NEXT_WITH !fs s a !w | GO_SHORT_PAT_DRAIN Int !fs !w + | GO_SHORT_PAT_YIELD_SEP Int !fs s w -- Only for splitSuffixOn. | GO_KARP_RABIN_ACCUM Int s rb rh | GO_KARP_RABIN_NEXT !fs s rb rh !ck | GO_KARP_RABIN_NEXT_WITH !fs s a rb rh !ck | GO_KARP_RABIN_DRAIN Int !fs rb rh + | GO_KARP_RABIN_YIELD_SEP Int !fs s rb rh -- Only for splitSuffixOn. | GO_YIELD b (SplitOnState fs s a b w rb rh ck) | GO_DONE @@ -1706,8 +1711,8 @@ splitOn patArr (Fold fstep initial done) (Stream step state) = else if patLen == 1 then do acc <- initial - pat <- liftIO $ A.unsafeIndexIO patArr 0 - return $ Skip $ GO_SINGLE_PAT_NEXT acc state pat + pat_ <- liftIO $ A.unsafeIndexIO patArr 0 + return $ Skip $ GO_SINGLE_PAT_NEXT acc state pat_ else if sizeOf (undefined :: a) * patLen <= sizeOf (undefined :: Word) then return $ Skip $ GO_SHORT_PAT_ACCUM 0 state (0 :: Word) @@ -1742,29 +1747,31 @@ splitOn patArr (Fold fstep initial done) (Stream step state) = -- XXX These functions cab be designed in a better way? Get rid of go and -- have more info in the state but you would be relying on GHC for -- simplification. - stepOuter _ (GO_SINGLE_PAT_WITH fs s pat x) = - if pat == x + -- XXX We can probably move pat_ outside? Or move the things outside, inside + -- the state to make it consistent. + stepOuter _ (GO_SINGLE_PAT_WITH fs s pat_ x) = + if pat_ == x then do r <- done fs ini <- initial - return $ Skip $ GO_YIELD r (GO_SINGLE_PAT_NEXT ini s pat) + return $ Skip $ GO_YIELD r (GO_SINGLE_PAT_NEXT ini s pat_) else do res <- fstep fs x case res of FL.Partial sres -> - return $ Skip $ (GO_SINGLE_PAT_NEXT sres s pat) + return $ Skip $ (GO_SINGLE_PAT_NEXT sres s pat_) FL.Done bres -> do ini <- initial - return $ Skip $ GO_YIELD bres (GO_SINGLE_PAT_NEXT ini s pat) + return $ Skip $ GO_YIELD bres (GO_SINGLE_PAT_NEXT ini s pat_) FL.Done1 bres -> do ini <- initial return - $ Skip $ GO_YIELD bres (GO_SINGLE_PAT_WITH ini s pat x) - stepOuter gst (GO_SINGLE_PAT_NEXT fs st pat) = do + $ Skip $ GO_YIELD bres (GO_SINGLE_PAT_WITH ini s pat_ x) + stepOuter gst (GO_SINGLE_PAT_NEXT fs st pat_) = do res <- step (adaptState gst) st case res of - Yield x s -> return $ Skip $ GO_SINGLE_PAT_WITH fs s pat x - Skip s -> return $ Skip $ GO_SINGLE_PAT_NEXT fs s pat + Yield x s -> return $ Skip $ GO_SINGLE_PAT_WITH fs s pat_ x + Skip s -> return $ Skip $ GO_SINGLE_PAT_NEXT fs s pat_ Stop -> do r <- done fs return $ Skip $ GO_YIELD r GO_DONE @@ -1911,10 +1918,11 @@ splitOn patArr (Fold fstep initial done) (Stream step state) = stepOuter gst (GO_KARP_RABIN_NEXT fs st rb rh cksum) = do res <- step (adaptState gst) st return + $ Skip $ case res of - Yield x s -> Skip $ GO_KARP_RABIN_NEXT_WITH fs s x rb rh cksum - Skip s -> Skip $ GO_KARP_RABIN_NEXT fs s rb rh cksum - Stop -> Skip $ GO_KARP_RABIN_DRAIN patLen fs rb rh + Yield x s -> GO_KARP_RABIN_NEXT_WITH fs s x rb rh cksum + Skip s -> GO_KARP_RABIN_NEXT fs s rb rh cksum + Stop -> GO_KARP_RABIN_DRAIN patLen fs rb rh stepOuter _ (GO_KARP_RABIN_DRAIN 0 fs _ _) = do r <- done fs return $ Skip $ GO_YIELD r GO_DONE @@ -1933,254 +1941,24 @@ splitOn patArr (Fold fstep initial done) (Stream step state) = FL.Done1 bres -> do ini <- initial return $ Skip $ GO_YIELD bres $ GO_KARP_RABIN_DRAIN n ini rb rh + stepOuter _ _ = undefined -{- - stepOuter gst (GO_KARP_RABIN stt rb rhead) = - initial >>= go0 SPEC 0 rhead stt - - where - - -- rh == ringHead - go0 !_ !idx !rh st !acc = do - res <- step (adaptState gst) st - case res of - Yield x s -> do - rh' <- liftIO $ RB.unsafeInsert rb rh x - if idx == maxIndex - then do - let fold = RB.unsafeFoldRing (RB.ringBound rb) - let !ringHash = fold addCksum 0 rb - if ringHash == patHash - then go2 SPEC ringHash rh' s acc - else go1 SPEC ringHash rh' s acc - else go0 SPEC (idx + 1) rh' s acc - Skip s -> go0 SPEC idx rh s acc - Stop -> do - !acc' <- - if idx /= 0 - then RB.unsafeFoldRingM - rh - (liftStep fstep) - (FL.Partial acc) - rb - else return (FL.Partial acc) - liftExtract done acc' >>= \r -> return $ Yield r GO_DONE - - -- XXX Theoretically this code can do 4 times faster if GHC generates - -- optimal code. If we use just "(cksum' == patHash)" condition it goes - -- 4x faster, as soon as we add the "RB.unsafeEqArray rb v" condition - -- the generated code changes drastically and becomes 4x slower. Need - -- to investigate what is going on with GHC. - {-# INLINE go1 #-} - go1 !_ !cksum !rh st !acc = do - res <- step (adaptState gst) st - case res of - Yield x s -> do - old <- liftIO $ peek rh - let cksum' = deltaCksum cksum old x - acc' <- fstep acc old - case acc' of - FL.Partial sres -> - if (cksum' == patHash) - then do - rh' <- liftIO (RB.unsafeInsert rb rh x) - go2 SPEC cksum' rh' s sres - else do - rh' <- liftIO (RB.unsafeInsert rb rh x) - go1 SPEC cksum' rh' s sres - FL.Done bres -> - if (cksum' == patHash) - then do - rh' <- liftIO (RB.unsafeInsert rb rh x) - go2' SPEC cksum' rh' s bres - else do - rh' <- liftIO (RB.unsafeInsert rb rh x) - go1' SPEC cksum' rh' s bres - Skip s -> go1 SPEC cksum rh s acc - Stop -> do - acc' <- - RB.unsafeFoldRingFullM - rh - (liftStep fstep) - (FL.Partial acc) - rb - liftExtract done acc' >>= \r -> return $ Yield r GO_DONE - - go1' !_ !cksum !rh st !acc = do - res <- step (adaptState gst) st - case res of - Yield x s -> do - old <- liftIO $ peek rh - let cksum' = deltaCksum cksum old x - if (cksum' == patHash) - then do - return $ Yield acc (GO_KARP_RABIN s rb rhead) - else do - rh' <- liftIO (RB.unsafeInsert rb rh x) - go1' SPEC cksum' rh' s acc - Skip s -> go1' SPEC cksum rh s acc - Stop -> return $ Yield acc GO_DONE - - go2 !_ !cksum' !rh' s !acc' = do - if RB.unsafeEqArray rb rh' patArr - then do - r <- done acc' - return $ Yield r (GO_KARP_RABIN s rb rhead) - else go1 SPEC cksum' rh' s acc' - - go2' !_ !cksum' !rh' s !acc' = do - if RB.unsafeEqArray rb rh' patArr - then return $ Yield acc' (GO_KARP_RABIN s rb rhead) - else go1' SPEC cksum' rh' s acc' - stepOuter gst (GO_EMPTY_PAT st) = do - res <- step (adaptState gst) st - case res of - Yield x s -> do - acc <- initial - acc' <- fstep acc x - case acc' of - FL.Partial sres -> - done sres >>= \r -> return $ Yield r (GO_EMPTY_PAT s) - FL.Done bres -> return $ Yield bres (GO_EMPTY_PAT s) - Skip s -> return $ Skip (GO_EMPTY_PAT s) - Stop -> return Stop - stepOuter _ GO_DONE = return Stop --} - -{- - ------------------------------- - -- General Pattern - Karp Rabin - ------------------------------- - stepOuter gst (GO_KARP_RABIN stt rb rhead) = - initial >>= go0 SPEC 0 rhead stt - - where - - k = 2891336453 :: Word32 - coeff = k ^ patLen - - addCksum cksum a = cksum * k + fromIntegral (fromEnum a) - - deltaCksum cksum old new = - addCksum cksum new - coeff * fromIntegral (fromEnum old) - - -- XXX shall we use a random starting hash or 1 instead of 0? - patHash = A.foldl' addCksum 0 patArr +-- XXX Can this be written as smaller folds and be used with foldMany/foldMany1. +-- The logic is basically the same thing, the only catch being that we have rely +-- on GHC for simplification. We can compare the performance and choose +-- accordingly. - -- XXX Have a terminating fold for folding a Ring to eliminate these - liftStep lstep (FL.Partial s) a = lstep s a - liftStep _ x _ = return x +-- XXX This does not fuse. Even in origin/master this does not completely fuse. - liftExtract _ (FL.Done b) = return b - liftExtract ldone (FL.Partial s) = ldone s +-- XXX In the case for SINGLE_PAT There is about 15% degradation in +-- performance. If internal recursion is used (go, go1 etc.) there is about +-- 1500% degradation in performance. - -- rh == ringHead - go0 !_ !idx !rh st !acc = do - res <- step (adaptState gst) st - case res of - Yield x s -> do - rh' <- liftIO $ RB.unsafeInsert rb rh x - if idx == maxIndex - then do - let fold = RB.unsafeFoldRing (RB.ringBound rb) - let !ringHash = fold addCksum 0 rb - if ringHash == patHash - then go2 SPEC ringHash rh' s acc - else go1 SPEC ringHash rh' s acc - else go0 SPEC (idx + 1) rh' s acc - Skip s -> go0 SPEC idx rh s acc - Stop -> do - !acc' <- - if idx /= 0 - then RB.unsafeFoldRingM - rh - (liftStep fstep) - (FL.Partial acc) - rb - else return (FL.Partial acc) - liftExtract done acc' >>= \r -> return $ Yield r GO_DONE - - -- XXX Theoretically this code can do 4 times faster if GHC generates - -- optimal code. If we use just "(cksum' == patHash)" condition it goes - -- 4x faster, as soon as we add the "RB.unsafeEqArray rb v" condition - -- the generated code changes drastically and becomes 4x slower. Need - -- to investigate what is going on with GHC. - {-# INLINE go1 #-} - go1 !_ !cksum !rh st !acc = do - res <- step (adaptState gst) st - case res of - Yield x s -> do - old <- liftIO $ peek rh - let cksum' = deltaCksum cksum old x - acc' <- fstep acc old - case acc' of - FL.Partial sres -> - if (cksum' == patHash) - then do - rh' <- liftIO (RB.unsafeInsert rb rh x) - go2 SPEC cksum' rh' s sres - else do - rh' <- liftIO (RB.unsafeInsert rb rh x) - go1 SPEC cksum' rh' s sres - FL.Done bres -> - if (cksum' == patHash) - then do - rh' <- liftIO (RB.unsafeInsert rb rh x) - go2' SPEC cksum' rh' s bres - else do - rh' <- liftIO (RB.unsafeInsert rb rh x) - go1' SPEC cksum' rh' s bres - Skip s -> go1 SPEC cksum rh s acc - Stop -> do - acc' <- - RB.unsafeFoldRingFullM - rh - (liftStep fstep) - (FL.Partial acc) - rb - liftExtract done acc' >>= \r -> return $ Yield r GO_DONE - - go1' !_ !cksum !rh st !acc = do - res <- step (adaptState gst) st - case res of - Yield x s -> do - old <- liftIO $ peek rh - let cksum' = deltaCksum cksum old x - if (cksum' == patHash) - then do - return $ Yield acc (GO_KARP_RABIN s rb rhead) - else do - rh' <- liftIO (RB.unsafeInsert rb rh x) - go1' SPEC cksum' rh' s acc - Skip s -> go1' SPEC cksum rh s acc - Stop -> return $ Yield acc GO_DONE - - go2 !_ !cksum' !rh' s !acc' = do - if RB.unsafeEqArray rb rh' patArr - then do - r <- done acc' - return $ Yield r (GO_KARP_RABIN s rb rhead) - else go1 SPEC cksum' rh' s acc' - - go2' !_ !cksum' !rh' s !acc' = do - if RB.unsafeEqArray rb rh' patArr - then return $ Yield acc' (GO_KARP_RABIN s rb rhead) - else go1' SPEC cksum' rh' s acc' - stepOuter gst (GO_EMPTY_PAT st) = do - res <- step (adaptState gst) st - case res of - Yield x s -> do - acc <- initial - acc' <- fstep acc x - case acc' of - FL.Partial sres -> - done sres >>= \r -> return $ Yield r (GO_EMPTY_PAT s) - FL.Done bres -> return $ Yield bres (GO_EMPTY_PAT s) - Skip s -> return $ Skip (GO_EMPTY_PAT s) - Stop -> return Stop - stepOuter _ GO_DONE = return Stop --} +-- XXX In the case for SHORT_PAT it's pretty bad. Its about 10 times worse. With +-- the fusion-plugin it's about 5 times worse. +-- XXX In the case for KARP_RABIN its about 4 times worse with the fusion-plugin +-- and 7 times worse without. {-# INLINE_NORMAL splitSuffixOn #-} splitSuffixOn :: forall m a b. (MonadIO m, Storable a, Enum a, Eq a) @@ -2189,9 +1967,7 @@ splitSuffixOn -> Fold m a b -> Stream m a -> Stream m b -splitSuffixOn withSep patArr (Fold fstep initial done) - (Stream step state) = undefined -{- +splitSuffixOn withSep patArr (Fold fstep initial done) (Stream step state) = Stream stepOuter GO_START where @@ -2199,483 +1975,367 @@ splitSuffixOn withSep patArr (Fold fstep initial done) patLen = A.length patArr maxIndex = patLen - 1 elemBits = sizeOf (undefined :: a) * 8 - sngPat = unsafePerformIO $ A.unsafeIndexIO patArr 0 + + -- XXX I initially put this in the state but I'd like to keep the state as + -- simple as possible to allow more fusion (if that's how it + -- works). Removing this from the state did not change anything. I'll + -- introduce them back once I figure out what's currently wrong. Ideally, I + -- want them to be associated with the state. + mask :: Word + mask = (1 `shiftL` (elemBits * patLen)) - 1 + + -- XXX I initially put this in the state but I'd like to keep the state as + -- simple as possible to allow more fusion. Removing this from the state did + -- not change anything. I'll introduce them back once I figure out what's + -- currently wrong. + pat :: Word + -- XXX You dont need .&. here? + pat = mask .&. A.foldl' addToWord 0 patArr + + -- Used only if the pattern is short + addToWord wd a = (wd `shiftL` elemBits) .|. fromIntegral (fromEnum a) + + -- Used in Rabin-Karp + k = 2891336453 :: Word32 + coeff = k ^ patLen + + addCksum cksum a = cksum * k + fromIntegral (fromEnum a) + + deltaCksum cksum old new = + addCksum cksum new - coeff * fromIntegral (fromEnum old) + + -- XXX shall we use a random starting hash or 1 instead of 0? + patHash = A.foldl' addCksum 0 patArr {-# INLINE_LATE stepOuter #-} stepOuter _ GO_START = if patLen == 0 then return $ Skip $ GO_EMPTY_PAT state else if patLen == 1 - then return $ Skip $ GO_SINGLE_PAT state + then do + acc <- initial + pat <- liftIO $ A.unsafeIndexIO patArr 0 + return $ Skip $ GO_SINGLE_PAT_NEXT acc state pat else if sizeOf (undefined :: a) * patLen - <= sizeOf (undefined :: Word) - then return $ Skip $ GO_SHORT_PAT state - else do - (rb, rhead) <- liftIO $ RB.new patLen - return $ Skip $ GO_KARP_RABIN state rb rhead - - stepOuter gst (GO_SINGLE_PAT stt) = do - -- This first part is the only difference between splitOn and - -- splitSuffixOn. - -- If the last element is a separator do not issue a blank segment. - res <- step (adaptState gst) stt + <= sizeOf (undefined :: Word) + then return $ Skip $ GO_SHORT_PAT_ACCUM 0 state (0 :: Word) + else undefined + {- do + (rb, rhead) <- liftIO $ RB.new patLen + return $ Skip $ GO_KARP_RABIN_ACCUM 0 state rb rhead -} + stepOuter _ (GO_YIELD x ns) = return $ Yield x ns + stepOuter _ GO_DONE = return Stop + --------------------------- + -- Empty pattern + --------------------------- + stepOuter _ (GO_EMPTY_PAT_WITH s x) = do + ini <- initial + res <- fstep ini x case res of - Yield x s -> do - acc <- initial - if sngPat == x - then if withSep - then do - fres <- fstep acc x - case fres of - FL.Partial sres -> - done sres >>= \r -> - return $ Yield r (GO_SINGLE_PAT s) - FL.Done bres -> - return $ Yield bres (GO_SINGLE_PAT s) - else done acc >>= \r -> - return $ Yield r (GO_SINGLE_PAT s) - else do - fres <- fstep acc x - case fres of - FL.Partial sres -> go SPEC s sres - FL.Done bres -> go' SPEC s bres - Skip s -> return $ Skip $ (GO_SINGLE_PAT s) - Stop -> return Stop - - where - - go !_ st !acc = do - res <- step (adaptState gst) st - case res of - Yield x s -> do - if sngPat == x - then if withSep - then do - fres <- fstep acc x - case fres of - FL.Partial sres -> - done sres >>= \r -> - return $ Yield r (GO_SINGLE_PAT s) - FL.Done bres -> - return $ Yield bres (GO_SINGLE_PAT s) - else done acc >>= \r -> - return $ Yield r (GO_SINGLE_PAT s) - else do - acc' <- fstep acc x - case acc' of - FL.Partial sres -> go SPEC s sres - FL.Done bres -> go' SPEC s bres - Skip s -> go SPEC s acc - Stop -> done acc >>= \r -> return $ Yield r GO_DONE - - go' !_ st !acc = do - res <- step (adaptState gst) st - case res of - Yield x s -> do - if sngPat == x - then return $ Yield acc (GO_SINGLE_PAT s) - else go' SPEC s acc - Skip s -> go' SPEC s acc - Stop -> return $ Yield acc GO_DONE - - stepOuter gst (GO_SHORT_PAT stt) = do - - -- Call "initial" only if the stream yields an element, otherwise we - -- may call "initial" but never yield anything. initial may produce a - -- side effect, therefore we will end up doing and discard a side - -- effect. - - let idx = 0 - let wrd = 0 - res <- step (adaptState gst) stt + FL.Partial sres -> do + r <- done sres + return $ Skip $ GO_YIELD r (GO_EMPTY_PAT s) + FL.Done bres -> return $ Skip $ GO_YIELD bres (GO_EMPTY_PAT s) + FL.Done1 bres -> + return $ Skip $ GO_YIELD bres (GO_EMPTY_PAT_WITH s x) + stepOuter gst (GO_EMPTY_PAT st) = do + res <- step (adaptState gst) st + return + $ case res of + Yield x s -> Skip $ GO_EMPTY_PAT_WITH s x + Skip s -> Skip $ GO_EMPTY_PAT s + Stop -> Stop + ----------------- + -- Single Pattern + ----------------- + -- XXX These functions cab be designed in a better way? Get rid of go and + -- have more info in the state but you would be relying on GHC for + -- simplification. + stepOuter gst (GO_SINGLE_PAT_BEGIN st pat) = do + res <- step (adaptState gst) st case res of Yield x s -> do - acc <- initial - let wrd' = addToWord wrd x - if withSep - then do - fres <- fstep acc x - case fres of - FL.Partial sres -> - if idx == maxIndex - then do - if wrd' .&. mask == patWord - then done sres >>= \r -> return $ Yield r (GO_SHORT_PAT s) - else go0 SPEC (idx + 1) wrd' s sres - else go0 SPEC (idx + 1) wrd' s sres - FL.Done bres -> - if idx == maxIndex - then do - if wrd' .&. mask == patWord - then return $ Yield bres (GO_SHORT_PAT s) - else go0' SPEC (idx + 1) wrd' s bres - else go0' SPEC (idx + 1) wrd' s bres - else if idx == maxIndex - then do - if wrd' .&. mask == patWord - then done acc >>= \r -> return $ Yield r (GO_SHORT_PAT s) - else go0 SPEC (idx + 1) wrd' s acc - else go0 SPEC (idx + 1) wrd' s acc - Skip s -> return $ Skip (GO_SHORT_PAT s) + ini <- initial + return $ Skip $ GO_SINGLE_PAT_WITH ini s pat x + Skip s -> return $ Skip $ GO_SINGLE_PAT_BEGIN s pat Stop -> return Stop - - where - - mask :: Word - mask = (1 `shiftL` (elemBits * patLen)) - 1 - - addToWord wrd a = (wrd `shiftL` elemBits) .|. fromIntegral (fromEnum a) - - patWord :: Word - patWord = mask .&. A.foldl' addToWord 0 patArr - - go0 !_ !idx wrd st !acc = do - res <- step (adaptState gst) st - case res of - Yield x s -> do - let wrd' = addToWord wrd x - -- XXX Eliminate common code - if withSep - then do - fres <- fstep acc x - case fres of - FL.Partial sres -> - if idx == maxIndex - then do - if wrd' .&. mask == patWord - then do - r <- done sres - return $ Yield r (GO_SHORT_PAT s) - else go1 SPEC wrd' s sres - else go0 SPEC (idx + 1) wrd' s sres - FL.Done bres -> - if idx == maxIndex - then do - if wrd' .&. mask == patWord - then return $ Yield bres (GO_SHORT_PAT s) - else go1' SPEC wrd' s bres - else go0' SPEC (idx + 1) wrd' s bres - else if idx == maxIndex - then do - if wrd' .&. mask == patWord - then do - r <- done acc - return $ Yield r (GO_SHORT_PAT s) - else go1 SPEC wrd' s acc - else go0 SPEC (idx + 1) wrd' s acc - Skip s -> go0 SPEC idx wrd s acc - Stop -> do - if (idx == maxIndex) && (wrd .&. mask == patWord) - then return Stop - else if idx /= 0 && not withSep - then go2 wrd idx acc - else done acc >>= \r -> return $ Yield r GO_DONE - - go0' !_ !idx wrd st !acc = do - res <- step (adaptState gst) st - case res of - Yield x s -> do - let wrd' = addToWord wrd x - if idx == maxIndex - then do - if wrd' .&. mask == patWord - then return $ Yield acc (GO_SHORT_PAT s) - else go1' SPEC wrd' s acc - else go0' SPEC (idx + 1) wrd' s acc - Skip s -> go0' SPEC idx wrd s acc - Stop -> do - if (idx == maxIndex) && (wrd .&. mask == patWord) - then return Stop - else return $ Yield acc GO_DONE - - {-# INLINE go1 #-} - go1 !_ wrd st !acc = do - res <- step (adaptState gst) st + stepOuter _ (GO_SINGLE_PAT_WITH fs s pat x) = + if withSep + then do + res <- fstep fs x case res of - Yield x s -> do - let wrd' = addToWord wrd x - old = (mask .&. wrd) `shiftR` (elemBits * (patLen - 1)) - if withSep + FL.Partial sres -> + if pat == x then do - fres <- fstep acc x - case fres of - FL.Partial sres -> - if wrd' .&. mask == patWord - then done sres >>= \r -> return $ Yield r (GO_SHORT_PAT s) - else go1 SPEC wrd' s sres - FL.Done bres -> - if wrd' .&. mask == patWord - then return $ Yield bres (GO_SHORT_PAT s) - else go1' SPEC wrd' s bres - else do - fres <- fstep acc (toEnum $ fromIntegral old) - case fres of - FL.Partial sres -> - if wrd' .&. mask == patWord - then done sres >>= \r -> return $ Yield r (GO_SHORT_PAT s) - else go1 SPEC wrd' s sres - FL.Done bres -> - if wrd' .&. mask == patWord - then return $ Yield bres (GO_SHORT_PAT s) - else go1' SPEC wrd' s bres - Skip s -> go1 SPEC wrd s acc - Stop -> - -- If the last sequence is a separator do not issue a blank - -- segment. - if wrd .&. mask == patWord - then return Stop - else if withSep - then done acc >>= \r -> return $ Yield r GO_DONE - else go2 wrd patLen acc - - {-# INLINE go1' #-} - go1' !_ wrd st !acc = do - res <- step (adaptState gst) st - case res of - Yield x s -> do - let wrd' = addToWord wrd x - if wrd' .&. mask == patWord - then return $ Yield acc (GO_SHORT_PAT s) - else go1' SPEC wrd' s acc - Skip s -> go1' SPEC wrd s acc - Stop -> - -- If the last sequence is a separator do not issue a blank - -- segment. - if wrd .&. mask == patWord - then return Stop - else return $ Yield acc GO_DONE - - go2 !wrd !n !acc | n > 0 = do - let old = (mask .&. wrd) `shiftR` (elemBits * (n - 1)) - fres <- fstep acc (toEnum $ fromIntegral old) - case fres of - FL.Partial sres -> go2 wrd (n - 1) sres - FL.Done bres -> return $ Yield bres GO_DONE - go2 _ _ acc = done acc >>= \r -> return (Yield r GO_DONE) - - stepOuter gst (GO_KARP_RABIN stt rb rhead) = do - let idx = 0 - res <- step (adaptState gst) stt + r <- done sres + return $ Skip $ GO_YIELD r (GO_SINGLE_PAT_BEGIN s pat) + else return $ Skip $ (GO_SINGLE_PAT_NEXT sres s pat) + FL.Done bres -> + return $ Skip $ GO_YIELD bres (GO_SINGLE_PAT_BEGIN s pat) + FL.Done1 bres -> do + ini <- initial + return + $ Skip $ GO_YIELD bres (GO_SINGLE_PAT_WITH ini s pat x) + else if pat == x + then do + r <- done fs + return $ Skip $ GO_YIELD r (GO_SINGLE_PAT_BEGIN s pat) + else do + res <- fstep fs x + case res of + FL.Partial sres -> + return $ Skip $ (GO_SINGLE_PAT_NEXT sres s pat) + FL.Done bres -> + return + $ Skip $ GO_YIELD bres (GO_SINGLE_PAT_BEGIN s pat) + FL.Done1 bres -> do + ini <- initial + return + $ Skip + $ GO_YIELD bres (GO_SINGLE_PAT_WITH ini s pat x) + stepOuter gst (GO_SINGLE_PAT_NEXT fs st pat) = do + res <- step (adaptState gst) st + case res of + Yield x s -> return $ Skip $ GO_SINGLE_PAT_WITH fs s pat x + Skip s -> return $ Skip $ GO_SINGLE_PAT_NEXT fs s pat + Stop -> do + r <- done fs + return $ Skip $ GO_YIELD r GO_DONE + --------------------------- + -- Short Pattern - Shift Or + --------------------------- + -- XXX mask can be omitted in most of the places? + stepOuter gst (GO_SHORT_PAT_ACCUM idx st wrd) = do + res <- step (adaptState gst) st case res of Yield x s -> do - acc <- initial - rh' <- liftIO (RB.unsafeInsert rb rhead x) - let fold = RB.unsafeFoldRing (RB.ringBound rb) - let !ringHash = fold addCksum 0 rb - if withSep + let wrd1 = addToWord wrd x + if idx == maxIndex + then if wrd1 .&. mask == pat + then do + ini <- initial + if withSep + then return + $ Skip + $ GO_SHORT_PAT_YIELD_SEP (idx + 1) ini s wrd1 + else do + r <- done ini + return + $ Skip + $ GO_YIELD r $ GO_SHORT_PAT_ACCUM 0 s (0 :: Word) + else do + ini <- initial + return $ Skip $ GO_SHORT_PAT_NEXT ini s wrd1 + else return $ Skip $ GO_SHORT_PAT_ACCUM (idx + 1) s wrd1 + Skip s -> return $ Skip $ GO_SHORT_PAT_ACCUM idx s wrd + -- XXX idx >= 1 + Stop -> + if idx /= 0 + then do + ini <- initial + return $ Skip $ GO_SHORT_PAT_DRAIN idx ini wrd + else return $ Stop + stepOuter _ (GO_SHORT_PAT_NEXT_WITH fs s x wrd) = do + let wrd1 = mask .&. addToWord wrd x + old = wrd `shiftR` (elemBits * (patLen - 1)) + fres <- fstep fs (toEnum $ fromIntegral old) + case fres of + FL.Partial sres -> + -- XXX Removing the then branch increases the + -- performance by 150% + if wrd1 .&. mask == pat + then if withSep + then return + $ Skip $ GO_SHORT_PAT_YIELD_SEP patLen sres s wrd1 + else do + r <- done sres + return + $ Skip + $ GO_YIELD r $ GO_SHORT_PAT_ACCUM 0 s (0 :: Word) + else return $ Skip $ GO_SHORT_PAT_NEXT sres s wrd1 + -- If the fold terminates then the behaviour is same as if + -- the pattern is matched. We should not ignore the + -- currently stored pattern though. + FL.Done bres -> do + ini <- initial + return $ Skip $ GO_YIELD bres $ GO_SHORT_PAT_NEXT ini s wrd1 + FL.Done1 bres -> do + ini <- initial + return + $ Skip $ GO_YIELD bres $ GO_SHORT_PAT_NEXT_WITH ini s x wrd + stepOuter gst (GO_SHORT_PAT_NEXT fs st wrd) = do + res <- step (adaptState gst) st + return + $ Skip + $ case res of + Yield x s -> GO_SHORT_PAT_NEXT_WITH fs s x wrd + Skip s -> GO_SHORT_PAT_NEXT fs s wrd + Stop -> GO_SHORT_PAT_DRAIN patLen fs wrd + -- XXX Check if this is correct? Consider n == 1, fromIntegral (mask + -- .&. word) is considering more than one element. We should change the mask + -- to ignore the previous elements. + -- XXX I'm defining a few operational semantics here of what should happen + -- if the fold terminates in the middle of a pattern. We can discuss on this + -- if required. + stepOuter _ (GO_SHORT_PAT_YIELD_SEP 0 fs s _) = do + r <- done fs + return $ Skip $ GO_YIELD r $ GO_SHORT_PAT_ACCUM 0 s (0 :: Word) + stepOuter _ (GO_SHORT_PAT_YIELD_SEP n fs s wrd) = do + let old = wrd `shiftR` (elemBits * (n - 1)) + mask = (1 `shiftL` (elemBits * (n - 1))) - 1 :: Word + wrd1 = mask .&. wrd + fres <- fstep fs (toEnum $ fromIntegral old) + case fres of + FL.Partial sres -> + return $ Skip $ GO_SHORT_PAT_YIELD_SEP (n - 1) sres s wrd1 + FL.Done bres -> do + ini <- initial + return + $ Skip + $ GO_YIELD bres $ GO_SHORT_PAT_YIELD_SEP (n - 1) ini s wrd1 + FL.Done1 bres -> do + ini <- initial + return + $ Skip $ GO_YIELD bres $ GO_SHORT_PAT_YIELD_SEP n ini s wrd + stepOuter _ (GO_SHORT_PAT_DRAIN 0 fs _) = do + r <- done fs + return $ Skip $ GO_YIELD r GO_DONE + stepOuter _ (GO_SHORT_PAT_DRAIN n fs wrd) = do + let old = wrd `shiftR` (elemBits * (n - 1)) + mask = (1 `shiftL` (elemBits * (n - 1))) - 1 :: Word + wrd1 = mask .&. wrd + fres <- fstep fs (toEnum $ fromIntegral old) + case fres of + FL.Partial sres -> + return $ Skip $ GO_SHORT_PAT_DRAIN (n - 1) sres wrd1 + FL.Done bres -> do + ini <- initial + return + $ Skip $ GO_YIELD bres $ GO_SHORT_PAT_DRAIN (n - 1) ini wrd1 + FL.Done1 bres -> do + ini <- initial + return $ Skip $ GO_YIELD bres $ GO_SHORT_PAT_DRAIN n ini wrd + ------------------------------- + -- General Pattern - Karp Rabin + ------------------------------- + stepOuter gst (GO_KARP_RABIN_ACCUM idx st rb rh) = do + res <- step (adaptState gst) st + case res of + Yield x s -> do + rh1 <- liftIO $ RB.unsafeInsert rb rh x + if idx == maxIndex then do - fres <- fstep acc x - case fres of - FL.Partial sres -> - if idx == maxIndex - then if ringHash == patHash - then go2 SPEC ringHash rh' s sres - else go0 SPEC (idx + 1) rh' s sres - else go0 SPEC (idx + 1) rh' s sres - FL.Done bres -> - if idx == maxIndex - then if ringHash == patHash - then go2' SPEC ringHash rh' s bres - else go0' SPEC (idx + 1) rh' s bres - else go0' SPEC (idx + 1) rh' s bres - else if idx == maxIndex - then if ringHash == patHash - then go2 SPEC ringHash rh' s acc - else go0 SPEC (idx + 1) rh' s acc - else go0 SPEC (idx + 1) rh' s acc - Skip s -> return $ Skip (GO_KARP_RABIN s rb rhead) - Stop -> return Stop - - where - - k = 2891336453 :: Word32 - coeff = k ^ patLen - addCksum cksum a = cksum * k + fromIntegral (fromEnum a) - deltaCksum cksum old new = - addCksum cksum new - coeff * fromIntegral (fromEnum old) - - -- XXX shall we use a random starting hash or 1 instead of 0? - patHash = A.foldl' addCksum 0 patArr - - -- XXX Have a terminating fold for folding a Ring to eliminate these - liftStep lstep (FL.Partial s) a = lstep s a - liftStep _ x _ = return x - liftExtract _ (FL.Done b) = return b - liftExtract ldone (FL.Partial s) = ldone s - - -- rh == ringHead - go0 !_ !idx !rh st !acc = do - res <- step (adaptState gst) st - case res of - Yield x s -> do - rh' <- liftIO (RB.unsafeInsert rb rh x) - let fold = RB.unsafeFoldRing (RB.ringBound rb) - let !ringHash = fold addCksum 0 rb - if withSep - then do - fres <- fstep acc x - case fres of - FL.Partial sres -> - if idx == maxIndex - then do - if ringHash == patHash - then go2 SPEC ringHash rh' s sres - else go1 SPEC ringHash rh' s sres - else go0 SPEC (idx + 1) rh' s sres - - FL.Done bres -> - if idx == maxIndex - then do - if ringHash == patHash - then go2' SPEC ringHash rh' s bres - else go1' SPEC ringHash rh' s bres - else go0' SPEC (idx + 1) rh' s bres - - else if idx == maxIndex - then do - if ringHash == patHash - then go2 SPEC ringHash rh' s acc - else go1 SPEC ringHash rh' s acc - else go0 SPEC (idx + 1) rh' s acc - Skip s -> go0 SPEC idx rh s acc - Stop -> do - -- do not issue a blank segment when we end at pattern - if (idx == maxIndex) && RB.unsafeEqArray rb rh patArr - then return Stop - else do - !acc' <- if idx /= 0 && not withSep - then RB.unsafeFoldRingM rh (liftStep fstep) (FL.Partial acc) rb - else return $ FL.Partial acc - liftExtract done acc' >>= \r -> return $ Yield r GO_DONE - - go0' !_ !idx !rh st !acc = do - res <- step (adaptState gst) st - case res of - Yield x s -> do - rh' <- liftIO (RB.unsafeInsert rb rh x) let fold = RB.unsafeFoldRing (RB.ringBound rb) let !ringHash = fold addCksum 0 rb - if idx == maxIndex - then do - if ringHash == patHash - then go2' SPEC ringHash rh' s acc - else go1' SPEC ringHash rh' s acc - else go0' SPEC (idx + 1) rh' s acc - Skip s -> go0' SPEC idx rh s acc - Stop -> do - -- do not issue a blank segment when we end at pattern - if (idx == maxIndex) && RB.unsafeEqArray rb rh patArr - then return Stop - -- Is this behaviour correct? - else return $ Yield acc GO_DONE - - -- XXX Theoretically this code can do 4 times faster if GHC generates - -- optimal code. If we use just "(cksum' == patHash)" condition it goes - -- 4x faster, as soon as we add the "RB.unsafeEqArray rb v" condition - -- the generated code changes drastically and becomes 4x slower. Need - -- to investigate what is going on with GHC. - {-# INLINE go1 #-} - go1 !_ !cksum !rh st !acc = do - res <- step (adaptState gst) st - case res of - Yield x s -> do - old <- liftIO $ peek rh - let cksum' = deltaCksum cksum old x - if withSep - then do - fres <- fstep acc x - case fres of - FL.Partial sres -> - if (cksum' == patHash) - then do - rh' <- liftIO (RB.unsafeInsert rb rh x) - go2 SPEC cksum' rh' s sres - else do - rh' <- liftIO (RB.unsafeInsert rb rh x) - go1 SPEC cksum' rh' s sres - FL.Done bres -> - if (cksum' == patHash) - then do - rh' <- liftIO (RB.unsafeInsert rb rh x) - go2' SPEC cksum' rh' s bres - else do - rh' <- liftIO (RB.unsafeInsert rb rh x) - go1' SPEC cksum' rh' s bres - else do - fres <- fstep acc old - case fres of - FL.Partial sres -> - if (cksum' == patHash) - then do - rh' <- liftIO (RB.unsafeInsert rb rh x) - go2 SPEC cksum' rh' s sres - else do - rh' <- liftIO (RB.unsafeInsert rb rh x) - go1 SPEC cksum' rh' s sres - FL.Done bres -> - if (cksum' == patHash) - then do - rh' <- liftIO (RB.unsafeInsert rb rh x) - go2' SPEC cksum' rh' s bres - else do - rh' <- liftIO (RB.unsafeInsert rb rh x) - go1' SPEC cksum' rh' s bres - Skip s -> go1 SPEC cksum rh s acc - Stop -> do - if RB.unsafeEqArray rb rh patArr - then return Stop - else do - acc' <- if withSep - then return (FL.Partial acc) - else RB.unsafeFoldRingFullM rh (liftStep fstep) (FL.Partial acc) rb - liftExtract done acc' >>= \r -> return $ Yield r GO_DONE - - go1' !_ !cksum !rh st !acc = do - res <- step (adaptState gst) st - case res of - Yield x s -> do - old <- liftIO $ peek rh - let cksum' = deltaCksum cksum old x - if (cksum' == patHash) + if ringHash == patHash then do - rh' <- liftIO (RB.unsafeInsert rb rh x) - go2' SPEC cksum' rh' s acc + ini <- initial + if withSep + then return + $ Skip + $ GO_KARP_RABIN_YIELD_SEP (idx + 1) ini s rb rh1 + else do + r <- done ini + return + $ Skip + $ GO_YIELD r $ GO_KARP_RABIN_ACCUM 0 s rb rh1 else do - rh' <- liftIO (RB.unsafeInsert rb rh x) - go1' SPEC cksum' rh' s acc - Skip s -> go1' SPEC cksum rh s acc - Stop -> do - if RB.unsafeEqArray rb rh patArr - then return Stop - else return $ Yield acc GO_DONE - - go2 !_ !cksum' !rh' s !acc' = do - if RB.unsafeEqArray rb rh' patArr - then do - r <- done acc' - return $ Yield r (GO_KARP_RABIN s rb rhead) - else go1 SPEC cksum' rh' s acc' - - go2' !_ !cksum' !rh' s !acc' = do - if RB.unsafeEqArray rb rh' patArr - then return $ Yield acc' (GO_KARP_RABIN s rb rhead) - else go1' SPEC cksum' rh' s acc' - - stepOuter gst (GO_EMPTY_PAT st) = do + ini <- initial + return $ Skip $ GO_KARP_RABIN_NEXT ini s rb rh1 ringHash + else return $ Skip $ GO_KARP_RABIN_ACCUM (idx + 1) s rb rh1 + Skip s -> return $ Skip $ GO_KARP_RABIN_ACCUM idx s rb rh + Stop -> do + if idx /= 0 + then do + ini <- initial + let rh1 = RB.moveBy (0 - idx) rb (rh :: Ptr a) + return $ Skip $ GO_KARP_RABIN_DRAIN idx ini rb rh1 + else return $ Stop + -- XXX Theoretically this code can do 4 times faster if GHC generates + -- optimal code. If we use just "(cksum1 == patHash)" condition it goes 4x + -- faster, as soon as we add the "RB.unsafeEqArray rb v" condition the + -- generated code changes drastically and becomes 4x slower. Need to + -- investigate what is going on with GHC. + stepOuter _ (GO_KARP_RABIN_NEXT_WITH fs s x rb rh cksum) = do + old <- liftIO $ peek rh + let cksum1 = deltaCksum cksum old x + fres <- fstep fs old + case fres of + FL.Partial sres -> + if (cksum1 == patHash) + then if withSep + then do + rh1 <- liftIO (RB.unsafeInsert rb rh x) + return + $ Skip $ GO_KARP_RABIN_YIELD_SEP patLen sres s rb rh1 + else do + r <- done sres + -- XXX It does not matter whether we give rh or rh1 + -- here. We restart anyway. + return + $ Skip $ GO_YIELD r $ GO_KARP_RABIN_ACCUM 0 s rb rh + else do + rh1 <- liftIO (RB.unsafeInsert rb rh x) + return $ Skip $ GO_KARP_RABIN_NEXT sres s rb rh1 cksum1 + FL.Done bres -> do + rh1 <- liftIO (RB.unsafeInsert rb rh x) + ini <- initial + return + $ Skip + $ GO_YIELD bres $ GO_KARP_RABIN_NEXT ini s rb rh1 cksum1 + FL.Done1 bres -> do + ini <- initial + return + $ Skip + $ GO_YIELD bres $ GO_KARP_RABIN_NEXT_WITH ini s x rb rh cksum + stepOuter gst (GO_KARP_RABIN_NEXT fs st rb rh cksum) = do res <- step (adaptState gst) st - case res of - Yield x s -> do - acc <- initial - acc' <- fstep acc x - case acc' of - FL.Partial sres -> - done sres >>= \r -> return $ Yield r (GO_EMPTY_PAT s) - FL.Done bres -> return $ Yield bres (GO_EMPTY_PAT s) - Skip s -> return $ Skip (GO_EMPTY_PAT s) - Stop -> return Stop - - stepOuter _ GO_DONE = return Stop --} + return + $ Skip + $ case res of + Yield x s -> GO_KARP_RABIN_NEXT_WITH fs s x rb rh cksum + Skip s -> GO_KARP_RABIN_NEXT fs s rb rh cksum + Stop -> GO_KARP_RABIN_DRAIN patLen fs rb rh + -- XXX I'm defining a few operational semantics here of what should happen + -- if the fold terminates in the middle of a pattern. We can discuss on this + -- if required. + stepOuter _ (GO_KARP_RABIN_YIELD_SEP 0 fs s rb rh) = do + r <- done fs + return $ Skip $ GO_YIELD r $ GO_KARP_RABIN_ACCUM 0 s rb rh + stepOuter _ (GO_KARP_RABIN_YIELD_SEP n fs s rb rh) = do + old <- liftIO $ peek rh + let rh1 = RB.advance rb rh + fres <- fstep fs old + case fres of + FL.Partial sres -> + return $ Skip $ GO_KARP_RABIN_YIELD_SEP (n - 1) sres s rb rh1 + FL.Done bres -> do + ini <- initial + return + $ Skip + $ GO_YIELD bres $ GO_KARP_RABIN_YIELD_SEP (n - 1) ini s rb rh1 + FL.Done1 bres -> do + ini <- initial + return + $ Skip $ GO_YIELD bres $ GO_KARP_RABIN_YIELD_SEP n ini s rb rh + stepOuter _ (GO_KARP_RABIN_DRAIN 0 fs _ _) = do + r <- done fs + return $ Skip $ GO_YIELD r GO_DONE + stepOuter _ (GO_KARP_RABIN_DRAIN n fs rb rh) = do + old <- liftIO $ peek rh + let rh1 = RB.advance rb rh + fres <- fstep fs old + case fres of + FL.Partial sres -> + return $ Skip $ GO_KARP_RABIN_DRAIN (n - 1) sres rb rh1 + FL.Done bres -> do + ini <- initial + return + $ Skip + $ GO_YIELD bres $ GO_KARP_RABIN_DRAIN (n - 1) ini rb rh1 + FL.Done1 bres -> do + ini <- initial + return $ Skip $ GO_YIELD bres $ GO_KARP_RABIN_DRAIN n ini rb rh data SplitState s arr = SplitInitial s From 67de32bce145e73caf69b726df75a303416ec586 Mon Sep 17 00:00:00 2001 From: adithyaov Date: Fri, 11 Sep 2020 07:23:18 +0530 Subject: [PATCH 16/30] fixup! splitSuffixOn --- src/Streamly/Internal/Data/Stream/StreamD.hs | 50 ++++++++++---------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/src/Streamly/Internal/Data/Stream/StreamD.hs b/src/Streamly/Internal/Data/Stream/StreamD.hs index bae0cfc69c..df220f36c9 100644 --- a/src/Streamly/Internal/Data/Stream/StreamD.hs +++ b/src/Streamly/Internal/Data/Stream/StreamD.hs @@ -1841,8 +1841,8 @@ splitOn patArr (Fold fstep initial done) (Stream step state) = return $ Skip $ GO_YIELD r GO_DONE stepOuter _ (GO_SHORT_PAT_DRAIN n fs wrd) = do let old = wrd `shiftR` (elemBits * (n - 1)) - mask = (1 `shiftL` (elemBits * (n - 1))) - 1 :: Word - wrd1 = mask .&. wrd + mask_ = (1 `shiftL` (elemBits * (n - 1))) - 1 :: Word + wrd1 = mask_ .&. wrd fres <- fstep fs (toEnum $ fromIntegral old) case fres of FL.Partial sres -> @@ -2014,8 +2014,8 @@ splitSuffixOn withSep patArr (Fold fstep initial done) (Stream step state) = else if patLen == 1 then do acc <- initial - pat <- liftIO $ A.unsafeIndexIO patArr 0 - return $ Skip $ GO_SINGLE_PAT_NEXT acc state pat + pat_ <- liftIO $ A.unsafeIndexIO patArr 0 + return $ Skip $ GO_SINGLE_PAT_NEXT acc state pat_ else if sizeOf (undefined :: a) * patLen <= sizeOf (undefined :: Word) then return $ Skip $ GO_SHORT_PAT_ACCUM 0 state (0 :: Word) @@ -2051,53 +2051,53 @@ splitSuffixOn withSep patArr (Fold fstep initial done) (Stream step state) = -- XXX These functions cab be designed in a better way? Get rid of go and -- have more info in the state but you would be relying on GHC for -- simplification. - stepOuter gst (GO_SINGLE_PAT_BEGIN st pat) = do + stepOuter gst (GO_SINGLE_PAT_BEGIN st pat_) = do res <- step (adaptState gst) st case res of Yield x s -> do ini <- initial - return $ Skip $ GO_SINGLE_PAT_WITH ini s pat x - Skip s -> return $ Skip $ GO_SINGLE_PAT_BEGIN s pat + return $ Skip $ GO_SINGLE_PAT_WITH ini s pat_ x + Skip s -> return $ Skip $ GO_SINGLE_PAT_BEGIN s pat_ Stop -> return Stop - stepOuter _ (GO_SINGLE_PAT_WITH fs s pat x) = + stepOuter _ (GO_SINGLE_PAT_WITH fs s pat_ x) = if withSep then do res <- fstep fs x case res of FL.Partial sres -> - if pat == x + if pat_ == x then do r <- done sres - return $ Skip $ GO_YIELD r (GO_SINGLE_PAT_BEGIN s pat) - else return $ Skip $ (GO_SINGLE_PAT_NEXT sres s pat) + return $ Skip $ GO_YIELD r (GO_SINGLE_PAT_BEGIN s pat_) + else return $ Skip $ (GO_SINGLE_PAT_NEXT sres s pat_) FL.Done bres -> - return $ Skip $ GO_YIELD bres (GO_SINGLE_PAT_BEGIN s pat) + return $ Skip $ GO_YIELD bres (GO_SINGLE_PAT_BEGIN s pat_) FL.Done1 bres -> do ini <- initial return - $ Skip $ GO_YIELD bres (GO_SINGLE_PAT_WITH ini s pat x) - else if pat == x + $ Skip $ GO_YIELD bres (GO_SINGLE_PAT_WITH ini s pat_ x) + else if pat_ == x then do r <- done fs - return $ Skip $ GO_YIELD r (GO_SINGLE_PAT_BEGIN s pat) + return $ Skip $ GO_YIELD r (GO_SINGLE_PAT_BEGIN s pat_) else do res <- fstep fs x case res of FL.Partial sres -> - return $ Skip $ (GO_SINGLE_PAT_NEXT sres s pat) + return $ Skip $ (GO_SINGLE_PAT_NEXT sres s pat_) FL.Done bres -> return - $ Skip $ GO_YIELD bres (GO_SINGLE_PAT_BEGIN s pat) + $ Skip $ GO_YIELD bres (GO_SINGLE_PAT_BEGIN s pat_) FL.Done1 bres -> do ini <- initial return $ Skip - $ GO_YIELD bres (GO_SINGLE_PAT_WITH ini s pat x) - stepOuter gst (GO_SINGLE_PAT_NEXT fs st pat) = do + $ GO_YIELD bres (GO_SINGLE_PAT_WITH ini s pat_ x) + stepOuter gst (GO_SINGLE_PAT_NEXT fs st pat_) = do res <- step (adaptState gst) st case res of - Yield x s -> return $ Skip $ GO_SINGLE_PAT_WITH fs s pat x - Skip s -> return $ Skip $ GO_SINGLE_PAT_NEXT fs s pat + Yield x s -> return $ Skip $ GO_SINGLE_PAT_WITH fs s pat_ x + Skip s -> return $ Skip $ GO_SINGLE_PAT_NEXT fs s pat_ Stop -> do r <- done fs return $ Skip $ GO_YIELD r GO_DONE @@ -2182,8 +2182,8 @@ splitSuffixOn withSep patArr (Fold fstep initial done) (Stream step state) = return $ Skip $ GO_YIELD r $ GO_SHORT_PAT_ACCUM 0 s (0 :: Word) stepOuter _ (GO_SHORT_PAT_YIELD_SEP n fs s wrd) = do let old = wrd `shiftR` (elemBits * (n - 1)) - mask = (1 `shiftL` (elemBits * (n - 1))) - 1 :: Word - wrd1 = mask .&. wrd + mask_ = (1 `shiftL` (elemBits * (n - 1))) - 1 :: Word + wrd1 = mask_ .&. wrd fres <- fstep fs (toEnum $ fromIntegral old) case fres of FL.Partial sres -> @@ -2202,8 +2202,8 @@ splitSuffixOn withSep patArr (Fold fstep initial done) (Stream step state) = return $ Skip $ GO_YIELD r GO_DONE stepOuter _ (GO_SHORT_PAT_DRAIN n fs wrd) = do let old = wrd `shiftR` (elemBits * (n - 1)) - mask = (1 `shiftL` (elemBits * (n - 1))) - 1 :: Word - wrd1 = mask .&. wrd + mask_ = (1 `shiftL` (elemBits * (n - 1))) - 1 :: Word + wrd1 = mask_ .&. wrd fres <- fstep fs (toEnum $ fromIntegral old) case fres of FL.Partial sres -> From f0c6f8f624665b2ffe64b78517f35d02795c2a47 Mon Sep 17 00:00:00 2001 From: adithyaov Date: Fri, 11 Sep 2020 16:51:36 +0530 Subject: [PATCH 17/30] fixup! fixup! splitSuffixOn --- src/Streamly/Internal/Data/Stream/StreamD.hs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Streamly/Internal/Data/Stream/StreamD.hs b/src/Streamly/Internal/Data/Stream/StreamD.hs index df220f36c9..b972726b34 100644 --- a/src/Streamly/Internal/Data/Stream/StreamD.hs +++ b/src/Streamly/Internal/Data/Stream/StreamD.hs @@ -1841,8 +1841,8 @@ splitOn patArr (Fold fstep initial done) (Stream step state) = return $ Skip $ GO_YIELD r GO_DONE stepOuter _ (GO_SHORT_PAT_DRAIN n fs wrd) = do let old = wrd `shiftR` (elemBits * (n - 1)) - mask_ = (1 `shiftL` (elemBits * (n - 1))) - 1 :: Word - wrd1 = mask_ .&. wrd + mask1 = (1 `shiftL` (elemBits * (n - 1))) - 1 :: Word + wrd1 = mask1 .&. wrd fres <- fstep fs (toEnum $ fromIntegral old) case fres of FL.Partial sres -> @@ -2182,8 +2182,8 @@ splitSuffixOn withSep patArr (Fold fstep initial done) (Stream step state) = return $ Skip $ GO_YIELD r $ GO_SHORT_PAT_ACCUM 0 s (0 :: Word) stepOuter _ (GO_SHORT_PAT_YIELD_SEP n fs s wrd) = do let old = wrd `shiftR` (elemBits * (n - 1)) - mask_ = (1 `shiftL` (elemBits * (n - 1))) - 1 :: Word - wrd1 = mask_ .&. wrd + mask1 = (1 `shiftL` (elemBits * (n - 1))) - 1 :: Word + wrd1 = mask1 .&. wrd fres <- fstep fs (toEnum $ fromIntegral old) case fres of FL.Partial sres -> @@ -2202,8 +2202,8 @@ splitSuffixOn withSep patArr (Fold fstep initial done) (Stream step state) = return $ Skip $ GO_YIELD r GO_DONE stepOuter _ (GO_SHORT_PAT_DRAIN n fs wrd) = do let old = wrd `shiftR` (elemBits * (n - 1)) - mask_ = (1 `shiftL` (elemBits * (n - 1))) - 1 :: Word - wrd1 = mask_ .&. wrd + mask1 = (1 `shiftL` (elemBits * (n - 1))) - 1 :: Word + wrd1 = mask1 .&. wrd fres <- fstep fs (toEnum $ fromIntegral old) case fres of FL.Partial sres -> From ad21107b4a61ad9efa58e0b2ff8418886535fef0 Mon Sep 17 00:00:00 2001 From: adithyaov Date: Fri, 11 Sep 2020 18:17:53 +0530 Subject: [PATCH 18/30] fixup! fixup! fixup! splitSuffixOn --- benchmark/Streamly/Benchmark/Data/Fold.hs | 1 + src/Streamly/Internal/Data/Stream/StreamD.hs | 5 ++--- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/benchmark/Streamly/Benchmark/Data/Fold.hs b/benchmark/Streamly/Benchmark/Data/Fold.hs index 0de7840159..a6523f99cf 100644 --- a/benchmark/Streamly/Benchmark/Data/Fold.hs +++ b/benchmark/Streamly/Benchmark/Data/Fold.hs @@ -20,6 +20,7 @@ import Prelude (IO, Int, Double, String, (>), (<$>), (+), ($), import Data.Map.Strict (Map) import Streamly.Internal.Data.Fold (Fold(..)) +import Streamly.Internal.Data.Stream.IsStream (SerialT) import qualified Data.Map.Strict as Map diff --git a/src/Streamly/Internal/Data/Stream/StreamD.hs b/src/Streamly/Internal/Data/Stream/StreamD.hs index b972726b34..c08af35033 100644 --- a/src/Streamly/Internal/Data/Stream/StreamD.hs +++ b/src/Streamly/Internal/Data/Stream/StreamD.hs @@ -2019,10 +2019,9 @@ splitSuffixOn withSep patArr (Fold fstep initial done) (Stream step state) = else if sizeOf (undefined :: a) * patLen <= sizeOf (undefined :: Word) then return $ Skip $ GO_SHORT_PAT_ACCUM 0 state (0 :: Word) - else undefined - {- do + else do (rb, rhead) <- liftIO $ RB.new patLen - return $ Skip $ GO_KARP_RABIN_ACCUM 0 state rb rhead -} + return $ Skip $ GO_KARP_RABIN_ACCUM 0 state rb rhead stepOuter _ (GO_YIELD x ns) = return $ Yield x ns stepOuter _ GO_DONE = return Stop --------------------------- From 6d0e7bf762b786fdfeb1d78f07f1f00df2cfdca6 Mon Sep 17 00:00:00 2001 From: adithyaov Date: Sat, 12 Sep 2020 00:36:49 +0530 Subject: [PATCH 19/30] Indentation --- .../Internal/Data/Array/Storable/Foreign/Mut/Types.hs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/Streamly/Internal/Data/Array/Storable/Foreign/Mut/Types.hs b/src/Streamly/Internal/Data/Array/Storable/Foreign/Mut/Types.hs index 21f9835451..32027a44f5 100644 --- a/src/Streamly/Internal/Data/Array/Storable/Foreign/Mut/Types.hs +++ b/src/Streamly/Internal/Data/Array/Storable/Foreign/Mut/Types.hs @@ -1149,9 +1149,13 @@ writeNUnsafe n = Fold step initial extract initial = do (Array start end _) <- liftIO $ newArray (max n 0) return $ ArrayUnsafe start end + step (ArrayUnsafe start end) x = do liftIO $ poke end x - return $ FL.Partial $ ArrayUnsafe start (end `plusPtr` sizeOf (undefined :: a)) + return + $ FL.Partial + $ ArrayUnsafe start (end `plusPtr` sizeOf (undefined :: a)) + extract (ArrayUnsafe start end) = return $ Array start end end -- liftIO . shrinkToFit -- XXX The realloc based implementation needs to make one extra copy if we use From 0d26fd1fe9af0524c3cdd1a2325ea747c0c9631b Mon Sep 17 00:00:00 2001 From: adithyaov Date: Sun, 13 Sep 2020 20:36:54 +0530 Subject: [PATCH 20/30] Interim review --- .../Data/Array/Storable/Foreign/Types.hs | 1 + src/Streamly/Internal/Data/Fold.hs | 28 +++++++++++++------ .../Internal/Data/Stream/StreamD/Type.hs | 1 + 3 files changed, 22 insertions(+), 8 deletions(-) diff --git a/src/Streamly/Internal/Data/Array/Storable/Foreign/Types.hs b/src/Streamly/Internal/Data/Array/Storable/Foreign/Types.hs index 666722ea2e..12f4b3a2f1 100644 --- a/src/Streamly/Internal/Data/Array/Storable/Foreign/Types.hs +++ b/src/Streamly/Internal/Data/Array/Storable/Foreign/Types.hs @@ -333,6 +333,7 @@ fromStreamDN limit str = unsafeFreeze <$> MA.fromStreamDN limit str -- Streams of arrays ------------------------------------------------------------------------------- +-- XXX Investigare performance. fusion-plugin solves the issue though -- | @fromStreamArraysOf n stream@ groups the input stream into a stream of -- arrays of size n. {-# INLINE_NORMAL fromStreamDArraysOf #-} diff --git a/src/Streamly/Internal/Data/Fold.hs b/src/Streamly/Internal/Data/Fold.hs index a6fccfdc15..bb7fa4e3d5 100644 --- a/src/Streamly/Internal/Data/Fold.hs +++ b/src/Streamly/Internal/Data/Fold.hs @@ -530,15 +530,27 @@ length = genericLength sum :: (Monad m, Num a) => Fold m a a sum = Fold (\x a -> return $ Partial $ x + a) (return 0) return +-- XXX Have a terminating condition here if `a == 0` -- | Determine the product of all elements of a stream of numbers. Returns --- multiplicative identity (@1@) when the stream is empty. +-- multiplicative identity (@1@) when the stream is empty. The fold terminates +-- when it encounters (@0@) in its input. -- -- > product = fmap getProduct $ FL.foldMap Product -- -- @since 0.7.0 -{-# INLINABLE product #-} -product :: (Monad m, Num a) => Fold m a a -product = Fold (\x a -> return $ Partial $ x * a) (return 1) return +{-# INLINE product #-} +product :: (Monad m, Num a, Eq a) => Fold m a a +product = Fold step (return 1) return + + where + + step x a = + return + $ if a == 0 + then Done 0 + else Partial $ x * a + + ------------------------------------------------------------------------------ -- To Summary (Maybe) @@ -548,7 +560,7 @@ product = Fold (\x a -> return $ Partial $ x * a) (return 1) return -- function. -- -- @since 0.7.0 -{-# INLINABLE maximumBy #-} +{-# INLINE maximumBy #-} maximumBy :: Monad m => (a -> a -> Ordering) -> Fold m a (Maybe a) maximumBy cmp = _Fold1 max' @@ -569,14 +581,14 @@ maximumBy cmp = _Fold1 max' -- Compare with @FL.foldMap Max@. -- -- @since 0.7.0 -{-# INLINABLE maximum #-} +{-# INLINE maximum #-} maximum :: (Monad m, Ord a) => Fold m a (Maybe a) maximum = _Fold1 max -- | Computes the minimum element with respect to the given comparison function -- -- @since 0.7.0 -{-# INLINABLE minimumBy #-} +{-# INLINE minimumBy #-} minimumBy :: Monad m => (a -> a -> Ordering) -> Fold m a (Maybe a) minimumBy cmp = _Fold1 min' @@ -597,7 +609,7 @@ minimumBy cmp = _Fold1 min' -- Compare with @FL.foldMap Min@. -- -- @since 0.7.0 -{-# INLINABLE minimum #-} +{-# INLINE minimum #-} minimum :: (Monad m, Ord a) => Fold m a (Maybe a) minimum = _Fold1 min diff --git a/src/Streamly/Internal/Data/Stream/StreamD/Type.hs b/src/Streamly/Internal/Data/Stream/StreamD/Type.hs index 940fbe704d..66cc8a0657 100644 --- a/src/Streamly/Internal/Data/Stream/StreamD/Type.hs +++ b/src/Streamly/Internal/Data/Stream/StreamD/Type.hs @@ -686,6 +686,7 @@ foldMany1 (Fold fstep initial extract) (Stream step state) = step' _ (GroupYield b next) = return $ Yield b next step' _ GroupFinish = return Stop +-- XXX Investigate performance {-# INLINE groupsOf #-} groupsOf :: Monad m => Int -> Fold m a b -> Stream m a -> Stream m b groupsOf n fld = foldMany (FL.ltake n fld) From 86fce93d4ada65321d55c567f23cbc8818d0dbfb Mon Sep 17 00:00:00 2001 From: adithyaov Date: Sun, 13 Sep 2020 22:01:31 +0530 Subject: [PATCH 21/30] Another review commit --- src/Streamly/Internal/Data/Fold.hs | 28 +++++++++++-------- src/Streamly/Internal/Data/Fold/Types.hs | 2 +- src/Streamly/Internal/Data/Parser/ParserD.hs | 8 ++++-- .../Internal/Data/Parser/ParserD/Types.hs | 2 +- src/Streamly/Internal/Data/Stream/IsStream.hs | 2 +- src/Streamly/Internal/Data/Stream/StreamD.hs | 13 ++++----- 6 files changed, 31 insertions(+), 24 deletions(-) diff --git a/src/Streamly/Internal/Data/Fold.hs b/src/Streamly/Internal/Data/Fold.hs index bb7fa4e3d5..4daf8d0a6e 100644 --- a/src/Streamly/Internal/Data/Fold.hs +++ b/src/Streamly/Internal/Data/Fold.hs @@ -400,7 +400,7 @@ transform (Pipe pstep1 pstep2 pinitial) (Fold fstep finitial fextract) = initial = Tuple' pinitial <$> finitial errorMsgOnDone1 = - "Only only accumulators or folds not returning Done1 are supported by this operation." + "Only folds not returning Done1 are supported by this operation." step (Tuple' ps fs) x = do r <- pstep1 ps x @@ -786,7 +786,8 @@ foldMapM act = Fold step begin done -- | Folds the input stream to a list. -- -- /Warning!/ working on large lists accumulated as buffers in memory could be --- very inefficient, consider using "Streamly.Data.Array.Storable.Foreign" instead. +-- very inefficient, consider using "Streamly.Data.Array.Storable.Foreign" +-- instead. -- -- @since 0.7.0 @@ -1315,6 +1316,8 @@ foldCons f1 f2 = teeWith (:) f1 f2 -- number of consumers? For polymorphic case a vector could be helpful. For -- Storables we can use arrays. Will need separate APIs for those. -- +--XXX This fold terminates when all the folds terminate? +-- -- | Distribute one copy of the stream to each fold and collect the results in -- a container. -- @@ -1355,7 +1358,8 @@ toUnitState (Done1 _) = Done1 () -- XXX This is 2x times faster wiithout the terminating condition. -- | Like 'distribute' but for folds that return (), this can be more efficient --- than 'distribute' as it does not need to maintain state. +-- than 'distribute' as it does not need to maintain state. This fold terminates +-- when all the folds terminate. -- {-# INLINE distribute_ #-} distribute_ :: Monad m => [Fold m a ()] -> Fold m a () @@ -1461,17 +1465,19 @@ partitionByM f (Fold stepL beginL doneL) (Fold stepR beginR doneR) = Left b -> do res <- stepL sL b return + $ Partial $ case res of - Partial sres -> Partial $ RunBoth sres sR - Done bres -> Partial $ RunRight bres sR - Done1 bres -> Partial $ RunRight bres sR + Partial sres -> RunBoth sres sR + Done bres -> RunRight bres sR + Done1 bres -> RunRight bres sR Right c -> do res <- stepR sR c return + $ Partial $ case res of - Partial sres -> Partial $ RunBoth sL sres - Done bres -> Partial $ RunLeft sL bres - Done1 bres -> Partial $ RunLeft sL bres + Partial sres -> RunBoth sL sres + Done bres -> RunLeft sL bres + Done1 bres -> RunLeft sL bres step (RunLeft sL bR) a = do r <- f a case r of @@ -2050,13 +2056,13 @@ toParallelSVar svar winfo = Fold step initial extract initial = return () + -- XXX we can have a separate fold for unlimited buffer case to avoid a + -- branch in the step here. step () x = liftIO $ do decrementBufferLimit svar void $ send svar (ChildYield x) return $ FL.Partial () - -- XXX we can have a separate fold for unlimited buffer case to avoid a - -- branch in the step here. extract () = liftIO $ sendStop svar winfo diff --git a/src/Streamly/Internal/Data/Fold/Types.hs b/src/Streamly/Internal/Data/Fold/Types.hs index c1f85e393d..aec45cd621 100644 --- a/src/Streamly/Internal/Data/Fold/Types.hs +++ b/src/Streamly/Internal/Data/Fold/Types.hs @@ -155,7 +155,6 @@ module Streamly.Internal.Data.Fold.Types ) where -import Data.Bifunctor (Bifunctor(..)) import Control.Applicative (liftA2) import Control.Concurrent (threadDelay, forkIO, killThread) import Control.Concurrent.MVar (MVar, newMVar, swapMVar, readMVar) @@ -163,6 +162,7 @@ import Control.Exception (SomeException(..), catch, mask) import Control.Monad (void) import Control.Monad.IO.Class (MonadIO(..)) import Control.Monad.Trans.Control (control) +import Data.Bifunctor (Bifunctor(..)) import Data.Maybe (isJust, fromJust) #if __GLASGOW_HASKELL__ < 808 import Data.Semigroup (Semigroup(..)) diff --git a/src/Streamly/Internal/Data/Parser/ParserD.hs b/src/Streamly/Internal/Data/Parser/ParserD.hs index 83db426325..bef4e71c8d 100644 --- a/src/Streamly/Internal/Data/Parser/ParserD.hs +++ b/src/Streamly/Internal/Data/Parser/ParserD.hs @@ -300,6 +300,8 @@ either parser = Parser step initial extract -- Taking elements ------------------------------------------------------------------------------- +-- XXX Convert this to `take :: Int -> Fold m a b -> Parser m a b` +-- It will be inconsistent with other takeish combinators. -- This is takeLE -- | See 'Streamly.Internal.Data.Parser.take'. -- @@ -321,6 +323,7 @@ takeEQ cnt (Fold step initial extract) = Parser step' initial' extract' where n = max cnt 0 + initial' = Tuple' 0 <$> initial step' (Tuple' i r) a @@ -690,9 +693,8 @@ manyTill (Fold fstep finitial fextract) l <- initialR return $ Partial n (ManyTillR 0 fs1 l) FL.Done fb -> return $ Done n fb - FL.Done1 fb -> do - assert (cnt + 1 - n >= 0) (return ()) - return $ Done (cnt + 1) fb + -- Keep a count of elements + FL.Done1 fb -> error "Done1 nore supported in manyTill" Error err -> return $ Error err extract (ManyTillL _ fs sR) = do diff --git a/src/Streamly/Internal/Data/Parser/ParserD/Types.hs b/src/Streamly/Internal/Data/Parser/ParserD/Types.hs index a75c794c29..eabdf6e629 100644 --- a/src/Streamly/Internal/Data/Parser/ParserD/Types.hs +++ b/src/Streamly/Internal/Data/Parser/ParserD/Types.hs @@ -469,8 +469,8 @@ splitMany (Fold fstep finitial fextract) (Parser step1 initial1 extract1) = Error _ -> do xs <- fextract fs return $ Done cnt1 xs - -- XXX The "try" may impact performance if this parser is used as a scan + -- XXX The "try" may impact performance if this parser is used as a scan extract (Tuple3' s _ fs) = do r <- try $ extract1 s case r of diff --git a/src/Streamly/Internal/Data/Stream/IsStream.hs b/src/Streamly/Internal/Data/Stream/IsStream.hs index 1035cb84e0..43bcd25487 100644 --- a/src/Streamly/Internal/Data/Stream/IsStream.hs +++ b/src/Streamly/Internal/Data/Stream/IsStream.hs @@ -4653,7 +4653,7 @@ data SessionState t m k a b = SessionState #undef Type -- XXX Perhaps we should use an "Event a" type to represent timestamped data. --- XXX I've replaced it with the most natural implementation. Check logic. +-- XXX Recheck this! -- | @classifySessionsBy tick timeout idle pred f stream@ groups timestamped -- events in an input event stream into sessions based on a session key. Each -- element in the input stream is an event consisting of a triple @(session key, diff --git a/src/Streamly/Internal/Data/Stream/StreamD.hs b/src/Streamly/Internal/Data/Stream/StreamD.hs index c08af35033..61f43136e9 100644 --- a/src/Streamly/Internal/Data/Stream/StreamD.hs +++ b/src/Streamly/Internal/Data/Stream/StreamD.hs @@ -1532,7 +1532,6 @@ groupsRollingBy cmp f = foldMany (FL.groupByRolling cmp f) splitBy :: Monad m => (a -> Bool) -> Fold m a b -> Stream m a -> Stream m b splitBy predicate f = foldMany (FL.sliceSepBy predicate f) --- XXX requires -funfolding-use-threshold=150 in lines-unlines benchmark {-# INLINE_NORMAL splitSuffixBy #-} splitSuffixBy :: Monad m => (a -> Bool) -> Fold m a b -> Stream m a -> Stream m b @@ -1544,11 +1543,11 @@ data WordsByState fs s a b | WordBeginWith !s !a | WordFold !fs !s -{-# INLINE_NORMAL wordsBy #-} -wordsBy :: Monad m => (a -> Bool) -> Fold m a b -> Stream m a -> Stream m b -- This has a hard time fusing even with simple pipeline. -- wordsBy predicate f = foldMany (FL.sliceSepTill predicate f) -- XXX Check this +{-# INLINE_NORMAL wordsBy #-} +wordsBy :: Monad m => (a -> Bool) -> Fold m a b -> Stream m a -> Stream m b wordsBy predicate (Fold fstep initial done) (Stream step state) = Stream step1 (WordBegin state) @@ -3943,9 +3942,9 @@ tap (Fold fstep initial extract) (Stream step state) = Stream step' TapInit r <- step gst st case r of Yield x s -> do - acc' <- fstep acc x + acc1 <- fstep acc x return - $ case acc' of + $ case acc1 of FL.Partial sres -> Yield x (Tapping sres s) FL.Done _ -> Yield x (TapDone s) FL.Done1 _ -> Yield x (TapDone s) @@ -3981,9 +3980,9 @@ tapOffsetEvery offset n (Fold fstep initial extract) (Stream step state) = r <- step gst st case r of Yield x s -> do - acc' <- fstep acc x + acc1 <- fstep acc x return - $ case acc' of + $ case acc1 of FL.Partial sres -> Yield x (TapOffTapping sres s (n - 1)) FL.Done _ -> Yield x (TapOffDone s) From 84ffab4d9cb27187f379c39350b606398ad9fd81 Mon Sep 17 00:00:00 2001 From: adithyaov Date: Sun, 13 Sep 2020 22:25:26 +0530 Subject: [PATCH 22/30] fixup! Another review commit --- src/Streamly/Internal/Data/Parser/ParserD.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Streamly/Internal/Data/Parser/ParserD.hs b/src/Streamly/Internal/Data/Parser/ParserD.hs index bef4e71c8d..e9779db9fd 100644 --- a/src/Streamly/Internal/Data/Parser/ParserD.hs +++ b/src/Streamly/Internal/Data/Parser/ParserD.hs @@ -694,7 +694,7 @@ manyTill (Fold fstep finitial fextract) return $ Partial n (ManyTillR 0 fs1 l) FL.Done fb -> return $ Done n fb -- Keep a count of elements - FL.Done1 fb -> error "Done1 nore supported in manyTill" + FL.Done1 _ -> error "Done1 nore supported in manyTill" Error err -> return $ Error err extract (ManyTillL _ fs sR) = do From 5340ed26d0dd7a9ce49685f9af1feb76186161a2 Mon Sep 17 00:00:00 2001 From: adithyaov Date: Mon, 14 Sep 2020 08:44:57 +0530 Subject: [PATCH 23/30] Test fix --- test/Streamly/Test/Internal/Data/Parser.hs | 43 +++++++++++++--------- 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/test/Streamly/Test/Internal/Data/Parser.hs b/test/Streamly/Test/Internal/Data/Parser.hs index a46e3b4e2e..45da5335b4 100644 --- a/test/Streamly/Test/Internal/Data/Parser.hs +++ b/test/Streamly/Test/Internal/Data/Parser.hs @@ -455,14 +455,19 @@ sliceSepByMax = many :: Property many = - forAll (listOf (chooseInt (0, 1))) $ \ls -> - let - concatFold = FL.Fold (\concatList curr_list -> return $ concatList ++ curr_list) (return []) return - prsr = P.many concatFold $ P.sliceSepBy (== 1) FL.toList - in - case S.parse prsr (S.fromList ls) of - Right res_list -> checkListEqual res_list (Prelude.filter (== 0) ls) - Left _ -> property False + forAll (listOf (chooseInt (0, 1))) + $ \ls -> + let concatFold = + FL.Fold + (\concatList curr_list -> + return $ FL.Partial $ concatList ++ curr_list) + (return []) + return + prsr = P.many concatFold $ P.sliceSepBy (== 1) FL.toList + in case S.parse prsr (S.fromList ls) of + Right res_list -> + checkListEqual res_list (Prelude.filter (== 0) ls) + Left _ -> property False -- many_empty :: Property -- many_empty = @@ -472,15 +477,19 @@ many = some :: Property some = - forAll (listOf (chooseInt (0, 1))) $ \genLs -> - let - ls = 0 : genLs - concatFold = FL.Fold (\concatList curr_list -> return $ concatList ++ curr_list) (return []) return - prsr = P.some concatFold $ P.sliceSepBy (== 1) FL.toList - in - case S.parse prsr (S.fromList ls) of - Right res_list -> res_list == Prelude.filter (== 0) ls - Left _ -> False + forAll (listOf (chooseInt (0, 1))) + $ \genLs -> + let ls = 0 : genLs + concatFold = + FL.Fold + (\concatList curr_list -> + return $ FL.Partial $ concatList ++ curr_list) + (return []) + return + prsr = P.some concatFold $ P.sliceSepBy (== 1) FL.toList + in case S.parse prsr (S.fromList ls) of + Right res_list -> res_list == Prelude.filter (== 0) ls + Left _ -> False -- someFail :: Property -- someFail = From 3f88a4eb18ca862b198d5f8b4ae2bca97ee0a7dc Mon Sep 17 00:00:00 2001 From: adithyaov Date: Thu, 17 Sep 2020 09:16:08 +0530 Subject: [PATCH 24/30] Intermediate commit --- src/Streamly/Internal/Data/Fold.hs | 252 ++++++++---------- src/Streamly/Internal/Data/Fold/Types.hs | 53 ++-- src/Streamly/Internal/Data/Parser/ParserD.hs | 6 +- .../Internal/Data/Parser/ParserD/Types.hs | 4 +- src/Streamly/Internal/Data/Stream/IsStream.hs | 3 +- .../Internal/Data/Stream/StreamD/Type.hs | 2 +- 6 files changed, 155 insertions(+), 165 deletions(-) diff --git a/src/Streamly/Internal/Data/Fold.hs b/src/Streamly/Internal/Data/Fold.hs index 4daf8d0a6e..b835e787c6 100644 --- a/src/Streamly/Internal/Data/Fold.hs +++ b/src/Streamly/Internal/Data/Fold.hs @@ -233,10 +233,10 @@ import Streamly.Internal.Data.Tuple.Strict (Tuple'(..), Tuple3'(..)) import Streamly.Internal.Data.Maybe.Strict (Maybe'(..), toMaybe) import Streamly.Internal.Data.Either.Strict (Either'(..)) -import qualified Data.List as List import qualified Streamly.Internal.Data.Fold.Types as FL import qualified Streamly.Internal.Data.Pipe.Types as Pipe import qualified Data.Map.Strict as Map +import qualified Data.Set as Set import qualified Prelude import Prelude hiding @@ -370,8 +370,8 @@ sequence (Fold step initial extract) = Fold step' initial extract' res <- step s a case res of Partial x -> return $ Partial x - Done b -> b >>= return . Done - Done1 b -> b >>= return . Done1 + Done b -> Done <$> b + Done1 b -> Done1 <$> b extract' = join . extract @@ -390,6 +390,8 @@ mapM f = sequence . fmap f -- -- | Apply a transformation on a 'Fold' using a 'Pipe'. -- +-- Only folds not returning 'Done1' are supported by this operation. +-- -- @since 0.7.0 {-# INLINE transform #-} transform :: Monad m => Pipe m a b -> Fold m b c -> Fold m a c @@ -550,8 +552,6 @@ product = Fold step (return 1) return then Done 0 else Partial $ x * a - - ------------------------------------------------------------------------------ -- To Summary (Maybe) ------------------------------------------------------------------------------ @@ -735,7 +735,7 @@ sconcat i = Fold (\x a -> return $ Partial $ x <> a) (return i) return -- > S.fold FL.mconcat (S.map Sum $ S.enumerateFromTo 1 10) -- -- @since 0.7.0 -{-# INLINABLE mconcat #-} +{-# INLINE mconcat #-} mconcat :: (Monad m, Monoid a) => Fold m a a mconcat = sconcat mempty @@ -990,7 +990,7 @@ notElem a = all (a /=) -- > and = fmap getAll . FL.foldMap All -- -- @since 0.7.0 -{-# INLINABLE and #-} +{-# INLINE and #-} and :: Monad m => Fold m Bool Bool and = all (== True) @@ -1000,7 +1000,7 @@ and = all (== True) -- > or = fmap getAny . FL.foldMap Any -- -- @since 0.7.0 -{-# INLINABLE or #-} +{-# INLINE or #-} or :: Monad m => Fold m Bool Bool or = any (== True) @@ -1340,65 +1340,47 @@ foldCons f1 f2 = teeWith (:) f1 f2 distribute :: Monad m => [Fold m a b] -> Fold m a [b] distribute = foldr foldCons foldNil --- It would be neater if we use a new type -{-# INLINE combineFoldState #-} -combineFoldState :: Step () () -> Step () () -> Step () () -combineFoldState (Partial ()) _ = Partial () -combineFoldState _ (Partial ()) = Partial () -combineFoldState (Done ()) (Done ()) = Done () -combineFoldState (Done ()) (Done1 ()) = Done () -combineFoldState (Done1 ()) (Done ()) = Done () -combineFoldState (Done1 ()) (Done1 ()) = Done1 () - -{-# INLINE toUnitState #-} -toUnitState :: Step s b -> Step () () -toUnitState (Partial _) = Partial () -toUnitState (Done _) = Done () -toUnitState (Done1 _) = Done1 () - -- XXX This is 2x times faster wiithout the terminating condition. -- | Like 'distribute' but for folds that return (), this can be more efficient -- than 'distribute' as it does not need to maintain state. This fold terminates -- when all the folds terminate. -- +-- 'Done1' and 'Done' are treated equally for the case of distribute_. This is +-- not the case for 'distribute'. {-# INLINE distribute_ #-} distribute_ :: Monad m => [Fold m a ()] -> Fold m a () distribute_ fs = Fold step initial extract where - initial = Prelude.mapM (fmap Partial . initialize) fs + initial = Prelude.mapM initialize fs + -- XXX This itself can be a recursive function but we might face inlining + -- problems. + step [] _ = return $ Done1 () step ss a = do - ss1 <- Prelude.mapM (flip runMaybeStep a) ss - let endRes = - List.foldl' - (\fb fa -> toUnitState fa `combineFoldState` fb) - (Done1 ()) - ss1 + ss1 <- go ss a return - $ case endRes of - Done () -> Done () - Done1 () -> Done1 () - Partial () -> Partial ss1 - - extract ss = Prelude.mapM_ runMaybeExtract ss - - {-# INLINE runMaybeExtract #-} - runMaybeExtract (Partial (Fold _ i d)) = i >>= d - runMaybeExtract _ = return () - - {-# INLINE runMaybeStep #-} - runMaybeStep (Done1 ()) _ = return $ Done1 () - runMaybeStep (Done ()) _ = return $ Done1 () - runMaybeStep (Partial (Fold s i d)) a = do - ii <- i - res <- s ii a - return - $ case res of - Partial sres -> Partial (Fold s (return sres) d) - Done _ -> Done () - Done1 _ -> Done1 () + $ if Prelude.null ss1 + then Done () + else Partial ss1 + + go [] _ = return [] + go ((Fold s i d):ss) a = do + i1 <- i + res <- s i1 a + case res of + Partial i2 -> do + rst <- go ss a + let fld = Fold s (return i2) d + return $ fld : rst + Done () -> go ss a + Done1 () -> go ss a + + extract ss = go1 ss + + go1 [] = return () + go1 ((Fold _ i d):ss) = (i >>= d) >> go1 ss ------------------------------------------------------------------------------ -- Partitioning @@ -1407,7 +1389,7 @@ distribute_ fs = Fold step initial extract -- | Partition the input over two folds using an 'Either' partitioning -- predicate. This fold terminates when both the folds terminate. -- --- See 'partitionByFstM' and 'partitionByMinM'. +-- See also: 'partitionByFstM' and 'partitionByMinM'. -- -- @ -- @@ -1607,52 +1589,40 @@ demuxWith f kv = Fold step initial extract where - initial = - Tuple' (Map.size kv) <$> Prelude.mapM (fmap Left . initialize) kv + initial = Tuple' Map.empty <$> Prelude.mapM initialize kv - step (Tuple' n mp) a = - let (k, a') = f a - in case Map.lookup k mp of - Nothing -> return $ Partial $ Tuple' n mp - Just (Left (Fold stp ini dn)) -> do + step (Tuple' bmp mp) a = + if Map.size mp > 0 + then do + let (k, a') = f a + case Map.lookup k mp of + Nothing -> return $ Partial $ Tuple' bmp mp + Just (Fold stp ini dn) -> do !st <- ini !res <- stp st a' - let n1 = n - 1 return $ case res of Partial sres -> - Partial - $ Tuple' n - $ Map.insert - k - (Left (Fold stp (return sres) dn)) - mp - -- XXX Check for n - 1 == 0 here for Done & Done1? - -- XXX Treat the last completing fold differently? + let fld = Fold stp (return sres) dn + mp1 = Map.insert k fld mp + in Partial $ Tuple' bmp mp1 Done bres -> - if n1 == 0 - then Done $ done $ Map.insert k (Right bres) mp - else Partial - $ Tuple' n1 - $ Map.insert k (Right bres) mp + let mp1 = Map.delete k mp + bmp1 = Map.insert k bres bmp + in if Map.size mp1 == 0 + then Done bmp1 + else Partial $ Tuple' bmp1 mp1 Done1 bres -> - if n1 == 0 - then Done1 $ done $ Map.insert k (Right bres) mp - else Partial - $ Tuple' n1 - $ Map.insert k (Right bres) mp - Just (Right _) -> do - return $ Partial $ Tuple' n mp + let mp1 = Map.delete k mp + bmp1 = Map.insert k bres bmp + in if Map.size mp1 == 0 + then Done1 bmp1 + else Partial $ Tuple' bmp1 mp1 + else return $ Done1 bmp - extract (Tuple' _ mp) = Prelude.mapM runEitherExtract mp - - runEitherExtract (Left (Fold _ i d)) = i >>= d - runEitherExtract (Right b) = return b - - done = Map.map runEitherDone - - runEitherDone (Left _) = error "Incomplete folds exist" - runEitherDone (Right b) = b + extract (Tuple' bmp mp) = do + mpe <- Prelude.mapM (\(Fold _ i d) -> i >>= d) mp + return $ bmp `Map.union` mpe -- | Fold a stream of key value pairs using a map of specific folds for each -- key into a map from keys to the results of fold outputs of the corresponding @@ -1671,8 +1641,8 @@ demux :: (Monad m, Ord k) => Map k (Fold m a b) -> Fold m (k, a) (Map k b) demux = demuxWith id --- data DemuxState m s = DemuxP !m !s | DemuxD !m -data DemuxState m s = DemuxP Int !s !m | DemuxD Int !m +-- data DemuxState m s = DemuxP !m !s | DemuxingOnlyMap !m +data DemuxState s m1 m2 = DemuxingWithDefault !s !m1 !m2 | DemuxingOnlyMap !m2 {-# INLINE demuxWithDefault_ #-} demuxWithDefault_ :: (Monad m, Ord k) @@ -1682,76 +1652,87 @@ demuxWithDefault_ f kv (Fold dstep dinitial dextract) = where - initial = - DemuxP (Map.size kv + 1) <$> dinitial <*> Prelude.mapM initialize kv + initial = do + ini <- dinitial + mp <- Prelude.mapM initialize kv + return $ DemuxingWithDefault ini Set.empty mp - step (DemuxP n dacc mp) a - | (k, a') <- f a = + -- XXX Some code can probably be abstracted + step (DemuxingWithDefault dacc bst mp) a = + if Map.size mp > 0 + then do + let (k, a1) = f a case Map.lookup k mp of - Nothing -> do - res <- dstep dacc (k, a') - let n1 = n - 1 - return - $ case res of - Partial sres -> Partial $ DemuxP n sres mp - Done _ -> - if n1 == 0 - then Done () - else Partial $ DemuxD n1 mp - Done1 _ -> - if n1 == 0 - then Done1 () - else Partial $ DemuxD n1 mp + Nothing -> + if k `Set.member` bst + then return $ Partial $ DemuxingWithDefault dacc bst mp + else do + res <- dstep dacc (k, a1) + return + $ Partial + $ case res of + Partial sres -> DemuxingWithDefault sres bst mp + Done _ -> DemuxingOnlyMap mp + Done1 _ -> DemuxingOnlyMap mp Just (Fold stp ini dn) -> do !st <- ini - !res <- stp st a' - let n1 = n - 1 + !res <- stp st a1 return $ case res of Partial sres -> - Partial - $ DemuxP n dacc - $ Map.insert k (Fold stp (return sres) dn) mp - -- XXX Check for n - 1 == 0 here for Done & Done1? - -- XXX Treat the last completing fold differently? + let fld = (Fold stp (return sres) dn) + mp1 = Map.insert k fld mp + in Partial $ DemuxingWithDefault dacc bst mp1 Done _ -> - if n1 == 0 + -- XXX We can skip the check here + if Map.size mp == 1 then Done () - else Partial $ DemuxP n1 dacc $ Map.delete k mp + else Partial + $ DemuxingWithDefault + dacc + (Set.insert k bst) + (Map.delete k mp) Done1 _ -> - if n1 == 0 + if Map.size mp == 1 then Done1 () - else Partial $ DemuxP n1 dacc $ Map.delete k mp + else Partial + $ DemuxingWithDefault + dacc + (Set.insert k bst) + (Map.delete k mp) + else return $ Done1 () -- XXX Reduce code duplication? - step (DemuxD n mp) a - | (k, a') <- f a = + step (DemuxingOnlyMap mp) a = + if Map.size mp > 0 + then do + let (k, a1) = f a case Map.lookup k mp of - Nothing -> return $ Partial $ DemuxD n mp + Nothing -> return $ Partial $ DemuxingOnlyMap mp Just (Fold stp ini dn) -> do !st <- ini - !res <- stp st a' - let n1 = n - 1 + !res <- stp st a1 return $ case res of Partial sres -> Partial - $ DemuxD n + $ DemuxingOnlyMap $ Map.insert k (Fold stp (return sres) dn) mp -- XXX Check for n - 1 == 0 here for Done & Done1? -- XXX Treat the last completing fold differently? Done _ -> - if n1 == 0 + if Map.size mp == 1 then Done () - else Partial $ DemuxD n1 $ Map.delete k mp + else Partial $ DemuxingOnlyMap $ Map.delete k mp Done1 _ -> - if n1 == 0 + if Map.size mp == 1 then Done1 () - else Partial $ DemuxD n1 $ Map.delete k mp + else Partial $ DemuxingOnlyMap $ Map.delete k mp + else return $ Done1 () - extract (DemuxP _ dacc mp) = do + extract (DemuxingWithDefault dacc _ mp) = do void $ dextract dacc Prelude.mapM_ (\(Fold _ i d) -> i >>= d) mp - extract (DemuxD _ mp) = do + extract (DemuxingOnlyMap mp) = do Prelude.mapM_ (\(Fold _ i d) -> i >>= d) mp -- | Split the input stream based on a key field and fold each split using a @@ -1991,7 +1972,8 @@ unzipWithMinM :: -- Monad m => unzipWithMinM = undefined -- | Split elements in the input stream into two parts using a pure splitter --- function, direct each part to a different fold and zip the results. +-- function, direct each part to a different fold and zip the results. The fold +-- terminates when both the input folds terminate. -- -- @since 0.7.0 {-# INLINE unzipWith #-} diff --git a/src/Streamly/Internal/Data/Fold/Types.hs b/src/Streamly/Internal/Data/Fold/Types.hs index aec45cd621..717c604c93 100644 --- a/src/Streamly/Internal/Data/Fold/Types.hs +++ b/src/Streamly/Internal/Data/Fold/Types.hs @@ -134,7 +134,6 @@ module Streamly.Internal.Data.Fold.Types , ltake , ltakeWhile - , distributiveAp , teeWith , teeWithFst , teeWithMin @@ -301,7 +300,7 @@ instance Monad m => Applicative (Fold m a) where pure b = Fold (\() _ -> pure $ Done b) (pure ()) (\() -> pure b) -- XXX deprecate this? {-# INLINE (<*>) #-} - (<*>) = distributiveAp + (<*>) = teeWith ($) -- | @teeWith f f1 f2@ distributes its input to both @f1@ and @f2@ until both -- of them terminate and combines their output using @f@. @@ -385,10 +384,6 @@ teeWithFst = undefined teeWithMin :: (b -> c -> d) -> Fold m a b -> Fold m a c -> Fold m a d teeWithMin = undefined -{-# INLINE distributiveAp #-} -distributiveAp :: Monad m => Fold m a (b -> c) -> Fold m a b -> Fold m a c -distributiveAp = teeWith ($) - -- | Combines the outputs of the folds (the type @b@) using their 'Semigroup' -- instances. instance (Semigroup b, Monad m) => Semigroup (Fold m a b) where @@ -634,11 +629,11 @@ ltakeWhile predicate (Fold fstep finitial fextract) = {-# INLINABLE duplicate #-} duplicate :: Monad m => Fold m a b -> Fold m a (Fold m a b) duplicate (Fold step begin done) = - Fold step' begin (\x -> pure (Fold step (pure x) done)) + Fold step1 begin (\x -> pure (Fold step (pure x) done)) where - step' x a = do + step1 x a = do res <- step x a -- XXX Discuss about initial element case res of @@ -733,7 +728,7 @@ many (Fold fstep finitial fextract) (Fold step1 initial1 extract1) = Done x -> return x Done1 x -> return x -data GroupByState a s = GroupByN !s | GroupByJ !a !s +data GroupByState a s = GroupByInit !s | GroupByGrouping !a !s {-# INLINE groupBy #-} groupBy :: Monad m => (a -> a -> Bool) -> Fold m a b -> Fold m a b @@ -741,16 +736,16 @@ groupBy cmp (Fold fstep finitial fextract) = Fold step initial extract where - initial = GroupByN <$> finitial + initial = GroupByInit <$> finitial - step (GroupByN s) a = do + step (GroupByInit s) a = do res <- fstep s a return $ case res of Done bres -> Done bres Done1 bres -> Done1 bres - Partial sres -> Partial (GroupByJ a sres) - step (GroupByJ a0 s) a = + Partial sres -> Partial (GroupByGrouping a sres) + step (GroupByGrouping a0 s) a = if cmp a0 a then do res <- fstep s a @@ -758,11 +753,11 @@ groupBy cmp (Fold fstep finitial fextract) = Fold step initial extract case res of Done bres -> Done bres Done1 bres -> Done1 bres - Partial sres -> Partial (GroupByJ a0 sres) + Partial sres -> Partial (GroupByGrouping a0 sres) else Done1 <$> fextract s - extract (GroupByN s) = fextract s - extract (GroupByJ _ s) = fextract s + extract (GroupByInit s) = fextract s + extract (GroupByGrouping _ s) = fextract s {-# INLINE groupByRolling #-} groupByRolling :: Monad m => (a -> a -> Bool) -> Fold m a b -> Fold m a b @@ -770,16 +765,16 @@ groupByRolling cmp (Fold fstep finitial fextract) = Fold step initial extract where - initial = GroupByN <$> finitial + initial = GroupByInit <$> finitial - step (GroupByN s) a = do + step (GroupByInit s) a = do res <- fstep s a return $ case res of Done bres -> Done bres Done1 bres -> Done1 bres - Partial sres -> Partial (GroupByJ a sres) - step (GroupByJ a0 s) a = + Partial sres -> Partial (GroupByGrouping a sres) + step (GroupByGrouping a0 s) a = if cmp a0 a then do res <- fstep s a @@ -787,11 +782,11 @@ groupByRolling cmp (Fold fstep finitial fextract) = Fold step initial extract case res of Done bres -> Done bres Done1 bres -> Done1 bres - Partial sres -> Partial (GroupByJ a sres) + Partial sres -> Partial (GroupByGrouping a sres) else Done1 <$> fextract s - extract (GroupByN s) = fextract s - extract (GroupByJ _ s) = fextract s + extract (GroupByInit s) = fextract s + extract (GroupByGrouping _ s) = fextract s -- | For every n input items, apply the first fold and supply the result to the -- next fold. @@ -878,6 +873,18 @@ takeByTime n (Fold step initial done) = Fold step' initial' done' handleChildException :: MVar Bool -> SomeException -> IO () handleChildException mv _ = void $ swapMVar mv True +-- | Group the input stream into windows of n second each and then fold each +-- group using the provided fold function. +-- +-- For example, we can copy and distribute a stream to multiple folds where +-- each fold can group the input differently e.g. by one second, one minute and +-- one hour windows respectively and fold each resulting stream of folds. +-- +-- @ +-- +-- -----Fold m a b----|-Fold n a c-|-Fold n a c-|-...-|----Fold m a c +-- +-- @ {-# INLINE lsessionsOf #-} lsessionsOf :: MonadAsync m => Double -> Fold m a b -> Fold m b c -> Fold m a c lsessionsOf n split collect = many collect (takeByTime n split) diff --git a/src/Streamly/Internal/Data/Parser/ParserD.hs b/src/Streamly/Internal/Data/Parser/ParserD.hs index e9779db9fd..863d2841a1 100644 --- a/src/Streamly/Internal/Data/Parser/ParserD.hs +++ b/src/Streamly/Internal/Data/Parser/ParserD.hs @@ -300,7 +300,6 @@ either parser = Parser step initial extract -- Taking elements ------------------------------------------------------------------------------- --- XXX Convert this to `take :: Int -> Fold m a b -> Parser m a b` -- It will be inconsistent with other takeish combinators. -- This is takeLE -- | See 'Streamly.Internal.Data.Parser.take'. @@ -693,8 +692,9 @@ manyTill (Fold fstep finitial fextract) l <- initialR return $ Partial n (ManyTillR 0 fs1 l) FL.Done fb -> return $ Done n fb - -- Keep a count of elements - FL.Done1 _ -> error "Done1 nore supported in manyTill" + FL.Done1 fb -> do + assert (cnt + 1 - n >= 0) (return ()) + return $ Done (cnt + 1) fb Error err -> return $ Error err extract (ManyTillL _ fs sR) = do diff --git a/src/Streamly/Internal/Data/Parser/ParserD/Types.hs b/src/Streamly/Internal/Data/Parser/ParserD/Types.hs index eabdf6e629..3212e2b182 100644 --- a/src/Streamly/Internal/Data/Parser/ParserD/Types.hs +++ b/src/Streamly/Internal/Data/Parser/ParserD/Types.hs @@ -504,8 +504,8 @@ splitSome (Fold fstep finitial fextract) (Parser step1 initial1 extract1) = step (Tuple3' st _ (Left fs)) a = do r <- step1 st a case r of - Partial n s -> return $ Continue n (Tuple3' s undefined (Left fs)) - Continue n s -> return $ Continue n (Tuple3' s undefined (Left fs)) + Partial n s -> return $ Continue n (Tuple3' s 0 (Left fs)) + Continue n s -> return $ Continue n (Tuple3' s 0 (Left fs)) Done n b -> do s <- initial1 fs1 <- fstep fs b diff --git a/src/Streamly/Internal/Data/Stream/IsStream.hs b/src/Streamly/Internal/Data/Stream/IsStream.hs index 43bcd25487..6497891aca 100644 --- a/src/Streamly/Internal/Data/Stream/IsStream.hs +++ b/src/Streamly/Internal/Data/Stream/IsStream.hs @@ -4720,8 +4720,9 @@ classifySessionsBy tick tmout reset ejectPred (Fold step initial extract) str = -- old ones. -- -- We use the first strategy as of now. + -- Got a new stream input element - sstep session@SessionState {..} (Just (key, value, timestamp)) = do + sstep session@SessionState{..} (Just (key, value, timestamp)) = do -- XXX we should use a heap in pinned memory to scale it to a large -- size -- diff --git a/src/Streamly/Internal/Data/Stream/StreamD/Type.hs b/src/Streamly/Internal/Data/Stream/StreamD/Type.hs index 66cc8a0657..d36c3b0600 100644 --- a/src/Streamly/Internal/Data/Stream/StreamD/Type.hs +++ b/src/Streamly/Internal/Data/Stream/StreamD/Type.hs @@ -468,7 +468,7 @@ foldlMx' fstep begin done (Stream step state) = case r of Yield x s -> do acc' <- fstep acc x - go SPEC acc' s + go SPEC acc' s Skip s -> go SPEC acc s Stop -> done acc From 95d4c6fd8eb92e1829cdaa1b238e9b3cfbf89935 Mon Sep 17 00:00:00 2001 From: adithyaov Date: Thu, 17 Sep 2020 11:45:44 +0530 Subject: [PATCH 25/30] Fix bug --- src/Streamly/Internal/Data/Stream/StreamD.hs | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/Streamly/Internal/Data/Stream/StreamD.hs b/src/Streamly/Internal/Data/Stream/StreamD.hs index 61f43136e9..3e31a34e10 100644 --- a/src/Streamly/Internal/Data/Stream/StreamD.hs +++ b/src/Streamly/Internal/Data/Stream/StreamD.hs @@ -1557,13 +1557,12 @@ wordsBy predicate (Fold fstep initial done) (Stream step state) = step1 gst (WordBegin st) = do res <- step (adaptState gst) st case res of - Yield x s -> do - if predicate x - then return $ Skip (WordBegin s) - else do - -- step1 gst (WordBeginWith s x) - ini <- initial - wordFoldWith ini s x + Yield x s -> + return + $ Skip + $ if predicate x + then WordBegin s + else WordBeginWith s x Skip s -> return $ Skip $ WordBegin s Stop -> return Stop step1 gst (WordFold fs st) = do @@ -1576,7 +1575,9 @@ wordsBy predicate (Fold fstep initial done) (Stream step state) = return $ Skip $ WordYield bres (WordBegin s) else wordFoldWith fs s x Skip s -> return $ Skip $ WordFold fs s - Stop -> return Stop + Stop -> do + bres <- done fs + return $ Skip $ WordYield bres (WordBegin st) step1 _ (WordYield bres ns) = return $ Yield bres ns step1 _ (WordBeginWith s x) = do -- XXX We dont need to check for predicate here, we already know "x" From 83d1e824682adcd1b3c681b9011b292b5f2cec8a Mon Sep 17 00:00:00 2001 From: adithyaov Date: Thu, 17 Sep 2020 13:10:08 +0530 Subject: [PATCH 26/30] change IsStream groupsBy arg order --- src/Streamly/Internal/Data/Stream/IsStream.hs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/Streamly/Internal/Data/Stream/IsStream.hs b/src/Streamly/Internal/Data/Stream/IsStream.hs index 6497891aca..f8cea44bb8 100644 --- a/src/Streamly/Internal/Data/Stream/IsStream.hs +++ b/src/Streamly/Internal/Data/Stream/IsStream.hs @@ -3820,7 +3820,9 @@ intervalsOf n f xs = ------------------------------------------------------------------------------ -- N-ary APIs ------------------------------------------------------------------------------ --- + +-- XXX We should probably change the order of the comparision and update the +-- docs accordingly. -- | @groupsBy cmp f $ S.fromList [a,b,c,...]@ assigns the element @a@ to the -- first group, if @b \`cmp` a@ is 'True' then @b@ is also assigned to the same -- group. If @c \`cmp` a@ is 'True' then @c@ is also assigned to the same @@ -3839,7 +3841,7 @@ groupsBy -> Fold m a b -> t m a -> t m b -groupsBy cmp f m = D.fromStreamD $ D.groupsBy cmp f (D.toStreamD m) +groupsBy cmp f m = D.fromStreamD $ D.groupsBy (flip cmp) f (D.toStreamD m) -- | Unlike @groupsBy@ this function performs a rolling comparison of two -- successive elements in the input stream. @groupsByRolling cmp f $ S.fromList From 1c833c174abe35f646cf8eb3fe104d0b4b38b251 Mon Sep 17 00:00:00 2001 From: adithyaov Date: Fri, 18 Sep 2020 06:53:18 +0530 Subject: [PATCH 27/30] Fix build for 822 --- src/Streamly/Internal/Data/Fold.hs | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/Streamly/Internal/Data/Fold.hs b/src/Streamly/Internal/Data/Fold.hs index b835e787c6..4df4a7cd7d 100644 --- a/src/Streamly/Internal/Data/Fold.hs +++ b/src/Streamly/Internal/Data/Fold.hs @@ -736,7 +736,12 @@ sconcat i = Fold (\x a -> return $ Partial $ x <> a) (return i) return -- -- @since 0.7.0 {-# INLINE mconcat #-} -mconcat :: (Monad m, Monoid a) => Fold m a a +mconcat :: + ( Monad m +#if MIN_VERSION_base(4,11,0) + , Semigroup a +#endif + , Monoid a) => Fold m a a mconcat = sconcat mempty -- | @@ -1359,7 +1364,7 @@ distribute_ fs = Fold step initial extract -- problems. step [] _ = return $ Done1 () step ss a = do - ss1 <- go ss a + !ss1 <- go ss a return $ if Prelude.null ss1 then Done () @@ -1972,8 +1977,9 @@ unzipWithMinM :: -- Monad m => unzipWithMinM = undefined -- | Split elements in the input stream into two parts using a pure splitter --- function, direct each part to a different fold and zip the results. The fold --- terminates when both the input folds terminate. +-- function, direct each part to a different fold and zip the results. +-- +-- This fold terminates when both the input folds terminate. -- -- @since 0.7.0 {-# INLINE unzipWith #-} From 7bc1e67245761ce7c1bcb3a29d35d79913460ceb Mon Sep 17 00:00:00 2001 From: adithyaov Date: Fri, 18 Sep 2020 06:58:42 +0530 Subject: [PATCH 28/30] fixup! Fix build for 822 --- src/Streamly/Internal/Data/Fold.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Streamly/Internal/Data/Fold.hs b/src/Streamly/Internal/Data/Fold.hs index 4df4a7cd7d..dc6e594843 100644 --- a/src/Streamly/Internal/Data/Fold.hs +++ b/src/Streamly/Internal/Data/Fold.hs @@ -738,7 +738,7 @@ sconcat i = Fold (\x a -> return $ Partial $ x <> a) (return i) return {-# INLINE mconcat #-} mconcat :: ( Monad m -#if MIN_VERSION_base(4,11,0) +#if !MIN_VERSION_base(4,11,0) , Semigroup a #endif , Monoid a) => Fold m a a From fac225efe0cfb99d38fc7e67bad5e204f1fe677b Mon Sep 17 00:00:00 2001 From: adithyaov Date: Fri, 18 Sep 2020 09:44:04 +0530 Subject: [PATCH 29/30] Add tests for splitOnSeq and splitOnSuffixSeq --- src/Streamly/Internal/Data/Stream/IsStream.hs | 2 + test/Streamly/Test/Prelude/Serial.hs | 57 +++++++++++++++++++ 2 files changed, 59 insertions(+) diff --git a/src/Streamly/Internal/Data/Stream/IsStream.hs b/src/Streamly/Internal/Data/Stream/IsStream.hs index f8cea44bb8..0d768e9011 100644 --- a/src/Streamly/Internal/Data/Stream/IsStream.hs +++ b/src/Streamly/Internal/Data/Stream/IsStream.hs @@ -4157,6 +4157,8 @@ splitOnAny splitOnAny subseq f m = undefined -- D.fromStreamD $ D.splitOnAny f subseq (D.toStreamD m) -} +-- XXX Check the 1st example +-- XXX Is splitSuffixOn a typo? -- | Like 'splitSuffixBy' but the separator is a sequence of elements, instead -- of a predicate for a single element. -- diff --git a/test/Streamly/Test/Prelude/Serial.hs b/test/Streamly/Test/Prelude/Serial.hs index 326dcf5ab4..920d31c332 100644 --- a/test/Streamly/Test/Prelude/Serial.hs +++ b/test/Streamly/Test/Prelude/Serial.hs @@ -32,7 +32,9 @@ import Test.Hspec as H import Streamly.Prelude ( SerialT, IsStream, avgRate, maxBuffer, serial, serially) import qualified Streamly.Prelude as S +import qualified Streamly.Internal.Data.Stream.IsStream as S import qualified Streamly.Data.Fold as FL +import qualified Streamly.Data.Array.Storable.Foreign as A import Streamly.Test.Common import Streamly.Test.Prelude @@ -163,6 +165,58 @@ associativityCheck desc t = prop desc assocCheckProp assocStream <- run $ S.toList $ t $ xStream <> yStream <> zStream listEquals (==) infixAssocstream assocStream +splitOnSeq :: Spec +splitOnSeq = do + describe "Tests for splitOnSeq" $ do + it "splitOnSeq' \"hello\" \"\" = [\"\"]" + $ splitOnSeq' "hello" "" `shouldReturn` [""] + it "splitOnSeq' \"hello\" \"hello\" = [\"\", \"\"]" + $ splitOnSeq' "hello" "hello" `shouldReturn` ["", ""] + it "splitOnSeq' \"x\" \"hello\" = [\"hello\"]" + $ splitOnSeq' "x" "hello" `shouldReturn` ["hello"] + it "splitOnSeq' \"h\" \"hello\" = [\"\", \"ello\"]" + $ splitOnSeq' "h" "hello" `shouldReturn` ["", "ello"] + it "splitOnSeq' \"o\" \"hello\" = [\"hell\", \"\"]" + $ splitOnSeq' "o" "hello" `shouldReturn` ["hell", ""] + it "splitOnSeq' \"e\" \"hello\" = [\"h\", \"llo\"]" + $ splitOnSeq' "e" "hello" `shouldReturn` ["h", "llo"] + it "splitOnSeq' \"l\" \"hello\" = [\"he\", \"\", \"o\"]" + $ splitOnSeq' "l" "hello" `shouldReturn` ["he", "", "o"] + it "splitOnSeq' \"ll\" \"hello\" = [\"he\", \"o\"]" + $ splitOnSeq' "ll" "hello" `shouldReturn` ["he", "o"] + + where + + splitOnSeq' pat xs = + S.toList $ S.splitOnSeq (A.fromList pat) (FL.toList) (S.fromList xs) + +-- XXX Check the 1st test +splitOnSuffixSeq :: Spec +splitOnSuffixSeq = do + describe "Tests for splitOnSuffixSeq" $ do + it "splitSuffixOn_ \".\" \"\" [\"\"]" + $ splitSuffixOn_ "." "" `shouldReturn` [""] + it "splitSuffixOn_ \".\" \".\" [\"\"]" + $ splitSuffixOn_ "." "." `shouldReturn` [""] + it "splitSuffixOn_ \".\" \"a\" [\"a\"]" + $ splitSuffixOn_ "." "a" `shouldReturn` ["a"] + it "splitSuffixOn_ \".\" \".a\" [\"\",\"a\"]" + $ splitSuffixOn_ "." ".a" `shouldReturn` ["", "a"] + it "splitSuffixOn_ \".\" \"a.\" [\"a\"]" + $ splitSuffixOn_ "." "a." `shouldReturn` ["a"] + it "splitSuffixOn_ \".\" \"a.b\" [\"a\",\"b\"]" + $ splitSuffixOn_ "." "a.b" `shouldReturn` ["a", "b"] + it "splitSuffixOn_ \".\" \"a.b.\" [\"a\",\"b\"]" + $ splitSuffixOn_ "." "a.b." `shouldReturn` ["a", "b"] + it "splitSuffixOn_ \".\" \"a..b..\" [\"a\",\"\",\"b\",\"\"]" + $ splitSuffixOn_ "." "a..b.." `shouldReturn` ["a", "", "b", ""] + + where + + splitSuffixOn_ pat xs = + S.toList $ S.splitOnSuffixSeq (A.fromList pat) (FL.toList) (S.fromList xs) + + main :: IO () main = hspec $ H.parallel @@ -272,3 +326,6 @@ main = hspec describe "Tests for S.groupsBy" $ do prop "testGroupsBy" testGroupsBy prop "testGroupsBySep" testGroupsBySep + + splitOnSeq + splitOnSuffixSeq From 9c8045a69c3aafe749839d0d602bd075ae04eb3b Mon Sep 17 00:00:00 2001 From: adithyaov Date: Thu, 15 Oct 2020 02:45:38 +0530 Subject: [PATCH 30/30] Update the implementation of lchunksOf * The only difference from the previous implementation is that the `if` statement encompasses `collect` rather then the other way around. --- src/Streamly/Internal/Data/Fold/Types.hs | 49 +++++++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) diff --git a/src/Streamly/Internal/Data/Fold/Types.hs b/src/Streamly/Internal/Data/Fold/Types.hs index 717c604c93..88df6dbb24 100644 --- a/src/Streamly/Internal/Data/Fold/Types.hs +++ b/src/Streamly/Internal/Data/Fold/Types.hs @@ -788,12 +788,59 @@ groupByRolling cmp (Fold fstep finitial fextract) = Fold step initial extract extract (GroupByInit s) = fextract s extract (GroupByGrouping _ s) = fextract s +-- XXX The only difference from the previous implementation is that the `if` +-- statement encompasses `collect` rather then the other way around. -- | For every n input items, apply the first fold and supply the result to the -- next fold. -- {-# INLINE lchunksOf #-} lchunksOf :: Monad m => Int -> Fold m a b -> Fold m b c -> Fold m a c -lchunksOf n split collect = many collect (ltake n split) +lchunksOf n (Fold sstp sini sext) (Fold cstp cini cext) = + Fold step initial extract + + where + + {-# INLINE initial #-} + initial = Tuple3' n <$> sini <*> cini + + {-# INLINE extract #-} + extract (Tuple3' _ ss cs) = do + sb <- sext ss + cs0 <- cstp cs sb + case cs0 of + Partial cs1 -> cext cs1 + Done cb -> return cb + Done1 cb -> return cb + + {-# INLINE step #-} + step (Tuple3' i ss cs) a = split i ss cs a + + {-# INLINE collect #-} + collect cs b onP = do + cs0 <- cstp cs b + case cs0 of + Partial cs1 -> do + esini <- sini + onP 0 esini cs1 + Done cb -> return $ Done cb + -- The branch below is incorrect + Done1 cb -> return $ Done cb + + {-# INLINE split #-} + split i ss cs a = do + let i1 = i + 1 + ss0 <- sstp ss a + let done i_ ss_ cs_ = return $ Partial $ Tuple3' i_ ss_ cs_ + done1 a_ i_ ss_ cs_ = split i_ ss_ cs_ a_ + case ss0 of + Partial ss1 -> + if i1 == n + then do + sb <- sext ss1 + collect cs sb done + else return $ Partial $ Tuple3' i1 ss1 cs + Done sb -> collect cs sb done + Done1 sb -> collect cs sb (done1 a) {-# INLINE lchunksOf2 #-} lchunksOf2 :: Monad m => Int -> Fold m a b -> Fold2 m x b c -> Fold2 m x a c