Skip to content

Commit

Permalink
Re-implement span, spanBy, and spanByRolling as parsers
Browse files Browse the repository at this point in the history
  • Loading branch information
adithyaov committed Feb 9, 2021
1 parent d777433 commit 5a04a15
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 21 deletions.
27 changes: 7 additions & 20 deletions benchmark/Streamly/Benchmark/Data/Parser/ParserD.hs
Expand Up @@ -175,35 +175,22 @@ longestAllAny value =
)

-------------------------------------------------------------------------------
-- Derived parsers
-- Spanning
-------------------------------------------------------------------------------

{-# INLINE span #-}
span :: MonadThrow m => Int -> SerialT m Int -> m ((), ())
span value = IP.parseD (span_ (<= (value `div` 2)) FL.drain FL.drain)

where

span_ p f1 f2 = PR.splitWith (,) (PR.takeWhile p f1) (PR.fromFold f2)
span value = IP.parseD (PR.span (<= (value `div` 2)) FL.drain FL.drain)

{-# INLINE spanBy #-}
spanBy :: MonadThrow m => Int -> SerialT m Int -> m ((), ())
spanBy value =
IP.parseD (spanBy_ (\_ i -> i <= (value `div` 2)) FL.drain FL.drain)

where

spanBy_ c f1 f2 = PR.splitWith (,) (PR.groupBy c f1) (PR.fromFold f2)
IP.parseD (PR.spanBy (\_ i -> i <= (value `div` 2)) FL.drain FL.drain)

{-# INLINE spanByRolling #-}
spanByRolling :: MonadThrow m => Int -> SerialT m Int -> m ((), ())
spanByRolling value =
IP.parseD (spanByRolling_ (\_ i -> i <= value `div` 2) FL.drain FL.drain)

where

spanByRolling_ c f1 f2 =
PR.splitWith (,) (PR.groupByRolling c f1) (PR.fromFold f2)
IP.parseD (PR.spanByRolling (\_ i -> i <= value `div` 2) FL.drain FL.drain)

-------------------------------------------------------------------------------
-- Parsers in which -fspec-constr-recursive=16 is problematic
Expand Down Expand Up @@ -272,8 +259,8 @@ o_1_space_serial value =
, benchIOSink value "longest (all,any)" $ longestAllAny value
]

o_1_space_serial_derived :: Int -> [Benchmark]
o_1_space_serial_derived value =
o_1_space_serial_spanning :: Int -> [Benchmark]
o_1_space_serial_spanning value =
[ benchIOSink value "span" $ span value
, benchIOSink value "spanBy" $ spanBy value
, benchIOSink value "spanByRolling" $ spanByRolling value
Expand Down Expand Up @@ -313,7 +300,7 @@ main = do

allBenchmarks value =
[ bgroup (o_1_space_prefix moduleName) (o_1_space_serial value)
, bgroup (o_1_space_prefix moduleName) (o_1_space_serial_derived value)
, bgroup (o_1_space_prefix moduleName) (o_1_space_serial_spanning value)
, bgroup (o_n_heap_prefix moduleName) (o_n_heap_serial value)
, bgroup (o_n_space_prefix moduleName) (o_n_space_serial value)
]
54 changes: 53 additions & 1 deletion src/Streamly/Internal/Data/Parser/ParserD.hs
Expand Up @@ -69,6 +69,11 @@ module Streamly.Internal.Data.Parser.ParserD
-- , suffixOf -- match any suffix of a given string
-- , infixOf -- match any substring of a given string

-- ** Spanning
, span
, spanBy
, spanByRolling

-- Second order parsers (parsers using parsers)
-- * Binary Combinators

Expand Down Expand Up @@ -162,7 +167,7 @@ import Streamly.Internal.Data.Tuple.Strict (Tuple'(..))
import qualified Streamly.Internal.Data.Fold.Types as FL

import Prelude hiding
(any, all, take, takeWhile, sequence, concatMap, maybe, either)
(any, all, take, takeWhile, sequence, concatMap, maybe, either, span)
import Streamly.Internal.Data.Parser.ParserD.Tee
import Streamly.Internal.Data.Parser.ParserD.Types

Expand Down Expand Up @@ -724,6 +729,53 @@ eqBy cmp str = Parser step initial extract
$ "eqBy: end of input, yet to match "
++ show (length xs) ++ " elements"

--------------------------------------------------------------------------------
--- Spanning
--------------------------------------------------------------------------------

-- | @span p f1 f2@ composes folds @f1@ and @f2@ such that @f1@ consumes the
-- input as long as the predicate @p@ is 'True'. @f2@ consumes the rest of the
-- input.
--
-- > let span_ p xs = S.parse (FL.span p FL.toList FL.toList) $ S.fromList xs
--
-- >>> span_ (< 1) [1,2,3]
-- > ([],[1,2,3])
--
-- >>> span_ (< 2) [1,2,3]
-- > ([1],[2,3])
--
-- >>> span_ (< 4) [1,2,3]
-- > ([1,2,3],[])
--
-- /Internal/
{-# INLINE span #-}
span :: Monad m => (a -> Bool) -> Fold m a b -> Fold m a c -> Parser m a (b, c)
span p f1 f2 = noErrorUnsafeSplitWith (,) (takeWhile p f1) (fromFold f2)

-- | Break the input stream into two groups, the first group takes the input as
-- long as the predicate applied to the first element of the stream and next
-- input element holds 'True', the second group takes the rest of the input.
--
-- /Internal/
--
{-# INLINE spanBy #-}
spanBy ::
Monad m
=> (a -> a -> Bool) -> Fold m a b -> Fold m a c -> Parser m a (b, c)
spanBy eq f1 f2 = noErrorUnsafeSplitWith (,) (groupBy eq f1) (fromFold f2)

-- | Like 'spanBy' but applies the predicate in a rolling fashion i.e.
-- predicate is applied to the previous and the next input elements.
--
-- /Internal/
{-# INLINE spanByRolling #-}
spanByRolling ::
Monad m
=> (a -> a -> Bool) -> Fold m a b -> Fold m a c -> Parser m a (b, c)
spanByRolling eq f1 f2 =
noErrorUnsafeSplitWith (,) (groupByRolling eq f1) (fromFold f2)

-------------------------------------------------------------------------------
-- nested parsers
-------------------------------------------------------------------------------
Expand Down

0 comments on commit 5a04a15

Please sign in to comment.