Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement Data.Parser.ParserD.deintercalate #843

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
6 changes: 6 additions & 0 deletions benchmark/Streamly/Benchmark/Data/Parser.hs
Expand Up @@ -59,6 +59,11 @@ benchIOSink value name f =
-------------------------------------------------------------------------------
-- Parsers
-------------------------------------------------------------------------------
{-# INLINE deintercalate #-}
deintercalate :: MonadCatch m => Int -> SerialT m Int -> m ((), ())
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Need benchmarks for ParserD as well.

deintercalate value =
IP.parse (PR.deintercalate FL.drain (PR.satisfy (>= value))
FL.drain (PR.satisfy (< value)))

{-# INLINE takeBetween #-}
takeBetween :: MonadCatch m => Int -> SerialT m a -> m ()
Expand Down Expand Up @@ -272,6 +277,7 @@ moduleName = "Data.Parser"
o_1_space_serial :: Int -> [Benchmark]
o_1_space_serial value =
[ benchIOSink value "takeBetween" $ takeBetween value
, benchIOSink value "deintercalate" $ deintercalate value
, benchIOSink value "takeEQ" $ takeEQ value
, benchIOSink value "takeWhile" $ takeWhile value
, benchIOSink value "drainWhile" $ drainWhile value
Expand Down
12 changes: 9 additions & 3 deletions src/Streamly/Internal/Data/Parser.hs
Expand Up @@ -771,15 +771,21 @@ lookAhead p = K.toParserK $ D.lookAhead $ K.fromParserK p
--
-- This undoes a "gintercalate" of two streams.
--
-- /Unimplemented/
-- /Internal/
--
{-# INLINE deintercalate #-}
deintercalate ::
-- Monad m =>
MonadCatch m =>
Fold m a y -> Parser m x a
-> Fold m b z -> Parser m x b
-> Parser m x (y, z)
deintercalate = undefined
deintercalate fld1 prsr1 fld2 prsr2 =
K.toParserK $
D.deintercalate
fld1
(K.fromParserK prsr1)
fld2
(K.fromParserK prsr2)
Comment on lines +782 to +788
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
deintercalate fld1 prsr1 fld2 prsr2 =
K.toParserK $
D.deintercalate
fld1
(K.fromParserK prsr1)
fld2
(K.fromParserK prsr2)
deintercalate f1 p1 f2 p2 =
K.toParserK $ D.deintercalate f1 (K.fromParserK p1) f2 (K.fromParserK p2)


-------------------------------------------------------------------------------
-- Sequential Collection
Expand Down
125 changes: 121 additions & 4 deletions src/Streamly/Internal/Data/Parser/ParserD.hs
Expand Up @@ -154,7 +154,7 @@ module Streamly.Internal.Data.Parser.ParserD
where

import Control.Exception (assert)
import Control.Monad.Catch (MonadCatch, MonadThrow(..))
import Control.Monad.Catch (MonadCatch, MonadThrow(..), catchAll)
import Streamly.Internal.Data.Fold.Types (Fold(..))
import Streamly.Internal.Data.Tuple.Strict (Tuple'(..))

Expand Down Expand Up @@ -646,18 +646,135 @@ lookAhead (Parser step1 initial1 _) = Parser step initial extract
-------------------------------------------------------------------------------
-- Interleaving
-------------------------------------------------------------------------------

data ParserTurn = Parser1 | Parser2

--
-- | See 'Streamly.Internal.Data.Parser.deintercalate'.
--
-- /Unimplemented/
-- /Internal/
--
{-# INLINE deintercalate #-}
deintercalate ::
-- Monad m =>
MonadCatch m =>
Fold m a y -> Parser m x a
-> Fold m b z -> Parser m x b
-> Parser m x (y, z)
deintercalate = undefined
deintercalate
(Fold fstep1 finitial1 fextract1)
(Parser pstep1 pinitial1 pextract1)
(Fold fstep2 finitial2 fextract2)
(Parser pstep2 pinitial2 pextract2) =

Parser step initial extract

where

initial = do
finit1 <- finitial1
pinit1 <- pinitial1
finit2 <- finitial2
pinit2 <- pinitial2
return (finit1, pinit1, finit2, pinit2, Parser1, 0::Int)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Depending on the benchmarks, you might want to use a strict state, Tuple6'


step (fs1, ps1, fs2, ps2, currentParser, numBuffered) a =
Copy link
Member

@adithyaov adithyaov Feb 10, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of currentParser you can probaly simplify this if you use Bool. Something like isParser1. Your call. You can try both and check the benchmarks. I suspect they won't differ much though.

case currentParser of
Parser1 -> do
st <- pstep1 ps1 a
case st of
Partial n ps1new ->
return $ Partial n (fs1, ps1new, fs2, ps2,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Put the tuple on the same line?

currentParser, 0)
Continue n ps1new -> do
let
newNumBuffered =
if n==0
then numBuffered + 1
else numBuffered - n
Comment on lines +689 to +693
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If numBuffered + 1 - n < 0 there is some flaw in the logic. You should use asserts instead.
Also, shouldn't it be numBuffered + 1 - n instead of numBuffered - n?

return $ Continue n (fs1, ps1new, fs2, ps2,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Put the tuple on the same line?

currentParser, newNumBuffered)
Done n result -> do
res1 <- fstep1 fs1 result
pinit1 <- pinitial1
pinit2 <- pinitial2
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think there are multiple unrequired initializations here.
Looks like ps2 is pinit2
If you don't need both the states at once, you can probably encode the parser state in the constructors themselves.

case res1 of
FL.Partial fs1new ->
return $ Partial n (fs1new, pinit1, fs2, pinit2,
Parser2, numBuffered)
FL.Done result1 -> do
result2 <- fextract2 fs2
return $ Done numBuffered (result1, result2)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should just backtrack n elements here? Why numBuffered elements?

Error _ -> do
-- input definitively rejected by the first parser
pinit1 <- pinitial1
pinit2 <- pinitial2
return $ Partial numBuffered (fs1, pinit1, fs2, pinit2,
Copy link
Member

@adithyaov adithyaov Feb 10, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You should backtrack numBuffered + 1 elements here instead of numBuffered?
And restart the buffer count from 0 instead of numBuffered - 1?

Parser2, numBuffered-1)
-- return $ Done numBuffered (result1, result2)

Parser2 -> do
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same comments made above apply to this branch.

st <- pstep2 ps2 a
case st of
Partial n ps2new ->
return $ Partial n (fs1, ps1, fs2, ps2new,
currentParser, 0)
Continue n ps2new -> do
let
newNumBuffered =
if n==0
then numBuffered + 1
else numBuffered - n
return $ Continue n (fs1, ps1, fs2, ps2new,
currentParser, newNumBuffered)
Done n result -> do
res2 <- fstep2 fs2 result
pinit1 <- pinitial1
pinit2 <- pinitial2
case res2 of
FL.Partial fs2new ->
return $ Partial n (fs1, pinit1, fs2new, pinit2,
Parser1, numBuffered)
FL.Done result2 -> do
result1 <- fextract1 fs1
return $ Done numBuffered (result1, result2)
Error _ -> do
-- input definitively rejected by the second parser
pinit1 <- pinitial1
pinit2 <- pinitial2
return $ Partial numBuffered (fs1, pinit1, fs2, pinit2,
Parser2, numBuffered-1)
-- return $ Done numBuffered (result1, result2)



extract (fs1, ps1, fs2, ps2, currentParser, _) =
case currentParser of
Parser1 -> do
maybeRes <- catchAll
(fmap Just $ pextract1 ps1)
(\_ -> return Nothing)
result1 <- case maybeRes of
Nothing -> fextract1 fs1
Just res -> do
fs1new <- fstep1 fs1 res
case fs1new of
FL.Done result1' -> return result1'
FL.Partial fs1new' -> fextract1 fs1new'
result2 <- fextract2 fs2
return (result1, result2)
Parser2 -> do
maybeRes <- catchAll
(fmap Just $ pextract2 ps2)
(\_ -> return Nothing)
result2 <- case maybeRes of
Nothing -> fextract2 fs2
Just res -> do
fs2new <- fstep2 fs2 res
case fs2new of
FL.Done result2' -> return result2'
FL.Partial fs2new' -> fextract2 fs2new'
result1 <- fextract1 fs1
return (result1, result2)

-------------------------------------------------------------------------------
-- Sequential Collection
Expand Down
36 changes: 25 additions & 11 deletions test/Streamly/Test/Data/Parser.hs
Expand Up @@ -191,6 +191,15 @@ takeBetween =
Left _ -> property (m > n || list_length < m)


-- deintercalate :: Property
-- deintercalate =
-- forAll (chooseInt (min_value, max_value)) $ \n ->
-- forAll (listOf (chooseInt (min_value, max_value))) $ \ls ->
-- case S.parse (P.deintercalate FL.drain (P.satisfy odd) FL.drain
-- (P.satisfy even)) (S.fromList ls) of
-- Right parsed_list -> property (length ls == length parsed_list)
-- Left _ -> property False

takeEQPass :: Property
takeEQPass =
forAll (chooseInt (min_value, max_value)) $ \n ->
Expand Down Expand Up @@ -431,17 +440,22 @@ wordBy =
-- Right _ -> False
-- Left _ -> True)

-- deintercalate :: Property
-- deintercalate =
-- forAll (listOf (chooseInt (0, 1))) $ \ls ->
-- case S.parse (P.deintercalate concatFold prsr_1 concatFold prsr_2) (S.fromList ls) of
-- Right parsed_list_tuple -> parsed_list_tuple == (partition (== 0) ls)
-- Left _ -> False
deintercalate :: Property
deintercalate =
forAll (listOf (chooseInt (0, 1))) $ \ls ->
case S.parse prsr (S.fromList ls) of
Right parsed_list_tuple ->
parsed_list_tuple == List.partition (== 0) ls
Left _ -> False

-- where
-- prsr_1 = (P.takeWhile (== 0) FL.toList)
-- prsr_2 = (P.takeWhile (== 1) FL.toList)
-- concatFold = FL.Fold (\concatList curr_list -> return $ concatList ++ curr_list) (return []) return
where
prsr = P.deintercalate concatFold prsr_1 concatFold prsr_2
prsr_1 = P.takeWhile (== 0) FL.toList
prsr_2 = P.takeWhile (== 1) FL.toList
concatFold = FL.Fold
(\concatList curr_list ->
return $ FL.Partial $ concatList ++ curr_list)
(return []) return

-- shortestPass :: Property
-- shortestPass =
Expand Down Expand Up @@ -684,7 +698,7 @@ main =
takeBetweenPass
prop ("P.takeBetween = Prelude.take when len >= m and len <= n and fail"
++ "otherwise fail") Main.takeBetween

prop "P.deintercalate test" deintercalate
prop "P.takeEQ = Prelude.take when len >= n" takeEQPass
prop "P.takeEQ = Prelude.take when len >= n and fail otherwise"
Main.takeEQ
Expand Down
27 changes: 27 additions & 0 deletions test/Streamly/Test/Data/Parser/ParserD.hs
Expand Up @@ -178,6 +178,32 @@ takeBetween =
else property False
Left _ -> property (m > n || list_length < m)

-- deintercalate :: Property
-- deintercalate =
-- forAll (chooseInt (min_value, max_value)) $ \n ->
-- forAll (listOf (chooseInt (min_value, max_value))) $ \ls ->
-- case S.parseD (P.deintercalate FL.drain (P.satisfy odd) FL.drain
-- (P.satisfy even)) (S.fromList ls) of
-- Right parsed_list -> property (length ls == length parsed_list)
-- Left _ -> property False

deintercalate :: Property
deintercalate =
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is being tested in Parser as well. I guess you can skip testing this here.

forAll (listOf (chooseInt (0, 1))) $ \ls ->
case S.parseD prsr (S.fromList ls) of
Right parsed_list_tuple ->
parsed_list_tuple == List.partition (== 0) ls
Left _ -> False

where
prsr = P.deintercalate concatFold prsr_1 concatFold prsr_2
prsr_1 = P.takeWhile (== 0) FL.toList
prsr_2 = P.takeWhile (== 1) FL.toList
concatFold = FL.Fold
(\concatList curr_list ->
return $ FL.Partial $ concatList ++ curr_list)
(return []) return

take :: Property
take =
forAll (chooseInt (min_value, max_value)) $ \n ->
Expand Down Expand Up @@ -696,6 +722,7 @@ main =
takeBetweenPass
prop "P.takeBetween m n = Prelude.take when len >= m and len <= n and\
\fail otherwise" takeBetween
prop "P.deintercalate test" deintercalate
prop "P.take = Prelude.take" Main.take
prop "P.takeEQ = Prelude.take when len >= n" takeEQPass
prop "P.takeEQ = Prelude.take when len >= n and fail otherwise" Main.takeEQ
Expand Down