Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

Failed experiment: add a Status type.

This is an attempt to address a bug I introduced when I changed the
semantics of the <|> operator, I introduced a bug.  Consider the
following parser:

  (char 'f' *> char 'i') <|> char 'o'

When I got rid of backtracking, this parser would accept an input of
"fo", which is clearly not desirable.

The Status type rescues this somewhat: when any parser consumes input, it
enters the Committed state.  The <|> operator only executes its RHS if its
LHS fails while in the Uncommitted state (i.e. hasn't consumed any input).

There's a nasty snag, of course: tracking this additional piece of
state regresses aeson parsing performance to *worse* than when we had
backtracking!
  • Loading branch information...
commit c2ec0a4fd956a8629327dbc7bbb924de45efe78e 1 parent 6c8d050
@bos authored
Showing with 74 additions and 62 deletions.
  1. +50 −49 Data/Attoparsec/Internal.hs
  2. +24 −13 Data/Attoparsec/Internal/Types.hs
View
99 Data/Attoparsec/Internal.hs
@@ -85,49 +85,49 @@ import qualified Data.ByteString.Unsafe as B
-- | If at least @n@ bytes of input are available, return the current
-- input, otherwise fail.
ensure :: Int -> Parser B.ByteString
-ensure !n = Parser $ \i0 a0 m0 kf ks ->
+ensure !n = Parser $ \i0 s0 a0 m0 kf ks ->
if B.length (unI i0) >= n
- then ks i0 a0 m0 (unI i0)
- else runParser (demandInput >> ensure n) i0 a0 m0 kf ks
+ then ks i0 s0 a0 m0 (unI i0)
+ else runParser (demandInput >> ensure n) i0 s0 a0 m0 kf ks
-- | Ask for input. If we receive any, pass it to a success
-- continuation, otherwise to a failure continuation.
-prompt :: Input -> Added -> More
- -> (Input -> Added -> More -> Result r)
- -> (Input -> Added -> More -> Result r)
+prompt :: Input -> Status -> Added -> More
+ -> (Input -> Status -> Added -> More -> Result r)
+ -> (Input -> Status -> Added -> More -> Result r)
-> Result r
-prompt i0 a0 _m0 kf ks = Partial $ \s ->
+prompt i0 s0 a0 _m0 kf ks = Partial $ \s ->
if B.null s
- then kf i0 a0 Complete
- else ks (i0 `mappend` I s) (a0 `mappend` Added s) Incomplete
+ then kf i0 s0 a0 Complete
+ else ks (i0 `mappend` I s) s0 (a0 `mappend` Added s) Incomplete
-- | Immediately demand more input via a 'Partial' continuation
-- result.
demandInput :: Parser ()
-demandInput = Parser $ \i0 a0 m0 kf ks ->
+demandInput = Parser $ \i0 s0 a0 m0 kf ks ->
if m0 == Complete
- then kf i0 a0 m0 ["demandInput"] "not enough bytes"
- else let kf' i a m = kf i a m ["demandInput"] "not enough bytes"
- ks' i a m = ks i a m ()
- in prompt i0 a0 m0 kf' ks'
+ then kf i0 s0 a0 m0 ["demandInput"] "not enough bytes"
+ else let kf' i s a m = kf i s a m ["demandInput"] "not enough bytes"
+ ks' i s a m = ks i s a m ()
+ in prompt i0 s0 a0 m0 kf' ks'
-- | This parser always succeeds. It returns 'True' if any input is
-- available either immediately or on demand, and 'False' if the end
-- of all input has been reached.
wantInput :: Parser Bool
-wantInput = Parser $ \i0 a0 m0 _kf ks ->
+wantInput = Parser $ \i0 s0 a0 m0 _kf ks ->
case () of
- _ | not (B.null (unI i0)) -> ks i0 a0 m0 True
- | m0 == Complete -> ks i0 a0 m0 False
- | otherwise -> let kf' i a m = ks i a m False
- ks' i a m = ks i a m True
- in prompt i0 a0 m0 kf' ks'
+ _ | not (B.null (unI i0)) -> ks i0 s0 a0 m0 True
+ | m0 == Complete -> ks i0 s0 a0 m0 False
+ | otherwise -> let kf' i s a m = ks i s a m False
+ ks' i s a m = ks i s a m True
+ in prompt i0 s0 a0 m0 kf' ks'
get :: Parser B.ByteString
-get = Parser $ \i0 a0 m0 _kf ks -> ks i0 a0 m0 (unI i0)
+get = Parser $ \i0 s0 a0 m0 _kf ks -> ks i0 s0 a0 m0 (unI i0)
put :: B.ByteString -> Parser ()
-put s = Parser $ \_i0 a0 m0 _kf ks -> ks (I s) a0 m0 ()
+put s = Parser $ \_i0 _s0 a0 m0 _kf ks -> ks (I s) Committed a0 m0 ()
-- | Attempt a parse, and if it fails, rewind the input so that no
-- input appears to have been consumed.
@@ -137,22 +137,23 @@ put s = Parser $ \_i0 a0 m0 _kf ks -> ks (I s) a0 m0 ()
-- lookahead. The downside to using this combinator is that it can
-- retain input for longer than is desirable.
try :: Parser a -> Parser a
-try p = Parser $ \i0 a0 m0 kf ks ->
- noAdds i0 a0 m0 $ \i1 a1 m1 ->
- let kf' i2 a2 m2 = addS i0 a0 m0 i2 a2 m2 kf
- in runParser p i1 a1 m1 kf' ks
- where noAdds i0 _a0 m0 f = f i0 mempty m0
+try p = Parser $ \i0 s0 a0 m0 kf ks ->
+ noAdds i0 s0 a0 m0 $ \i1 s1 a1 m1 ->
+ let kf' i2 s2 a2 m2 = addS i0 s0 a0 m0 i2 s2 a2 m2 kf
+ in runParser p i1 s1 a1 m1 kf' ks
+ where noAdds i0 s0 _a0 m0 f = f i0 s0 mempty m0
-addS :: Input -> Added -> More
- -> Input -> Added -> More
- -> (Input -> Added -> More -> r) -> r
-addS i0 a0 m0 _i1 a1 m1 f =
+addS :: Input -> Status -> Added -> More
+ -> Input -> Status -> Added -> More
+ -> (Input -> Status -> Added -> More -> r) -> r
+addS i0 s0 a0 m0 _i1 s1 a1 m1 f =
let !i = case a1 of
Dropped -> i0
- Added s -> i0 `mappend` I s
+ Added bs -> i0 `mappend` I bs
+ !s = s0 `mappend` s1
!a = a0 `mappend` a1
!m = m0 `mappend` m1
- in f i a m
+ in f i s a m
{-# INLINE addS #-}
-- | The parser @satisfy p@ succeeds for any byte for which the
@@ -410,17 +411,17 @@ notWord8 c = satisfy (/= c) <?> "not " ++ show c
-- | Match only if all input has been consumed.
endOfInput :: Parser ()
-endOfInput = Parser $ \i0 a0 m0 kf ks ->
+endOfInput = Parser $ \i0 s0 a0 m0 kf ks ->
if B.null (unI i0)
then if m0 == Complete
- then ks i0 a0 m0 ()
- else let kf' i1 a1 m1 _ _ = addS i0 a0 m0 i1 a1 m1 $
- \ i2 a2 m2 -> ks i2 a2 m2 ()
- ks' i1 a1 m1 _ = addS i0 a0 m0 i1 a1 m1 $
- \ i2 a2 m2 -> kf i2 a2 m2 []
- "endOfInput"
- in runParser demandInput i0 a0 m0 kf' ks'
- else kf i0 a0 m0 [] "endOfInput"
+ then ks i0 s0 a0 m0 ()
+ else let kf' i1 s1 a1 m1 _ _ = addS i0 s0 a0 m0 i1 s1 a1 m1 $
+ \ i2 s2 a2 m2 -> ks i2 s2 a2 m2 ()
+ ks' i1 s1 a1 m1 _ = addS i0 s0 a0 m0 i1 s1 a1 m1 $
+ \ i2 s2 a2 m2 -> kf i2 s2 a2 m2 []
+ "endOfInput"
+ in runParser demandInput i0 s0 a0 m0 kf' ks'
+ else kf i0 s0 a0 m0 [] "endOfInput"
-- | Return an indication of whether the end of input has been
-- reached.
@@ -437,30 +438,30 @@ endOfLine = (word8 10 >> return ()) <|> (string "\r\n" >> return ())
(<?>) :: Parser a
-> String -- ^ the name to use if parsing fails
-> Parser a
-p <?> msg0 = Parser $ \i0 a0 m0 kf ks ->
- let kf' i a m strs msg = kf i a m (msg0:strs) msg
- in runParser p i0 a0 m0 kf' ks
+p <?> msg0 = Parser $ \i0 s0 a0 m0 kf ks ->
+ let kf' i s a m strs msg = kf i s a m (msg0:strs) msg
+ in runParser p i0 s0 a0 m0 kf' ks
{-# INLINE (<?>) #-}
infix 0 <?>
-- | Terminal failure continuation.
failK :: Failure a
-failK i0 _a0 _m0 stack msg = Fail (unI i0) stack msg
+failK i0 _s0 _a0 _m0 stack msg = Fail (unI i0) stack msg
{-# INLINE failK #-}
-- | Terminal success continuation.
successK :: Success a a
-successK i0 _a0 _m0 a = Done (unI i0) a
+successK i0 _s0 _a0 _m0 a = Done (unI i0) a
{-# INLINE successK #-}
-- | Run a parser.
parse :: Parser a -> B.ByteString -> Result a
-parse m s = runParser m (I s) mempty Incomplete failK successK
+parse m s = runParser m (I s) Uncommitted mempty Incomplete failK successK
{-# INLINE parse #-}
-- | Run a parser that cannot be resupplied via a 'Partial' result.
parseOnly :: Parser a -> B.ByteString -> Either String a
-parseOnly m s = case runParser m (I s) mempty Complete failK successK of
+parseOnly m s = case runParser m (I s) Uncommitted mempty Complete failK successK of
Fail _ _ err -> Left err
Done _ a -> Right a
_ -> error "parseOnly: impossible error!"
View
37 Data/Attoparsec/Internal/Types.hs
@@ -18,6 +18,7 @@ module Data.Attoparsec.Internal.Types
, Success
, Result(..)
, Input(..)
+ , Status(..)
, Added(..)
, More(..)
, (+++)
@@ -86,14 +87,23 @@ instance Monoid Added where
-- | The 'Parser' type is a monad.
newtype Parser a = Parser {
- runParser :: forall r. Input -> Added -> More
+ runParser :: forall r. Input -> Status -> Added -> More
-> Failure r
-> Success a r
-> Result r
}
-type Failure r = Input -> Added -> More -> [String] -> String -> Result r
-type Success a r = Input -> Added -> More -> a -> Result r
+type Failure r = Input -> Status -> Added -> More -> [String] -> String -> Result r
+type Success a r = Input -> Status -> Added -> More -> a -> Result r
+
+data Status = Uncommitted | Committed
+ deriving (Eq, Show)
+
+instance Monoid Status where
+ mempty = Uncommitted
+ mappend c@Committed _ = c
+ mappend _ c@Committed = c
+ mappend _ _ = Uncommitted
-- | Have we read all available input?
data More = Complete | Incomplete
@@ -106,13 +116,13 @@ instance Monoid More where
mappend _ _ = Incomplete
bindP :: Parser a -> (a -> Parser b) -> Parser b
-bindP m g =
- Parser $ \i0 a0 m0 kf ks -> runParser m i0 a0 m0 kf $
- \i1 a1 m1 a -> runParser (g a) i1 a1 m1 kf ks
+bindP m g = Parser $ \i0 s0 a0 m0 kf ks ->
+ runParser m i0 s0 a0 m0 kf $ \i1 s1 a1 m1 a ->
+ runParser (g a) i1 s1 a1 m1 kf ks
{-# INLINE bindP #-}
returnP :: a -> Parser a
-returnP a = Parser (\i0 a0 m0 _kf ks -> ks i0 a0 m0 a)
+returnP a = Parser (\i0 s0 a0 m0 _kf ks -> ks i0 s0 a0 m0 a)
{-# INLINE returnP #-}
instance Monad Parser where
@@ -121,9 +131,10 @@ instance Monad Parser where
fail = failDesc
plus :: Parser a -> Parser a -> Parser a
-plus a b = Parser $ \i0 a0 m0 kf ks ->
- let kf' i1 a1 m1 _ _ = runParser b i1 a1 m1 kf ks
- in runParser a i0 a0 m0 kf' ks
+plus a b = Parser $ \i0 _s0 a0 m0 kf ks ->
+ let kf' i1 s1@Uncommitted a1 m1 _ _ = runParser b i1 s1 a1 m1 kf ks
+ kf' i1 s1 a1 m1 kf1 ks1 = kf i1 s1 a1 m1 kf1 ks1
+ in runParser a i0 Uncommitted a0 m0 kf' ks
{-# INLINE plus #-}
instance MonadPlus Parser where
@@ -132,8 +143,8 @@ instance MonadPlus Parser where
mplus = plus
fmapP :: (a -> b) -> Parser a -> Parser b
-fmapP p m = Parser $ \i0 a0 m0 f k ->
- runParser m i0 a0 m0 f $ \i1 a1 s1 a -> k i1 a1 s1 (p a)
+fmapP p m = Parser $ \i0 s0 a0 m0 f k ->
+ runParser m i0 s0 a0 m0 f $ \i1 s1 a1 m1 a -> k i1 s1 a1 m1 (p a)
{-# INLINE fmapP #-}
instance Functor Parser where
@@ -174,7 +185,7 @@ instance Alternative Parser where
{-# INLINE (<|>) #-}
failDesc :: String -> Parser a
-failDesc err = Parser (\i0 a0 m0 kf _ks -> kf i0 a0 m0 [] msg)
+failDesc err = Parser (\i0 s0 a0 m0 kf _ks -> kf i0 s0 a0 m0 [] msg)
where msg = "Failed reading: " ++ err
{-# INLINE failDesc #-}
Please sign in to comment.
Something went wrong with that request. Please try again.