forked from snoyberg/conduit
/
Attoparsec.hs
132 lines (115 loc) · 4.5 KB
/
Attoparsec.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE FlexibleContexts #-}
-- |
-- Copyright: 2011 Michael Snoyman, 2010 John Millikin
-- License: MIT
--
-- Turn an Attoparsec parser into a 'C.Sink' or 'C.Conduit'.
--
-- This code was taken from attoparsec-enumerator and adapted for conduits.
module Data.Conduit.Attoparsec
( ParseError (..)
, AttoparsecInput
, ConduitInput
, sinkParser
, conduitParser
) where
import Control.Exception (Exception)
import Data.Typeable (Typeable)
import qualified Data.ByteString as B
import qualified Data.Text as T
import qualified Data.Attoparsec.ByteString
import qualified Data.Attoparsec.Text
import qualified Data.Attoparsec.Types as A
import qualified Data.Conduit as C
import Data.Monoid
import Control.Monad.Trans.Class (lift)
-- | The context and message from a 'A.Fail' value.
data ParseError = ParseError
{ errorContexts :: [String]
, errorMessage :: String
} | DivergentParser
deriving (Show, Typeable)
instance Exception ParseError
-- | A class of types which may be consumed by an Attoparsec parser.
class AttoparsecInput a where
parseA :: A.Parser a b -> a -> A.IResult a b
feedA :: A.IResult a b -> a -> A.IResult a b
empty :: a
isNull :: a -> Bool
notEmpty :: [a] -> [a]
-- | Extra functions needed for conduitParser
class (AttoparsecInput a, Monoid a) => ConduitInput a where
append :: a -> a -> a
append = mappend
instance AttoparsecInput B.ByteString where
parseA = Data.Attoparsec.ByteString.parse
feedA = Data.Attoparsec.ByteString.feed
empty = B.empty
isNull = B.null
notEmpty = filter (not . B.null)
instance ConduitInput B.ByteString
instance AttoparsecInput T.Text where
parseA = Data.Attoparsec.Text.parse
feedA = Data.Attoparsec.Text.feed
empty = T.empty
isNull = T.null
notEmpty = filter (not . T.null)
-- | Convert an Attoparsec 'A.Parser' into a 'C.Sink'. The parser will
-- be streamed bytes until it returns 'A.Done' or 'A.Fail'.
--
-- If parsing fails, a 'ParseError' will be thrown with 'C.resourceThrow'.
sinkParser :: (AttoparsecInput a, C.ResourceThrow m) => A.Parser a b -> C.Sink a m b
sinkParser p0 = C.sinkState
(parseA p0)
push
close
where
push parser c | isNull c = return (C.StateProcessing parser)
push parser c =
case parser c of
A.Done leftover x ->
let lo = if isNull leftover then Nothing else Just leftover
in return (C.StateDone lo x)
A.Fail _ contexts msg -> lift $ C.resourceThrow $ ParseError contexts msg
A.Partial p -> return (C.StateProcessing p)
close parser = do
case feedA (parser empty) empty of
A.Done _leftover y -> return y
A.Fail _ contexts msg -> lift $ C.resourceThrow $ ParseError contexts msg
A.Partial _ -> lift $ C.resourceThrow DivergentParser
-- | Convert an Attoparsec 'A.Parser' into a 'C.Conduit'. The parser will
-- be streamed bytes until the source is exhausted. When done is returned a new
-- parser is created and fed with anything leftover in the stream before resuming.
--
-- If parsing fails, a 'ParseError' will be thrown with 'C.resourceThrow'.
conduitParser :: (ConduitInput a, C.ResourceThrow m) =>
A.Parser a b
-> C.Conduit a m b
conduitParser p0 = C.conduitState
(parseA p0)
push
close
where
push parser c | isNull c = return (parser, C.Producing [])
push parser c = {-# SCC "push" #-} do
case doParse parser c [] of
Left pErr -> lift $ C.resourceThrow pErr
Right (cont, results) -> return (cont, C.Producing $ reverse results)
-- doParse :: (A.Parser a b) -> a -> [b]
-- -> Either ParseError ((a -> A.IResult a b), [b])
doParse parser inp results = {-# SCC "parse" #-}
case parser inp of
A.Done leftover x
| isNull leftover ->
Right (parseA p0, x : results)
| otherwise ->
doParse (parseA p0) leftover (x:results)
A.Fail _ contexts msg -> Left $ ParseError contexts msg
A.Partial p -> return (p, results)
close parser = do
case feedA (parser empty) empty of
A.Done _leftover y -> return [y]
A.Fail leftover _ _ | isNull leftover -> return []
A.Fail _ contexts msg -> lift $ C.resourceThrow $ ParseError contexts ("closing " ++ msg)
A.Partial _ -> lift $ C.resourceThrow DivergentParser