This repository has been archived by the owner on Nov 28, 2018. It is now read-only.
/
Combinators.hs
128 lines (111 loc) · 3.54 KB
/
Combinators.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
{-# LANGUAGE FlexibleContexts #-}
-- | Parser combinators.
module ACE.Combinators where
import ACE.Types.Tokens
import Data.Text (Text)
import qualified Data.Text as T
import Text.Parsec.Pos
import Text.Parsec.Prim
-- | Match a word with the given string.
string :: Stream s m Token => Text -> ParsecT s u m Text
string s =
satisfy
(\t ->
case t of
Word _ t' -> if t' == s then Just t' else Nothing
_ -> Nothing)
-- | Match a Saxon genitive.
genitive :: Stream s m Token => ParsecT s u m Bool
genitive =
satisfy
(\t ->
case t of
Genitive _ hasS -> Just hasS
_ -> Nothing)
-- | Match a word with the given string.
number :: Stream s m Token => ParsecT s u m Integer
number =
satisfy
(\t ->
case t of
Number _ t' -> Just t'
_ -> Nothing)
-- | Quoted string.
quoted :: Stream s m Token => ParsecT s u m Text
quoted =
satisfy
(\t ->
case t of
QuotedString _ t' -> Just t'
_ -> Nothing)
-- | A comma.
comma :: Stream s m Token => ParsecT s u m ()
comma =
satisfy
(\t ->
case t of
Comma _ -> Just ()
_ -> Nothing)
-- | A period.
period :: Stream s m Token => ParsecT s u m ()
period =
satisfy
(\t ->
case t of
Period _ -> Just ()
_ -> Nothing)
-- | Try to match all the given strings, or none at all.
strings :: Stream s m Token => [Text] -> ParsecT s u m ()
strings ss =
try (sequence_ (map string ss))
-- | Satisfy the given predicate from the token stream.
satisfy :: Stream s m Token => (Token -> Maybe a) -> ParsecT s u m a
satisfy f =
tokenPrim tokenString
tokenPosition
f
-- | The parser @anyToken@ accepts any kind of token. It is for example
-- used to implement 'eof'. Returns the accepted token.
anyToken :: (Stream s m Token) => ParsecT s u m Token
anyToken = satisfy Just
-- | Make a string out of the token, for error message purposes.
tokenString :: Token -> [Char]
tokenString t =
case t of
Word _ w -> "word \"" ++ T.unpack w ++ "\""
QuotedString _ s -> "quotation \"" ++ T.unpack s ++ "\""
Period{} -> "period"
Comma{} -> "comma"
QuestionMark{} -> "question mark"
Genitive _ s ->
if s
then "genitive 's"
else "genitive '"
Number _ n -> "number: " ++ show n
-- | Update the position by the token.
tokenPosition :: SourcePos -> Token -> t -> SourcePos
tokenPosition pos t _ =
setSourceColumn (setSourceLine pos line) col
where (line,col) = tokenPos t
-- | @notFollowedBy p@ only succeeds when parser @p@ fails. This parser
-- does not consume any input. This parser can be used to implement the
-- \'longest match\' rule. For example, when recognizing keywords (for
-- example @let@), we want to make sure that a keyword is not followed
-- by a legal identifier character, in which case the keyword is
-- actually an identifier (for example @lets@). We can program this
-- behaviour as follows:
--
-- > keywordLet = try (do{ string "let"
-- > ; notFollowedBy alphaNum
-- > })
notFollowedBy :: (Stream s m Token) => ParsecT s u m Token -> ParsecT s u m ()
notFollowedBy p =
try ((do c <- try p
unexpected (tokenString c)) <|>
return ())
-- | This parser only succeeds at the end of the input. This is not a
-- primitive parser but it is defined using 'notFollowedBy'.
--
-- > eof = notFollowedBy anyToken <?> "end of input"
eof :: (Stream s m Token) => ParsecT s u m ()
eof = notFollowedBy anyToken <?> "end of input"