/
Lexer.x
330 lines (285 loc) · 13.1 KB
/
Lexer.x
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
{
-- Everything between this pair of braces is copied verbatim by Alex into
-- the generated Haskell module, so plain Haskell syntax applies here.
#if __GLASGOW_HASKELL__ > 800
-- Keep the silenced warnings below out of -Werror builds on newer GHCs
-- (the generated lexer code trips these warnings).
{-# OPTIONS_GHC -Wno-error=deprecated-flags #-}
{-# OPTIONS_GHC -Wno-error=missing-signatures #-}
{-# OPTIONS_GHC -Wno-error=tabs #-}
{-# OPTIONS_GHC -Wno-error=unused-imports #-}
#endif
-- Older-style -fno-warn flags for the same warnings, kept for older GHCs.
{-# OPTIONS_GHC -fno-warn-deprecated-flags #-}
{-# OPTIONS_GHC -fno-warn-missing-signatures #-}
{-# OPTIONS_GHC -fno-warn-tabs #-}
{-# OPTIONS_GHC -fno-warn-unused-imports #-}
{-| The lexer is generated by Alex (<http://www.haskell.org/alex>) and is an
adaptation of GHC's lexer. The main lexing function 'lexer' is called by
the "Agda.Syntax.Parser.Parser" to get the next token from the input.
-}
module Agda.Syntax.Parser.Lexer
( -- * The main function
lexer
-- * Lex states
, normal, code
, layout, empty_layout, bol, imp_dir
-- * Alex generated functions
, AlexReturn(..), alexScanUser
) where
import Agda.Syntax.Parser.Alex
import Agda.Syntax.Parser.Comments
-- The {-# SOURCE #-} imports go through hs-boot files to break the import
-- cycle with the layout and lex-action modules; they are hidden from
-- Haddock builds (which define __HADDOCK__).
#ifndef __HADDOCK__
import {-# SOURCE #-} Agda.Syntax.Parser.Layout
import {-# SOURCE #-} Agda.Syntax.Parser.LexActions
#endif
import Agda.Syntax.Parser.Monad
import Agda.Syntax.Parser.StringLiterals
import Agda.Syntax.Parser.Tokens
import Agda.Syntax.Literal
}
-- Note that the regular expressions should not use non-ASCII
-- characters, see Agda.Syntax.Parser.Alex.alexGetByte.
-- Character-set macros ($name). `#` is Alex set difference.
$digit = 0-9
$hexdigit = [ $digit a-f A-F ]
$binarydigit = 0-1
-- "Alphabetic" identifier characters: ASCII letters plus underscore.
$alpha = [ A-Z a-z _ ]
-- Operator-like characters that are also legal inside Agda names.
$op = [ \- \! \# \$ \% \& \* \+ \/ \< \= \> \^ \| \~ \? \` \[ \] \, \: ]
$idstart = [ $digit $alpha $op ]
-- Identifier continuation characters additionally allow prime and backslash.
$idchar = [ $idstart ' \\ ]
$nonalpha = $idchar # $alpha
$white_notab = $white # \t
$white_nonl = $white_notab # \n
-- Regex macros (@name).
-- Numeric literals may use underscores as digit-group separators
-- (e.g. 1_000_000), in decimal, hexadecimal (0x) or binary (0b) form.
@prettynumber = $digit+ ([_] $digit+)*
| "0x" $hexdigit+ ([_] $hexdigit+)*
| "0b" $binarydigit+ ([_] $binarydigit+)*
@integer = [\-]? @prettynumber
@decimal = $digit+
@exponent = [eE] [\-\+]? @decimal
@float = [\-]? @decimal \. @decimal @exponent?
| [\-]? @decimal @exponent
-- A name can't start with \x (to allow \x -> x).
-- Bug in alex: [ _ op ]+ doesn't seem to work!
@start = ($idstart # [_]) | \\ [ $nonalpha ]
@ident = @start $idchar* | [_] $idchar+
-- Qualified names: zero or more dot-terminated module components.
@namespace = (@ident \.)*
@q_ident = @namespace @ident
tokens :-
-- White space
-- (the trailing `;` action means: skip the matched text)
<0,code,bol_,layout_,empty_layout_,imp_dir_>
$white_nonl+ ;
<pragma_,fpragma_> $white_notab ;
-- Pragmas
<0,code,pragma_> "{-#" { beginWith pragma $ symbol SymOpenPragma }
<fpragma_> "{-#" { beginWith fpragma $ symbol SymOpenPragma }
<pragma_,fpragma_> "#-}" { endWith $ symbol SymClosePragma }
-- Pragma keywords, recognised only inside a {-# ... #-} in state pragma_.
<pragma_> "BUILTIN" { keyword KwBUILTIN }
<pragma_> "CATCHALL" { keyword KwCATCHALL }
-- COMPILE and FOREIGN switch to the fpragma_ state so the remainder of
-- the pragma is lexed as raw strings rather than pragma keywords.
<pragma_> "COMPILE" { endWith $ beginWith fpragma $ keyword KwCOMPILE }
<pragma_> "FOREIGN" { endWith $ beginWith fpragma $ keyword KwFOREIGN }
<pragma_> "DISPLAY" { keyword KwDISPLAY }
<pragma_> "ETA" { keyword KwETA }
<pragma_> "IMPOSSIBLE" { keyword KwIMPOSSIBLE }
<pragma_> "INJECTIVE" { keyword KwINJECTIVE }
<pragma_> "INJECTIVE_FOR_INFERENCE" { keyword KwINJECTIVE_FOR_INFERENCE }
<pragma_> "INLINE" { keyword KwINLINE }
<pragma_> "INCOHERENT" { keyword KwINCOHERENT }
<pragma_> "NOINLINE" { keyword KwNOINLINE }
<pragma_> "NOT_PROJECTION_LIKE" { keyword KwNOT_PROJECTION_LIKE }
<pragma_> "LINE" { keyword KwLINE }
<pragma_> "MEASURE" { keyword KwMEASURE }
<pragma_> "NO_POSITIVITY_CHECK" { keyword KwNO_POSITIVITY_CHECK }
<pragma_> "NO_TERMINATION_CHECK" { keyword KwNO_TERMINATION_CHECK }
<pragma_> "NO_UNIVERSE_CHECK" { keyword KwNO_UNIVERSE_CHECK }
<pragma_> "NON_COVERING" { keyword KwNON_COVERING }
<pragma_> "NON_TERMINATING" { keyword KwNON_TERMINATING }
<pragma_> "OPTIONS" { keyword KwOPTIONS }
<pragma_> "POLARITY" { keyword KwPOLARITY }
<pragma_> "OVERLAPPABLE" { keyword KwOVERLAPPABLE }
<pragma_> "OVERLAPPING" { keyword KwOVERLAPPING }
<pragma_> "OVERLAPS" { keyword KwOVERLAPS }
<pragma_> "REWRITE" { keyword KwREWRITE }
<pragma_> "STATIC" { keyword KwSTATIC }
<pragma_> "TERMINATING" { keyword KwTERMINATING }
<pragma_> "WARNING_ON_USAGE" { keyword KwWARNING_ON_USAGE }
<pragma_> "WARNING_ON_IMPORT" { keyword KwWARNING_ON_IMPORT }
-- Anything else inside a pragma becomes a TokString chunk.
<pragma_> . # [ $white \" ] + { withInterval $ TokString } -- we recognise string literals in pragmas
<fpragma_> . # [ $white ] + { withInterval $ TokString }
-- Comments
-- We need to rule out pragmas here. Usually longest match would take
-- precedence, but in some states pragmas aren't valid but comments are.
<0,code,bol_,layout_,empty_layout_,imp_dir_>
"{-" / { not' (followedBy '#') } { nestedComment }
-- A misplaced end-comment, like in @f {x-} = x-@ gives a parse error.
"-}" { symbol SymEndComment }
@ident "-}" { symbol SymEndComment }
-- Dashes followed by a name symbol should be parsed as a name.
-- Line comments: the first rule keeps the comment as a token when the
-- keepComments flag is set, the second rule discards it.
<0,code,bol_,layout_,empty_layout_,imp_dir_>
"--" .* / { keepComments .&&. (followedBy '\n' .||. eof) }
{ confirmLayout `andThen` withInterval TokComment }
<0,code,bol_,layout_,empty_layout_,imp_dir_>
"--" .* / { followedBy '\n' .||. eof }
{ confirmLayout `andThen` skip }
-- Note: we need to confirm tentative layout columns whenever we meet
-- a newline character ('\n').
-- The exception is the newline after a layout keyword.
-- We need to check the offside rule for the first token on each line. We
-- should not check the offside rule for the end of file token or an
-- '\end{code}'
<0,code,imp_dir_> \n { begin bol_ } -- Note that @begin@ revisits '\n' in the new state!
<bol_>
{
\n { confirmLayout `andThen` skip }
-- ^ \\ "end{code}" { end }
() / { not' eof } { offsideRule }
}
-- After a layout keyword the
-- indentation of the first token decides the column of the layout block.
<layout_>
{ \n { confirmLayout `andThen` skip}
() { endWith newLayoutBlock }
}
-- The only rule for the empty_layout state. Generates a close brace.
<empty_layout_> () { emptyLayout }
-- Keywords
<0,code> abstract { keyword KwAbstract }
<0,code> codata { keyword KwCoData }
<0,code> coinductive { keyword KwCoInductive }
<0,code> constructor { keyword KwConstructor }
<0,code> data { keyword KwData }
<0,code> do { keyword KwDo }
<0,code> "eta-equality" { keyword KwEta }
<0,code> field { keyword KwField }
<0,code> forall { keyword KwForall }
<0,code> import { keyword KwImport }
<0,code> in { keyword KwIn }
<0,code> inductive { keyword KwInductive }
<0,code> infix { keyword KwInfix }
<0,code> infixl { keyword KwInfixL }
<0,code> infixr { keyword KwInfixR }
<0,code> instance { keyword KwInstance }
<0,code> interleaved { keyword KwInterleaved }
<0,code> let { keyword KwLet }
<0,code> macro { keyword KwMacro }
<0,code> module { keyword KwModule }
<0,code> mutual { keyword KwMutual }
<0,code> "no-eta-equality" { keyword KwNoEta }
<0,code> open { keyword KwOpen }
<0,code> overlap { keyword KwOverlap }
<0,code> pattern { keyword KwPatternSyn }
<0,code> postulate { keyword KwPostulate }
<0,code> primitive { keyword KwPrimitive }
<0,code> private { keyword KwPrivate }
<0,code> quote { keyword KwQuote }
<0,code> quoteTerm { keyword KwQuoteTerm }
<0,code> record { keyword KwRecord }
<0,code> rewrite { keyword KwRewrite }
<0,code> syntax { keyword KwSyntax }
<0,code> tactic { keyword KwTactic }
<0,code> unquote { keyword KwUnquote }
<0,code> unquoteDecl { keyword KwUnquoteDecl }
<0,code> unquoteDef { keyword KwUnquoteDef }
<0,code> variable { keyword KwVariable }
<0,code> where { keyword KwWhere }
<0,code> with { keyword KwWith }
<0,code> opaque { keyword KwOpaque }
<0,code> unfolding { keyword KwUnfolding }
-- The parser is responsible to put the lexer in the imp_dir_ state when it
-- expects an import directive keyword. This means that if you run the
-- tokensParser you will never see these keywords.
<0,code> using { keyword KwUsing }
<0,code> hiding { keyword KwHiding }
<0,code> renaming { keyword KwRenaming }
<imp_dir_> to { endWith $ keyword KwTo }
<0,code> public { keyword KwPublic }
-- Holes
<0,code> "{!" { hole }
-- Special symbols
-- (ordered longest-first where prefixes overlap, e.g. "..." before "..")
<0,code> "..." { symbol SymEllipsis }
<0,code> ".." { symbol SymDotDot }
<0,code> "." { symbol SymDot }
<0,code> ";" { symbol SymSemi }
<0,code> ":" { symbol SymColon }
<0,code> "=" { symbol SymEqual }
<0,code> "_" { symbol SymUnderscore }
<0,code> "?" { symbol SymQuestionMark }
<0,code> "|" { symbol SymBar }
<0,code> "(|" /[$white] { symbol SymOpenIdiomBracket }
<0,code> "|)" { symbol SymCloseIdiomBracket }
<0,code> "(|)" { symbol SymEmptyIdiomBracket }
<0,code> "(" { symbol SymOpenParen }
<0,code> ")" { symbol SymCloseParen }
<0,code> "->" { symbol SymArrow }
<0,code> "\" { symbol SymLambda } -- "
<0,code> "@" { symbol SymAs }
<0,code> "{{" /[^[!\-]] { symbol SymDoubleOpenBrace }
-- Andreas, 2019-08-08, issue #3962, don't lex '{{' if followed by '-'
-- since this will be confused with '{-' (start of comment) by Emacs.
-- We don't lex '}}' into a SymDoubleCloseBrace. Instead, we lex it as
-- two SymCloseBrace's. When the parser is looking for a double
-- closing brace, it will also accept two SymCloseBrace's, after
-- verifying that they are immediately next to each other.
-- This trick allows us to keep "record { a = record {}}" working
-- properly.
-- <0,code> "}}" { symbol SymDoubleCloseBrace }
<0,code> "{" { symbol SymOpenBrace } -- you can't use braces for layout
<0,code> "}" { symbol SymCloseBrace }
-- Literals
<0,code> \' { litChar }
<0,code,pragma_> \" { litString }
<0,code> @integer { literal' integer LitNat }
<0,code> @float { literal LitFloat }
-- Identifiers
<0,code,imp_dir_> @q_ident { identifier }
-- Andreas, 2013-02-21, added identifiers to the 'imp_dir_' state.
-- This is to fix issue 782: 'toz' should not be lexed as 'to'
-- (followed by 'z' after leaving imp_dir_).
-- With identifiers in state imp_dir_, 'toz' should be lexed as
-- identifier 'toz' in imp_dir_ state, leading to a parse error later.
{
-- Literal Haskell footer, copied verbatim into the generated module.
-- The names ending in an underscore (bol_, layout_, pragma_, ...) are the
-- start-code constants Alex generates from the rules above; the bindings
-- below re-export them under nicer names for the parser.
-- | This is the initial state for parsing a regular, non-literate file.
normal :: LexState
normal = 0
{-| The layout state. Entered when we see a layout keyword ('withLayout') and
exited at the next token ('newLayoutBlock').
-}
layout :: LexState
layout = layout_
{-| The state inside a pragma.
-}
pragma :: LexState
pragma = pragma_
-- | The state inside a FOREIGN pragma. This needs to be different so that we don't
-- lex further strings as pragma keywords.
fpragma :: LexState
fpragma = fpragma_
{-| We enter this state from 'newLayoutBlock' when the token following a
layout keyword is to the left of (or at the same column as) the current
layout context. Example:
> data Empty : Set where
> foo : Empty -> Nat
Here the second line is not part of the @where@ clause since it is has the
same indentation as the @data@ definition. What we have to do is insert an
empty layout block @{}@ after the @where@. The only thing that can happen
in this state is that 'emptyLayout' is executed, generating the closing
brace. The open brace is generated when entering by 'newLayoutBlock'.
-}
empty_layout :: LexState
empty_layout = empty_layout_
-- | This state is entered at the beginning of each line. You can't lex
-- anything in this state, and to exit you have to check the layout rule.
-- Done with 'offsideRule'.
bol :: LexState
bol = bol_
-- | This state can only be entered by the parser. In this state you can only
-- lex the keywords @using@, @hiding@, @renaming@ and @to@. Moreover they are
-- only keywords in this particular state. The lexer will never enter this
-- state by itself, that has to be done in the parser.
imp_dir :: LexState
imp_dir = imp_dir_
-- | Return the next token. This is the function used by Happy in the parser.
--
-- @lexer k = 'lexToken' >>= k@
lexer :: (Token -> Parser a) -> Parser a
lexer k = lexToken >>= k
-- Standalone type signatures for the Alex-generated scanning functions;
-- their definitions are produced by Alex, not written here.
-- | Do not use this function; it sets the 'ParseFlags' to
-- 'undefined'.
alexScan :: AlexInput -> Int -> AlexReturn (LexAction Token)
-- | This is the main lexing function generated by Alex.
alexScanUser :: ([LexState], ParseFlags) -> AlexInput -> Int -> AlexReturn (LexAction Token)
}