/
lexer.mll
202 lines (161 loc) · 5.26 KB
/
lexer.mll
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
{
open Language
let reservedWords = [
(* Keywords *)
("lam", fun i -> Parser.LAMBDA i);
("All", fun i -> Parser.ALL i);
("Pi", fun i -> Parser.PI i);
(* Symbols *)
("_", fun i -> Parser.USCORE i);
("'", fun i -> Parser.APOSTROPHE i);
("\"", fun i -> Parser.DQUOTE i);
("!", fun i -> Parser.BANG i);
("#", fun i -> Parser.HASH i);
("$", fun i -> Parser.TRIANGLE i);
("*", fun i -> Parser.STAR i);
("|", fun i -> Parser.VBAR i);
(".", fun i -> Parser.DOT i);
(";", fun i -> Parser.SEMI i);
(",", fun i -> Parser.COMMA i);
("/", fun i -> Parser.SLASH i);
(":", fun i -> Parser.COLON i);
("::", fun i -> Parser.COLONCOLON i);
("=", fun i -> Parser.EQ i);
("==", fun i -> Parser.EQEQ i);
("[", fun i -> Parser.LSQUARE i);
("<", fun i -> Parser.LT i);
("{", fun i -> Parser.LCURLY i);
("(", fun i -> Parser.LPAREN i);
("<-", fun i -> Parser.LEFTARROW i);
("{|", fun i -> Parser.LCURLYBAR i);
("[|", fun i -> Parser.LSQUAREBAR i);
("}", fun i -> Parser.RCURLY i);
(")", fun i -> Parser.RPAREN i);
("]", fun i -> Parser.RSQUARE i);
(">", fun i -> Parser.GT i);
("|}", fun i -> Parser.BARRCURLY i);
("|>", fun i -> Parser.BARGT i);
("|]", fun i -> Parser.BARRSQUARE i);
(* Special compound symbols: *)
(":=", fun i -> Parser.COLONEQ i);
("->", fun i -> Parser.ARROW i);
("=>", fun i -> Parser.DARROW i);
("==>", fun i -> Parser.DDARROW i);
]
(* Support functions *)
type buildfun = info -> Parser.token
let (symbolTable : (string,buildfun) Hashtbl.t) = Hashtbl.create 1024
let _ =
List.iter (fun (str,f) -> Hashtbl.add symbolTable str f) reservedWords
let createID i str =
try (Hashtbl.find symbolTable str) i
with _ ->
if (String.get str 0) >= 'A' && (String.get str 0) <= 'Z' then
Parser.UCID {i=i;v=str}
else
Parser.LCID {i=i;v=str}
let lineno = ref 1
and depth = ref 0
and start = ref 0
and filename = ref ""
and startLex = ref dummyinfo
let create inFile stream =
if not (Filename.is_implicit inFile) then filename := inFile
else filename := Filename.concat (Sys.getcwd()) inFile;
lineno := 1; start := 0; Lexing.from_channel stream
let newline lexbuf = incr lineno; start := (Lexing.lexeme_start lexbuf)
let info lexbuf =
makeinfo (!filename) (!lineno) (Lexing.lexeme_start lexbuf - !start)
let text = Lexing.lexeme
let stringBuffer = ref (Bytes.create 2048)
let stringEnd = ref 0
let resetStr () = stringEnd := 0
let addStr ch =
let x = !stringEnd in
let buffer = !stringBuffer
in
if x = Bytes.length buffer then
begin
let newBuffer = Bytes.create (x*2) in
Bytes.blit buffer 0 newBuffer 0 x;
Bytes.set newBuffer x ch;
stringBuffer := newBuffer;
stringEnd := x+1
end
else
begin
Bytes.set buffer x ch;
stringEnd := x+1
end
let getStr () = Bytes.sub_string (!stringBuffer) 0 (!stringEnd)
let extractLineno yytext offset =
int_of_string (String.sub yytext offset (String.length yytext - offset))
}
(* The main body of the lexical analyzer *)
rule main = parse
[' ' '\009' '\012']+ { main lexbuf }
| [' ' '\009' '\012']*("\r")?"\n" { newline lexbuf; main lexbuf }
| "*/" { failwith "Unmatched end of comment" }
| "/*" { depth := 1; startLex := info lexbuf; comment lexbuf; main lexbuf }
| "# " ['0'-'9']+
{ lineno := extractLineno (text lexbuf) 2 - 1; getFile lexbuf }
| "# line " ['0'-'9']+
{ lineno := extractLineno (text lexbuf) 7 - 1; getFile lexbuf }
| ['0'-'9']+
{ Parser.INTV{i=info lexbuf; v=int_of_string (text lexbuf)} }
| ['0'-'9']+ '.' ['0'-'9']+
{ Parser.FLOATV{i=info lexbuf; v=float_of_string (text lexbuf)} }
| ['A'-'Z' 'a'-'z' '_']
['A'-'Z' 'a'-'z' '_' '0'-'9' '\'']*
{ createID (info lexbuf) (text lexbuf) }
| ":=" | "<:" | "<-" | "->" | "=>" | "==>"
| "{|" | "|}" | "<|" | "|>" | "[|" | "|]" | "=="
{ createID (info lexbuf) (text lexbuf) }
| ['~' '%' '\\' '+' '-' '&' '|' ':' '@' '`' '$']+
{ createID (info lexbuf) (text lexbuf) }
| ['*' '#' '/' '!' '?' '^' '(' ')' '{' '}' '[' ']' '<' '>' '.' ';' '_' ','
'=' '\'']
{ createID (info lexbuf) (text lexbuf) }
| "\"" { resetStr(); startLex := info lexbuf; string lexbuf }
| eof { Parser.EOF(info lexbuf) }
| _ { failwith "Illegal character" }
and comment = parse
"/*"
{ depth := succ !depth; comment lexbuf }
| "*/"
{ depth := pred !depth; if !depth > 0 then comment lexbuf }
| eof
{ failwith "Comment not terminated" }
| [^ '\n']
{ comment lexbuf }
| "\n"
{ newline lexbuf; comment lexbuf }
and getFile = parse
" "* "\"" { getName lexbuf }
and getName = parse
[^ '"' '\n']+ { filename := (text lexbuf); finishName lexbuf }
and finishName = parse
'"' [^ '\n']* { main lexbuf }
and string = parse
'"' { Parser.STRINGV {i = !startLex; v=getStr()} }
| '\\' { addStr(escaped lexbuf); string lexbuf }
| '\n' { addStr '\n'; newline lexbuf; string lexbuf }
| eof { failwith "String not terminated" }
| _ { addStr (Lexing.lexeme_char lexbuf 0); string lexbuf }
and escaped = parse
'n' { '\n' }
| 't' { '\t' }
| '\\' { '\\' }
| '"' { '\034' }
| '\'' { '\'' }
| ['0'-'9']['0'-'9']['0'-'9']
{
let x = int_of_string(text lexbuf) in
if x > 255 then
failwith "Illegal character constant"
else
Char.chr x
}
| [^ '"' '\\' 't' 'n' '\'']
{ failwith "Illegal character constant" }
(* *)