/
GParser.ml
310 lines (276 loc) · 9.94 KB
/
GParser.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
(**********************************************************
* Arbogen-tool : fast uniform random generation of trees *
**********************************************************
* Module: GParser *
* ------- *
* Tree grammar parser *
* ------- *
* (C) 2011, Xuming Zhan, Frederic Peschanski *
* Antonine Genitrini under the *
* GNU GPL v.3 licence (cf. LICENSE file) *
**********************************************************)
open Printf
open Options
open Grammar
exception Parse_Error of string;;
type character =
Char of char
| EOF;;
let contains (s1:string) (s2:string) =
try
let len = String.length s2 in
for i = 0 to String.length s1 - len do
if String.sub s1 i len = s2 then raise Exit
done;
false
with Exit -> true
let is_space = function
| Char(ch) -> ch == ' ' or ch == '\n' or ch == '\t' or ch == '\r'
| EOF -> false ;;
let get_char (str:string) (i:int) =
try Char (String.get str i) with
_ -> EOF;;
let rec skip_spaces (str:string) (i:int) =
let ichar = get_char str i in
if is_space ichar then skip_spaces str (i+1)
else i ;;
let rec skip_until_eol (str:string) (i:int) =
let ichar = get_char str i in
match ichar with
| EOF -> i
| Char(ch) ->
if ch='\n' then i+1
else skip_until_eol str (i+1);;
let rec skip_until_starslash (str:string) (i:int) =
let ichar = get_char str i in
match ichar with
| EOF -> raise (Parse_Error "Missing end of comment: */")
| Char(ch) -> if ch='*' then (match get_char str (i+1) with
| EOF -> raise (Parse_Error "Missing end of comment after *")
| Char(ch') ->
if ch'='/' then i+2
else skip_until_starslash str (i+1))
else skip_until_starslash str (i+1) ;;
(* skip_until_starslash "/* toto titi tata */ tutu" 2 ;; *)
let rec skip_comments (str:string) (i:int) =
let ichar = get_char str i in
match ichar with
| EOF -> i
| Char(ch) ->
if is_space ichar then skip_comments str (i+1)
else if ch = '/' then
(match get_char str (i+1) with
| EOF -> i
| Char(ch') ->
if ch' = '/' then skip_until_eol str i
else if ch' = '*' then skip_until_starslash str (i+1)
else i)
else i;;
(* skip_comments "/* toto titi tata */ tutu" 0 ;; *)
let rec skip (str:string) (i:int) =
let i' = skip_comments str i in
if not (i'==i) then skip str i'
else i;;
let list_iteri f l =
let rec aux i l = match l with
| [] -> ()
| e::l' -> (f i e) ; aux (i+1) l'
in
aux 0 l ;;
let string_of_list l =
let len = List.length l in
let str = String.create len
in
list_iteri (fun ch i -> String.set str ch i) l ;
str ;;
let next_word (str:string) (i:int) =
let rec aux i word =
let ichar = get_char str i in
match ichar with
| EOF -> (List.rev word,i)
| Char(ch) ->
if is_space ichar then
(List.rev word,i)
else if ch='*' or ch='+' or ch=';' then
(match word with
| [] -> ([ch], i+1)
| _ -> (List.rev word, i))
else aux (i+1) (ch::word)
in
let (word,i') = aux (skip str i) [] in
(string_of_list word,i') ;;
let advance (str:string) (i:int) (expect:string) =
let (word,i') = next_word str i
in
if word = expect then i'
else raise (Parse_Error ("Missing '" ^ expect ^ "'")) ;;
let parse_component (str:string) (i:int) =
let rec aux i weight refs =
let (componentName,i') = next_word str i in
if componentName="<z>" then
let (next,i'') = next_word str i' in
(if next="*" then
aux i'' (weight+1) refs
else if next="+" or next=";" then
match refs with
| [] -> raise (Parse_Error "Missing reference after <z>")
| _ -> ((weight+1,List.rev refs),i')
else
raise (Parse_Error "Expecting '*', '+' or ';' after <z>"))
else if componentName="*" or componentName="+" then
raise (Parse_Error ("Unexpected '" ^ componentName ^ "'"))
else if (contains componentName "SEQ(") == true then
let start = String.index componentName '(' in
let stop = String.index componentName ')' in
let name = String.sub componentName (start+1) (stop-start-1) in
let (next,i'') = next_word str i' in
if next="+" or next =";" then
((weight,List.rev ((SEQ name)::refs)),i')
else if next="*" then
aux i'' weight ((SEQ name)::refs)
else raise (Parse_Error "Expecting '+', ';' or '*'")
else (* component Name is ok *)
let (next,i'') = next_word str i' in
if next="+" or next =";" then
((weight,List.rev ((ELEM componentName)::refs)),i')
else if next="*" then
aux i'' weight ((ELEM componentName)::refs)
else raise (Parse_Error "Expecting '+', ';' or '*'")
in
aux i 0 [] ;;
(* parse_component "<z> * BinNode * BinNode +" 0 ;; *)
let parse_components (str:string) (i:int) =
let rec aux i comps =
let (comp,i') = parse_component str i in
let (next,i'') = next_word str i' in
if next="+" then
aux i'' (comp::comps)
else if next=";" then
(List.rev (comp::comps), i'')
else raise (Parse_Error ("Expecting '+' or ';' after component"))
in
aux i [] ;;
(* parse_components "Leaf * <z> + BinNode * BinNode ;" 0 ;; *)
let parse_rule (str:string) (i:int) =
let (ruleName,i') = next_word str i in
if ruleName="" then
raise (Parse_Error "Missing rule name")
else
let i'' = advance str i' "::=" in
let (components,i''') = parse_components str i''
in
((ruleName,components),i''') ;;
(* parse_rule "BinNode ::= Leaf * <z> + BinNode * BinNode ;" 0 ;; *)
let parse_int (str:string) (i:int) =
let int_str, i' = next_word str i
in
try
let int_val = int_of_string int_str
in
(int_val, i')
with Failure _ -> raise (Parse_Error (sprintf "cannot convert '%s' to an integer" int_str))
let parse_float (str:string) (i:int) =
let float_str, i' = next_word str i
in
try
let float_val = float_of_string float_str
in
(float_val, i')
with Failure _ -> raise (Parse_Error (sprintf "cannot convert '%s' to a float" float_str))
let parse_option (str:string) (i:int) =
let opt_id, i' = next_word str i
in match opt_id with
| "min" ->
let min_val, i' = parse_int str i'
in
(if min_val < 0
then raise (Option_Error (sprintf "incorrect minimal size %d => should be positive" min_val))
else if not global_options.size_min_set
then global_options.size_min <- min_val) ;
advance str i' ";"
| "max" ->
let max_val, i' = parse_int str i'
in
(if max_val < 0
then raise (Option_Error (sprintf "incorrect maximal size %d => should be positive" max_val))
else if not global_options.size_max_set
then global_options.size_max <- max_val) ;
advance str i' ";"
| "try" ->
let try_val, i' = parse_int str i'
in
(if try_val <= 0
then raise (Option_Error (sprintf "incorrect minimal try number %d => should be strictly positive" try_val))
else if not global_options.max_try_set
then global_options.max_try <- try_val) ;
advance str i' ";"
| "eps1" | "epsilon1" ->
let eps1_val, i' = parse_float str i'
in
(if eps1_val <= 0.0
then raise (Option_Error (sprintf "incorrect epsilon 1 %f => should be strictly positive" eps1_val))
else if not global_options.epsilon1_set
then global_options.epsilon1 <- eps1_val) ;
advance str i' ";"
| "eps1_factor" | "epsilon1_factor" ->
let eps1_val, i' = parse_float str i'
in
(if eps1_val <= 0.0
then raise (Option_Error (sprintf "incorrect epsilon 1 factor %f => should be strictly positive" eps1_val))
else if not global_options.epsilon1_factor_set
then global_options.epsilon1_factor <- eps1_val) ;
advance str i' ";"
| "eps2" | "epsilon2" ->
let eps2_val, i' = parse_float str i'
in
(if eps2_val <= 0.0
then raise (Option_Error (sprintf "incorrect epsilon 2 %f => should be strictly positive" eps2_val))
else if not global_options.epsilon2_set
then global_options.epsilon2 <- eps2_val) ;
advance str i' ";"
| "eps2_factor" | "epsilon2_factor" ->
let eps2_val, i' = parse_float str i'
in
(if eps2_val <= 0.0
then raise (Option_Error (sprintf "incorrect epsilon 2 factor %f => should be strictly positive" eps2_val))
else if not global_options.epsilon2_factor_set
then global_options.epsilon2_factor <- eps2_val) ;
advance str i' ";"
| "zstart" ->
let start, i' = parse_float str i'
in
(if (start > 1.0 || start < 0.0) then
raise (Option_Error (sprintf "incorrect zstart value %f => should be between 0 and 1" start))
else
global_options.zstart <- start);
advance str i' ";"
| _ -> raise (Parse_Error (sprintf "Uknown or unsupported option: %s" opt_id))
let parse_grammar (str:string) =
let rec aux i rules =
match next_word str i with
("",_) -> List.rev rules
| ("set", i') ->
let i'' = parse_option str i'
in
aux i'' rules
| _ ->
let (rul,i') = parse_rule str i
in aux i' (rul::rules)
in
aux 0 [] ;;
(* parse_grammar "BinNode ::= Leaf * <z> + BinNode * BinNode;" ;; *)
let string_of_file (fname:string) =
let inchan = open_in fname in
let rec read str =
try let next = input_line inchan in
read (str ^ "\n" ^ next)
with End_of_file -> close_in inchan ; str
in
read "" ;;
let parse_from_file (fname:string) =
let input = string_of_file fname in
let grm = parse_grammar input
in
if Options.global_options.verbosity >= 3
then printf "[GEN]: Parsed grammar = %s\n%!" (Grammar.string_of_grammar (List.map (fun (n,l) -> (ELEM n,l)) grm)) ;
grm ;;