This repository has been archived by the owner on Jun 4, 2019. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 204
/
lib_parsing_php.ml.nw
457 lines (400 loc) · 12.5 KB
/
lib_parsing_php.ml.nw
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
<<lib_parsing_php.ml>>=
<<Facebook copyright>>
open Common
<<basic pfff module open and aliases>>
module V = Visitor_php
module V2 = Map_php
(*****************************************************************************)
(* Wrappers *)
(*****************************************************************************)
(* Logging helpers for this file, built by Common.mk_pr2_wrappers from the
 * Flag.verbose_parsing flag. Because Common is opened above, these two
 * bindings shadow the unqualified pr2/pr2_once for the rest of the file.
 * NOTE(review): presumably the wrappers only print when the flag is set --
 * confirm against Common.mk_pr2_wrappers.
 *)
let pr2, pr2_once = Common.mk_pr2_wrappers Flag.verbose_parsing
(*****************************************************************************)
(* Filenames *)
(*****************************************************************************)
(* [is_php_script file] opens [file], reads its first line and tells
 * whether that line matches one of the PHP shebang patterns listed below.
 * A file with no line to read is not considered a script.
 *)
let is_php_script file =
  let shebang_patterns = [
    "#!/usr/.*/php";
    "#!/bin/env php";
    "#!/usr/bin/env php";
  ] in
  Common.with_open_infile file (fun ic ->
    try
      let first_line = input_line ic in
      (* patterns are tried in order and the search stops at the first hit *)
      List.exists (fun pattern -> first_line =~ pattern) shebang_patterns
    with End_of_file -> false
  )
(* [is_php_filename filename] tells whether [filename] matches one of the
 * two extension patterns below (.php or .phpt). Only the name is looked
 * at; the file is not opened.
 *)
let is_php_filename filename =
  let extension_patterns = [".*\\.php$"; ".*\\.phpt$"] in
  List.exists (fun pattern -> filename =~ pattern) extension_patterns
(* [is_php_file filename] accepts a file either on its name or, failing
 * that, on its first line. The name test runs first, so the file is only
 * opened when the extension did not already settle the question.
 *)
let is_php_file filename =
  if is_php_filename filename
  then true
  else is_php_script filename
(*
 * In command line tools like git or mercurial, many operations work
 * whether a file, a set of files, or even directories are passed as
 * parameters. We want the same with pfff, hence this small helper which
 * turns such files_or_dirs into a flat list of PHP filenames.
 *
 * The candidates come from Common.files_of_dir_or_files_no_vcs_nofilter,
 * are filtered with is_php_file and finally passed through Common.sort.
 * When [verbose] is set, every rejected file is reported with pr2.
 *)
let find_php_files_of_dir_or_files ?(verbose=false) xs =
  let keep_php_file path =
    let is_php = is_php_file path in
    if verbose && not is_php
    then pr2 ("not analyzing: " ^ path);
    is_php
  in
  let candidates = Common.files_of_dir_or_files_no_vcs_nofilter xs in
  Common.sort (List.filter keep_php_file candidates)
(*****************************************************************************)
(* Extract infos *)
(*****************************************************************************)
<<extract infos>>
(*****************************************************************************)
(* Abstract position *)
(*****************************************************************************)
<<abstract infos>>
(*****************************************************************************)
(* Max min, range *)
(*****************************************************************************)
<<max min range>>
(*****************************************************************************)
(* Print helpers *)
(*****************************************************************************)
(* could perhaps create a special file related to display of code ? *)
(* Output layouts understood by print_match for a matched piece of code. *)
type match_format =
(* Location on its own line, followed by every source line of the match.
 * ex: tests/misc/foo4.php:3
 * foo(
 * 1,
 * 2);
 *)
| Normal
(* Location followed by the first source line of the match only.
 * ex: tests/misc/foo4.php:3: foo( *)
| Emacs
(* Location followed by the matched tokens joined on a single line.
 * ex: tests/misc/foo4.php:3: foo(1,2) *)
| OneLine
(* The OneLine format prints a normalized version of the matched code:
 * only the tokens of the AST, without the original whitespace, newlines
 * or comments. Gluing the token strings together blindly is not enough
 * though, because a PHP expression such as '$x = print FOO' would come
 * out as $x=printFOO.
 *
 * [join_with_space_if_needed xs] therefore concatenates the strings of
 * [xs] in order and inserts a single space between two neighbours exactly
 * when the left one matches the ends-with-a-word-character pattern and
 * the right one matches the starts-with-a-word-character pattern.
 *)
let join_with_space_if_needed xs =
  let ends_like_word s = s =~ ".*[a-zA-Z0-9_]$" in
  let starts_like_word s = s =~ "^[a-zA-Z0-9_]" in
  (* builds the tokens interleaved with their separators *)
  let rec pieces = function
    | [] -> []
    | [last] -> [last]
    | tok :: ((next :: _) as rest) ->
        let separator =
          if ends_like_word tok && starts_like_word next
          then " "
          else ""
        in
        tok :: separator :: pieces rest
  in
  String.concat "" (pieces xs)

(* load-time sanity check on the example given above *)
let () = assert
  (join_with_space_if_needed ["$x";"=";"print";"FOO"] = "$x=print FOO")
(* [print_match ~format ii] prints, through [pr], the location of the code
 * covered by the tokens [ii] followed by the code itself, laid out
 * according to [format] (default: Normal). See match_format for an
 * example of each layout.
 *
 * The location is the file and the line of the token of [ii] with the
 * smallest position. Fails (in min_max_ii_by_pos) when [ii] is empty.
 *)
let print_match ?(format = Normal) ii =
  let (mini, maxi) = min_max_ii_by_pos ii in
  let file = Ast.file_of_info mini in
  let first_line = Ast.line_of_info mini in
  let prefix = spf "%s:%d" file first_line in
  (* Only Normal and Emacs display raw source lines, so the file is loaded
   * on demand. OneLine works from the tokens alone and must not fail just
   * because the file can not be read anymore.
   * NOTE(review): the array is indexed directly with line numbers, so
   * Common.cat_array presumably stores line n at index n -- confirm.
   *)
  let source_lines () = Common.cat_array file in
  match format with
  | Normal ->
      let arr = source_lines () in
      let lines = Common.enum first_line (Ast.line_of_info maxi) in
      pr prefix;
      (* todo? some context too ? *)
      lines
      +> List.map (fun i -> arr.(i))
      +> List.iter (fun s -> pr (" " ^ s))
  | Emacs ->
      let arr = source_lines () in
      (* the first line of the match is the line used in the prefix *)
      pr (prefix ^ ": " ^ arr.(first_line))
  | OneLine ->
      let code =
        ii +> List.map Ast.str_of_info +> join_with_space_if_needed in
      pr (prefix ^ ": " ^ code)
(* obsolete: now catch Parse_php.Parse_error
 *
 * [print_warning_if_not_correctly_parsed ast file] emits a warning naming
 * [file], plus a hint printed through Common.pr2_once, when at least one
 * toplevel element of [ast] is a NotParsedCorrectly.
 *)
let print_warning_if_not_correctly_parsed ast file =
  let is_unparsed = function
    | Ast_php.NotParsedCorrectly _ -> true
    | _ -> false
  in
  if List.exists is_unparsed ast
  then begin
    Common.pr2 (spf "warning: parsing problem in %s" file);
    Common.pr2_once ("Use -parse_php to diagnose");
    (* old:
     * Common.pr2_once ("Probably because of XHP; -xhp may be helpful");
     *)
  end
(*****************************************************************************)
(* Ast getters *)
(*****************************************************************************)
<<ast getters>>
(* [get_static_vars_any any] visits [any] and records the dname of every
 * variable declared in a StaticVars statement. A StaticVars statement is
 * not visited any further; every other statement is handed back to the
 * default traversal.
 * NOTE(review): the result is whatever V.do_visit_with_ref builds from the
 * accumulator, presumably the list of pushed dnames -- confirm its order.
 *)
let get_static_vars_any any =
  let hooks_for acc = { V.default_visitor with
    V.kstmt = (fun (k, _) stmt ->
      match stmt with
      | StaticVars (_, decls, _) ->
          List.iter
            (fun (dname, _init) -> Common.push2 dname acc)
            (Ast.uncomma decls)
      | _ ->
          k stmt
    );
  } in
  V.do_visit_with_ref hooks_for any
(* todo? do last_stmt_is_a_return isomorphism ?
 *
 * [get_returns_any any] visits [any] and records the expression of every
 * Return statement that carries one. Such a statement is not visited any
 * further, so returns nested inside the returned expression are not
 * collected; a Return without expression goes to the default traversal.
 *)
let get_returns_any any =
  let hooks_for acc = { V.default_visitor with
    V.kstmt = (fun (k, _) stmt ->
      match stmt with
      | Return (_, Some e, _) -> Common.push2 e acc
      | _ -> k stmt
    )
  } in
  any +> V.do_visit_with_ref hooks_for
(* [get_vars_any any] visits [any] and records the dname of
 *  - every Var lvalue (a Var is not visited any further), and
 *  - every variable listed in the use clause of a Lambda, after which the
 *    lambda itself is still visited.
 *)
let get_vars_any any =
  let hooks_for acc =
    let record dname = Common.push2 dname acc in
    { V.default_visitor with
      V.klvalue = (fun (k, _) lv ->
        match lv with
        | Var (dname, _) -> record dname
        | _ -> k lv
      );
      V.kexpr = (fun (k, _) e ->
        (match e with
         (* todo? sure ?? *)
         | Lambda def ->
             (match def.l_use with
              | None -> ()
              | Some (_, uses) ->
                  List.iter
                    (function LexicalVar (_, dname) -> record dname)
                    (Ast.uncomma (Ast.unparen uses))
             )
         | _ -> ()
        );
        (* every expression, lambda or not, continues the traversal *)
        k e
      );
    }
  in
  V.do_visit_with_ref hooks_for any
(*****************************************************************************)
(* Ast adapters *)
(*****************************************************************************)
(* todo? let lvalue_to_expr ?? *)
(* [top_statements_of_program ast] returns, in program order, the
 * statements found in the StmtList elements of [ast]. The other kinds of
 * toplevel element contribute nothing.
 *)
let top_statements_of_program ast =
  let stmts_of_toplevel = function
    | StmtList stmts -> stmts
    | FinalDef _ | NotParsedCorrectly _ | ClassDef _ | FuncDef _ -> []
  in
  List.flatten (List.map stmts_of_toplevel ast)
(* [toplevel_to_entity x] wraps a toplevel element into the entity
 * constructor of the same kind. The two kinds without a dedicated entity
 * constructor, NotParsedCorrectly and FinalDef, both end up in MiscE.
 *)
let toplevel_to_entity = function
  | FuncDef def -> FunctionE def
  | ClassDef def -> ClassE def
  | StmtList stmts -> StmtListE stmts
  (* todo? *)
  | NotParsedCorrectly toks -> MiscE toks
  | FinalDef tok -> MiscE [tok]
(* Many analyses work on a unit of code such as a function, a method, or
 * a run of toplevel statements. The 'toplevel' type is not suited for
 * that because it contains Class and Interface, which are too coarse
 * grained; the method granularity is better.
 *
 * For instance a CFG makes sense for a function, a method, or toplevel
 * statements, but not for a class.
 *
 * [functions_methods_or_topstms_of_program prog] returns the triple
 * (function definitions, method definitions, toplevel statement lists)
 * found while visiting [prog]. The visit does not continue inside a
 * collected function, method or StmtList.
 * NOTE(review): each list is filled with Common.push2 and returned as is,
 * so it is presumably in reverse visiting order -- confirm.
 *)
let functions_methods_or_topstms_of_program prog =
  let funcs = ref []
  and methods = ref []
  and toplevels = ref [] in
  let hooks = { V.default_visitor with
    V.kfunc_def = (fun _ def -> Common.push2 def funcs);
    V.kmethod_def = (fun _ def -> Common.push2 def methods);
    V.ktop = (fun (k, _) top ->
      match top with
      | StmtList stmts -> Common.push2 stmts toplevels
      | _ -> k top
    );
  } in
  V.mk_visitor hooks (Program prog);
  (!funcs, !methods, !toplevels)
(* do some isomorphisms for declaration vs assignement
 *
 * [get_vars_assignements_any recursor] visits [recursor] and records a
 * (variable name, value) pair for
 *  - every variable of a StaticVars statement that has an initializer,
 *  - every Assign or AssignOp whose left-hand side is a plain Var.
 * The pairs are then grouped by variable name with
 * Common.group_assoc_bykey_eff.
 *)
let get_vars_assignements_any recursor =
  (* We want to group the assignements by variable later, with a function
   * like Common.group_by_xxx which requires identical keys. Each dname
   * occurence below has a different location, so dname itself can not be
   * used as the key; the name of the variable can, hence the use of
   * Ast.dname.
   *)
  let hooks_for acc =
    let record name value = Common.push2 (name, value) acc in
    { V.default_visitor with
      V.kstmt = (fun (k, _) stmt ->
        match stmt with
        | StaticVars (_, decls, _) ->
            List.iter (fun (dname, init_opt) ->
              let name = Ast.dname dname in
              match init_opt with
              | None -> ()
              | Some (_, scalar) -> record name scalar
            ) (Ast.uncomma decls)
        | _ ->
            k stmt
      );
      V.kexpr = (fun (k, _) expr ->
        (* the expression itself can contain assignements, so it is always
         * visited first *)
        k expr;
        match expr with
        (* for now we handle only simple direct assignement to simple
         * variables *)
        | Assign (Var (dname, _), _, rhs)
        | AssignOp (Var (dname, _), _, rhs) ->
            record (Ast.dname dname) rhs
        (* todo? AssignRef AssignNew ? *)
        | _ ->
            ()
      );
    }
  in
  Common.group_assoc_bykey_eff (V.do_visit_with_ref hooks_for recursor)
@
<<extract infos>>=
(* [extract_info_visitor recursor] builds a visitor that records every
 * info it is handed, lets [recursor] drive that visitor, and returns the
 * recorded infos in the order they were met.
 *)
let extract_info_visitor recursor =
  let collected = ref [] in
  let visit = V.mk_visitor { V.default_visitor with
    V.kinfo = (fun _ info -> Common.push2 info collected)
  } in
  recursor visit;
  (* push2 accumulates at the head, hence the final reversal *)
  List.rev !collected
@
<<extract infos>>=
(* [ii_of_any any] returns the infos collected by extract_info_visitor
 * when its visitor is applied to [any]. *)
let ii_of_any any =
  let visit_any visitor = visitor any in
  extract_info_visitor visit_any
@
<<abstract infos>>=
(* [abstract_position_visitor recursor] builds a Map_php visitor whose
 * info hook replaces the token field of each info by Parse_info.Ab, and
 * returns the result of applying [recursor] to that visitor.
 *)
let abstract_position_visitor recursor =
  let abstract_info _ info =
    { info with Parse_info.token = Parse_info.Ab }
  in
  let mapper = V2.mk_visitor { V2.default_visitor with
    V2.kinfo = abstract_info
  } in
  recursor mapper
@
<<abstract infos>>=
(* [abstract_position_info_any x] runs the vany entry of the visitor built
 * by abstract_position_visitor on [x] and returns its result. *)
let abstract_position_info_any x =
  let map_any visitor = visitor.V2.vany x in
  abstract_position_visitor map_any
@
<<max min range>>=
(* [min_max_ii_by_pos xs] returns the pair (min, max) of the infos of [xs]
 * with the smallest and the largest position according to
 * Ast_php.compare_pos. When several infos compare equal, the one met
 * first in [xs] is kept.
 *
 * Raises Failure when [xs] is empty.
 *)
let min_max_ii_by_pos xs =
  match xs with
  | [] -> failwith "empty list, max_min_ii_by_pos"
  | first :: rest ->
      (* Strict order: any negative result means "before", so this does
       * not depend on compare_pos returning exactly -1. *)
      let strictly_before p1 p2 = Ast_php.compare_pos p1 p2 < 0 in
      (* a singleton list simply folds over nothing and yields
       * (first, first) *)
      List.fold_left (fun (minii, maxii) e ->
        let maxii' = if strictly_before maxii e then e else maxii in
        let minii' = if strictly_before e minii then e else minii in
        (minii', maxii')
      ) (first, first) rest
@
<<max min range>>=
(* [info_to_fixpos ii] returns the charpos recorded in [ii] when it is an
 * OriginTok. Raises Failure for the three other kinds of token, which are
 * not expected here.
 *)
let info_to_fixpos ii =
  let unexpected () = failwith "unexpected abstract or faketok" in
  match Ast_php.pinfo_of_info ii with
  | Parse_info.FakeTokStr _
  | Parse_info.Ab
  | Parse_info.ExpandedTok _ ->
      unexpected ()
  | Parse_info.OriginTok origin ->
      (* Ast_cocci.Real *)
      origin.Parse_info.charpos
(* [min_max_by_pos xs] is min_max_ii_by_pos followed by info_to_fixpos on
 * both ends: the positions of the first and of the last info of [xs].
 * Fails like these two functions do.
 *)
let min_max_by_pos xs =
  let (first, last) = min_max_ii_by_pos xs in
  (info_to_fixpos first, info_to_fixpos last)
(* [range_of_origin_ii ii] keeps the tokens of [ii] accepted by
 * Ast_php.is_origintok and returns Some (start, stop) where [start] is
 * the position of the first one and [stop] the position of the last one
 * plus the length of its string.
 *
 * Returns None when the computation raises any exception, which covers
 * the case where no origin token is left. The handler is deliberately a
 * catch-all: this function is best effort.
 *)
let range_of_origin_ii : Ast_php.tok list -> (int * int) option =
 fun ii ->
  let origin_toks = List.filter Ast_php.is_origintok ii in
  try
    let (first, last) = min_max_ii_by_pos origin_toks in
    assert (Ast_php.is_origintok last);
    assert (Ast_php.is_origintok first);
    let last_len = String.length (Ast_php.str_of_info last) in
    let start = Ast_php.pos_of_info first in
    let stop = Ast_php.pos_of_info last + last_len in
    Some (start, stop)
  with _ ->
    None
@
<<ast getters>>=
(* [get_funcalls_any any] visits [any] and records the name of the callee
 * of every FunCallSimple in a hash table used as a set, so each name is
 * stored once. The table is turned into a list with
 * Common.hashset_to_list.
 *)
let get_funcalls_any any =
  let seen = Hashtbl.create 101 in
  let hooks = { V.default_visitor with
    (* TODO if nested function ??? still wants to report ? *)
    V.klvalue = (fun (k, _) lv ->
      (match lv with
       | FunCallSimple (callname, _) ->
           Hashtbl.replace seen (Ast_php.name callname) true
       | _ -> ()
      );
      (* calls are visited further too, e.g. for calls in their arguments *)
      k lv
    );
  } in
  V.mk_visitor hooks any;
  Common.hashset_to_list seen
@
<<ast getters>>=
@
<<ast getters>>=
(* [get_constant_strings_any any] visits [any] and records the text of
 * every String constant and of every EncapsString in a hash table used as
 * a set. The table is turned into a list with Common.hashset_to_list.
 *)
let get_constant_strings_any any =
  let strings = Hashtbl.create 101 in
  let remember s = Hashtbl.replace strings s true in
  let hooks = { V.default_visitor with
    V.kconstant = (fun (k, _) c ->
      match c with
      | String (s, _) -> remember s
      | _ -> k c
    );
    V.kencaps = (fun (k, _) enc ->
      match enc with
      | EncapsString (s, _) -> remember s
      | _ -> k enc
    );
  } in
  let visit = V.mk_visitor hooks in
  visit any;
  Common.hashset_to_list strings
@
<<ast getters>>=
(* [get_funcvars_any any] visits [any] and records, in a hash table used
 * as a set, the name of the variable of every FunCallVar whose callee is
 * directly a Var. The table is turned into a list with
 * Common.hashset_to_list.
 *)
let get_funcvars_any any =
  let names = Hashtbl.create 101 in
  let hooks = { V.default_visitor with
    V.klvalue = (fun (k, _) lv ->
      (match lv with
       (* TODO enough ? what about qopt ?
        * and what if not directly a Var ?
        *
        * and what about call_user_func ? should be
        * transformed at parsing time into a FunCallVar ?
        *)
       | FunCallVar (_, Var (dname, _), _) ->
           Hashtbl.replace names (Ast_php.dname dname) true
       | _ -> ()
      );
      (* whatever the lvalue, the traversal goes on below it *)
      k lv
    );
  } in
  V.mk_visitor hooks any;
  Common.hashset_to_list names
@