This repository was archived by the owner on Jun 4, 2019. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 204
/
Copy pathast_php.ml
1003 lines (906 loc) · 36 KB
/
ast_php.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
(* Yoann Padioleau
*
* Copyright (C) 2009-2013 Facebook
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* version 2.1 as published by the Free Software Foundation, with the
* special exception on linking described in file license.txt.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the file
* license.txt for more details.
*)
open Common
(*****************************************************************************)
(* Prelude *)
(*****************************************************************************)
(*
* This module defines an Abstract Syntax Tree for PHP 5.2 with
* a few extensions from PHP 5.3 (e.g. closures, namespace, const) and
* PHP 5.4 (e.g. traits) as well as support for many Facebook
* extensions (XHP, generators, annotations, generics, collections,
* type definitions, implicit fields via constructor parameters).
*
* This is actually more a concrete syntax tree (CST) than an AST. This
* is convenient in a refactoring context or code visualization
* context, but if you need to do some heavy static analysis, consider
* instead lang_php/analyze/foundation/pil.ml which defines a
* PHP Intermediate Language a la CIL.
*
* todo:
* - unify toplevel statement vs statements? hmmm maybe not
* - maybe even in a refactoring context a PIL+comment
* (see pretty/ast_pp.ml) would make more sense.
*
* NOTE: data from this type are often marshalled in berkeley DB tables
* which means that if you add a new constructor or field in the types below,
* you must erase the berkeley DB databases, otherwise pfff
* will probably end with a segfault (OCaml serialization is not
* type-safe). A hacky solution is to add new constructors only at the end
* of a type definition.
*
* COUPLING: some programs in other languages (e.g. Python) may
* use some of the pfff binding, or json/sexp exporters, so if you
* change the name of constructors in this file, don't forget
* to regenerate the json/sexp exporters, but also to modify the
* dependent programs !!!! An easier solution is to not change this
* file, or to only add new constructors.
*)
(*****************************************************************************)
(* The AST related types *)
(*****************************************************************************)
(* ------------------------------------------------------------------------- *)
(* Token/info *)
(* ------------------------------------------------------------------------- *)
(* A token. Contains among other things the position of the token through
* the Parse_info.parse_info embedded inside it, as well as the
* transformation field that makes spatch possible.
*)
type tok = Parse_info.info
(* alias of tok *)
and info = tok
(* shortcuts to annotate some information with token/position information *)
(* a value paired with a token *)
and 'a wrap = 'a * tok
(* The five types below all have the same shape: a value between two
* tokens. NOTE(review): presumably the two tokens are the opening and
* closing delimiter named by the type -- confirm against the parser.
*)
and 'a paren = tok * 'a * tok
and 'a brace = tok * 'a * tok
and 'a bracket = tok * 'a * tok
and 'a angle = tok * 'a * tok
and 'a single_angle = tok * 'a * tok
(* a list interleaving the elements (first type argument of either) with
* the comma tokens separating them (second type argument) *)
and 'a comma_list = ('a, tok (* the comma *)) Common.either list
(* same as comma_list, with one more alternative for the ... token *)
and 'a comma_list_dots =
('a, tok (* ... in parameters *), tok (* the comma *)) Common.either3 list
(* with tarzan *)
(* ------------------------------------------------------------------------- *)
(* Ident/Name/LongName *)
(* ------------------------------------------------------------------------- *)
(* See also analyze_php/namespace_php.ml *)
(* Why not factorize Name and XhpName together? Because I was not
* sure originally whether some analysis should also be applied on Xhp
* classes. Moreover there are two syntaxes for xhp: :x:base for 'defs'
* and <x:base for 'uses', so having this xhp_tag allows us to easily
* compare xhp identifiers.
*)
type ident =
(* was called T_STRING in Zend; these are really just LABELs, see lexer.mll *)
| Name of string wrap
(* xhp: for :x:foo the list is ["x";"foo"] *)
| XhpName of xhp_tag wrap
(* the components of an xhp tag: for :x:foo the list is ["x";"foo"] *)
and xhp_tag = string list
(* The string does not contain the '$'. The info itself will usually
* contain it, but not always! Indeed if the variable we build comes
* from an encapsulated string as in echo "${x[foo]}" then the 'x'
* will be parsed as a T_STRING_VARNAME, and eventually lead to a DName,
* even if in the text it appears as a name.
* So this token is kind of a FakeTok sometimes.
*
* If at some point you want to do some program transformation,
* you may have to normalize this 'string wrap' before moving it
* to another context !!!
*)
type dname =
(* D for dollar. Was called T_VARIABLE in the original PHP parser/lexer *)
| DName of string wrap
(* A namespace-qualified identifier, kept as the flat sequence of its
* identifiers and separator tokens.
* The backslash is a separator but it can also be in the leading position.
* The keyword 'namespace' can also be in a leading position.
*)
type qualified_ident = qualified_ident_element list
and qualified_ident_element =
| QI of ident (* the ident can be 'namespace' *)
| QITok of tok (* '\' *)
(* with tarzan *)
(* A name as used by Id in expr and by Hint in hint_type: either a
* (possibly qualified) identifier or one of the special class keywords.
*)
type name =
| XName of qualified_ident
(* Could also transform at parsing time all occurrences of self:: and
* parent:: by their respective names. But I prefer to have all the
* PHP features somehow explicitly represented in the AST.
*)
| Self of tok
| Parent of tok
(* php 5.3 late static binding (no idea why it's useful ...) *)
| LateStatic of tok
(* with tarzan *)
(* ------------------------------------------------------------------------- *)
(* Types *)
(* ------------------------------------------------------------------------- *)
(* A type hint, as attached to parameters (p_type), function return types
* (f_return_type), constants (cst_type), class members and type definitions.
* This type starts the long chain of mutually recursive types (linked
* with 'and') that runs until program.
*)
type hint_type =
| Hint of name (* only self/parent, no static *) *
type_args option
(* NOTE(review): the tok is presumably the array keyword -- confirm *)
| HintArray of tok
(* NOTE(review): presumably a nullable hint, the tok being the ? -- confirm *)
| HintQuestion of (tok * hint_type)
| HintTuple of hint_type comma_list paren
(* the optional return type triple has the same layout as the
* f_return_type field of func_def *)
| HintCallback of
(tok (* "function" *)
* (hint_type comma_list_dots paren) (* params *)
* (tok * tok option * hint_type) option (* return type *)
) paren
(* each field is: field name, arrow token, type of the field *)
| HintShape of
tok (* "shape" *) *
(string_const_expr * tok (* '=>' *) * hint_type) comma_list paren
| HintTypeConst of
hint_type (* lhs *)
* tok (* '::' *)
* hint_type (* rhs *)
(* NOTE(review): the tok is presumably the ... token -- confirm *)
| HintVariadic of (tok * hint_type option)
(* the type arguments applied to a name, see Hint *)
and type_args = hint_type comma_list single_angle
(* the type parameters declared by a function, class or type definition
* (f_tparams, c_tparams, t_tparams) *)
and type_params = type_param comma_list single_angle
and type_param =
| TParam of ident
| TParamConstraint of ident * tok (* as *) * class_name
(* a class name is represented as a type hint *)
and class_name = hint_type
(* This is used in Cast (see castOp). For type analysis see type_php.ml *)
and ptype =
| BoolTy
| IntTy
| DoubleTy (* float *)
| StringTy
| ArrayTy
| ObjectTy
(* ------------------------------------------------------------------------- *)
(* Expression *)
(* ------------------------------------------------------------------------- *)
(* I used to have a 'type expr = exprbis * exp_type_info' but it complicates
* many patterns when working on expressions, and it turns out I never
* implemented the type annotater. It's easier to do such an annotater on
* a real AST like the PIL. So just have this file be a simple concrete
* syntax tree and no more.
*
* Note that lvalue, class_name_reference, static_scalar and a few other
* names used below are plain aliases of expr, defined further down.
*)
and expr =
(* constant/function/class/method/field/class_cst name.
*
* Now that we've unified lvalue and expr, the use of Id is more
* ambiguous; it can refer to a classname, a function name,
* a constant, etc. You need to match the context of use of Id
* to know in which situation you are (and take care if you use a visitor
* to not always call recursively the visitor/continuation):
*
* - function: Call (Id, _)
* - method: Call (ObjGet (_, Id), _), Call (ClassGet (_, Id), _)
* - class: ClassGet (Id, _), New (Id, _), AssignNew (.., Id, _),
* InstanceOf (_, Id)
* and also extends, implements, catch, type
* - class_constant: ClassGet (_, Id) (including the special C::class)
* - field: ObjGet(_, Id)
* - constant: Id
*
* todo: just like we annotate IdVar with scope info, we could annotate
* Id with a kind info.
*)
| Id of name
(* less: maybe could unify.
* note that IdVar is used not only for local variables
* but also for globals, class variables, parameters, etc.
*)
| IdVar of dname * Scope_php.phpscope ref
| This of tok
| Call of expr * argument comma_list paren
| ObjGet of expr * tok (* -> *) * expr
| ClassGet of class_name_reference * tok (* :: *) * expr
| ArrayGet of expr * expr option bracket
| HashGet of expr * expr brace
| BraceIdent of expr brace
| Deref of tok (* $ *) * expr
(* start of expr_without_variable in original PHP lexer/parser terminology *)
| Sc of scalar
| Binary of expr * binaryOp wrap * expr
| Unary of unaryOp wrap * expr
(* should be a statement ... *)
| Assign of lvalue * tok (* = *) * expr
| AssignOp of lvalue * assignOp wrap * expr
| Postfix of rw_variable * fixOp wrap
(* todo: should actually be called Prefix :) *)
| Infix of fixOp wrap * rw_variable
(* PHP 5.3 allows 'expr ?: expr' hence the 'option' type below
* from www.php.net/manual/en/language.operators.comparison.php#language.operators.comparison.ternary:
* "Since PHP 5.3, it is possible to leave out the middle part of the
* ternary operator. Expression
* expr1 ?: expr3 returns expr1 if expr1 evaluates to TRUE, and expr3
* otherwise."
*)
| CondExpr of expr * tok (* ? *) * expr option * tok (* : *) * expr
| AssignList of tok (* list *) * list_assign comma_list paren *
tok (* = *) * expr
| ArrayLong of tok (* array | shape *) * array_pair comma_list paren
(* php 5.4: https://wiki.php.net/rfc/shortsyntaxforarrays *)
| ArrayShort of array_pair comma_list bracket
(* facebook-ext: *)
| Collection of name * array_pair comma_list brace
| New of tok * class_name_reference * argument comma_list paren option
| Clone of tok * expr
| AssignRef of lvalue * tok (* = *) * tok (* & *) * lvalue
| AssignNew of lvalue * tok (* = *) * tok (* & *) * tok (* new *) *
class_name_reference *
argument comma_list paren option
| Cast of castOp wrap * expr
| CastUnset of tok * expr (* ??? *)
| InstanceOf of expr * tok * class_name_reference
(* !The evil eval! *)
| Eval of tok * expr paren
(* Woohoo, viva PHP 5.3 *)
| Lambda of lambda_def
| ShortLambda of short_lambda_def
(* should be a statement ... *)
| Exit of tok * (expr option paren) option
| At of tok (* @ *) * expr
| Print of tok * expr
| BackQuote of tok * encaps list * tok
(* should be at toplevel *)
| Include of tok * expr | IncludeOnce of tok * expr
| Require of tok * expr | RequireOnce of tok * expr
| Empty of tok * lvalue paren
| Isset of tok * lvalue comma_list paren
(* xhp: *)
| XhpHtml of xhp_html
(* php-facebook-ext:
*
* todo: this should be at the statement level as there are only a few
* forms of yield that HPHP supports (e.g. yield <expr>; and
* <lval> = yield <expr>). One could then have a YieldReturn and YieldAssign
* but this may change and none of the analyses in pfff need to
* understand yield so for now just make it simple and add yield
* at the expression level.
*)
| Yield of tok * array_pair (* should have no ref inside *)
| YieldBreak of tok * tok
(* php-facebook-ext: Just like yield, this should be at the statement level *)
| Await of tok * expr
(* only appears when processing sgrep patterns *)
| SgrepExprDots of tok
(* unparser: *)
| ParenExpr of expr paren
(* A scalar expression (see Sc in expr): a constant or a string made of
* several pieces (see encaps).
*)
and scalar =
| C of constant
| Guil of tok (* '"' or b'"' *) * encaps list * tok (* '"' *)
| HereDoc of
tok (* < < < EOF, or b < < < EOF *) *
encaps list *
tok (* EOF; *)
and constant =
| Int of string wrap (* decimal, hex, or binary int format *)
| Double of string wrap
(* see also Guil for interpolated strings
* The string does not contain the enclosing '"' or "'".
* It does not contain the possible 'b' prefix either.
*)
| String of string wrap
| PreProcess of cpp_directive wrap
(* only appears when processing an xdebug coverage file *)
| XdebugClass of name * class_stmt list
| XdebugResource
(* http://php.net/manual/en/language.constants.predefined.php
* NOTE(review): presumably one constructor per magic constant listed at
* that url (__LINE__, __FILE__, ...) -- confirm the mapping in the lexer.
*)
and cpp_directive =
| Line | File | Dir
| ClassC | TraitC
| MethodC | FunctionC
| NamespaceC
(* A piece of a string made of several parts; lists of encaps are carried
* by Guil, HereDoc, BackQuote and XhpAttrString.
*)
and encaps =
| EncapsString of string wrap
| EncapsVar of lvalue
(* for "xx {$beer}s" *)
| EncapsCurly of tok * lvalue * tok
(* for "xx ${beer}s" *)
| EncapsDollarCurly of tok (* '${' *) * lvalue * tok
| EncapsExpr of tok * expr * tok
(* the operator of Postfix and Infix *)
and fixOp = Dec | Inc
(* the operator of Binary *)
and binaryOp = Arith of arithOp | Logical of logicalOp | BinaryConcat (* . *) | Pipe | CombinedComparison
(* NOTE(review): DecLeft/DecRight are presumably the shift operators and
* And/Or/Xor the bitwise ones (the boolean ones being in logicalOp) --
* confirm against the parser.
*)
and arithOp =
| Plus | Minus | Mul | Div | Mod
| DecLeft | DecRight
| And | Or | Xor
and logicalOp =
| Inf | Sup | InfEq | SupEq
| Eq | NotEq
| Identical (* === *) | NotIdentical (* !== *)
| AndLog | OrLog | XorLog
| AndBool | OrBool (* diff with AndLog ? short-circuit operators ? *)
(* the operator of AssignOp; reuses arithOp for the arithmetic forms *)
and assignOp = AssignOpArith of arithOp
| AssignConcat (* .= *)
(* the operator of Unary *)
and unaryOp =
| UnPlus | UnMinus
| UnBang | UnTilde
(* the operator of Cast is just the target type *)
and castOp = ptype
(* An element of the list(...) carried by AssignList, ForeachList, or a
* nested ListList.
* less: merge with foreach_pattern, list($k => $v) = ... is allowed no?
*)
and list_assign =
| ListVar of lvalue
| ListList of tok * list_assign comma_list paren
(* NOTE(review): presumably a skipped position, as in list($a, , $b) *)
| ListEmpty
(* An element of an array literal (ArrayLong, ArrayShort) or of a
* Collection; also the operand of Yield.
*)
and array_pair =
| ArrayExpr of expr
| ArrayRef of tok (* & *) * lvalue
| ArrayArrowExpr of expr * tok (* => *) * expr
| ArrayArrowRef of expr * tok (* => *) * tok (* & *) * lvalue
(* An xhp element used as an expression (see XhpHtml in expr). *)
and xhp_html =
(* NOTE(review): the trailing 'xhp_tag option wrap' is presumably the
* closing tag, whose name may be omitted -- confirm against the parser *)
| Xhp of xhp_tag wrap * xhp_attribute list * tok (* > *) *
xhp_body list * xhp_tag option wrap
| XhpSingleton of xhp_tag wrap * xhp_attribute list * tok (* /> *)
and xhp_attribute = xhp_attr_name * tok (* = *) * xhp_attr_value
and xhp_attr_name = string wrap (* e.g. task-bar *)
and xhp_attr_value =
| XhpAttrString of tok (* '"' *) * encaps list * tok (* '"' *)
| XhpAttrExpr of expr brace
(* sgrep: *)
| SgrepXhpAttrValueMvar of string wrap
(* what can appear between the opening and closing tags of an Xhp *)
and xhp_body =
| XhpText of string wrap
| XhpExpr of expr brace
| XhpNested of xhp_html
(* An argument of Call, New or AssignNew.
* NOTE(review): the tok of ArgRef is presumably the & and the tok of
* ArgUnpack the ... -- confirm against the parser.
*)
and argument =
| Arg of expr
| ArgRef of tok * w_variable
| ArgUnpack of tok * expr
(* now unified with expr *)
and lvalue = expr
and class_name_reference = expr
(* semantic: those grammar rule names were used in the original PHP
* lexer/parser but not enforced. They are just comments: the three
* types below are all aliases of lvalue. *)
and rw_variable = lvalue
and r_variable = lvalue
and w_variable = lvalue
(* static_scalar used to be a special type allowing constants and
* a restricted form of expressions. But it was yet
* another type and it turned out it was making things like spatch
* and visitors more complicated because stuff like "+ 1" could
* be an expr or a static_scalar. We don't need this "isomorphism".
* I never leveraged the specificities of static_scalar (maybe a compiler
* would, but my checker/refactorers/... didn't).
*
* Note that it's not 'type static_scalar = scalar' because static_scalar
* actually allows arrays (why the heck they called it a scalar then ...)
* and plus/minus which are only in expr.
*)
and static_scalar = expr
(* string_const_expr is for shape field names which are permitted to be either
* literal strings or class constants. *)
and string_const_expr = expr
(* ------------------------------------------------------------------------- *)
(* Statement *)
(* ------------------------------------------------------------------------- *)
(* By introducing Lambda, expr and stmt are now mutually recursive *)
and stmt =
| ExprStmt of expr * tok (* ; *)
| EmptyStmt of tok (* ; *)
| Block of stmt_and_def list brace
| If of tok * expr paren * stmt *
(* elseif *) if_elseif list *
(* else *) if_else option
| IfColon of tok * expr paren *
tok * stmt_and_def list * new_elseif list * new_else option *
tok * tok
(* if(cond):
* stmts; defs;
* elseif(cond):
* stmts;..
* else:
* defs; stmts;
* endif; *)
| While of tok * expr paren * colon_stmt
| Do of tok * stmt * tok * expr paren * tok
| For of tok * tok *
for_expr * tok *
for_expr * tok *
for_expr *
tok *
colon_stmt
| Switch of tok * expr paren * switch_case_list
(* if it's an expr_without_variable, the second arg must be a Right variable,
* otherwise if it's a variable then it must be a foreach_variable
*)
| Foreach of tok * tok (*'('*) * expr * tok option (* await *) *
tok (* as *) * foreach_pattern *
tok (*')'*) * colon_stmt
(* example: foreach(expr as $lvalue) { ... }
* foreach(expr as $foreach_variable => $lvalue) { ... }
* foreach(expr as list($x, $y)) { ... }
*)
| Break of tok * expr option * tok
| Continue of tok * expr option * tok
| Return of tok * expr option * tok
| Throw of tok * expr * tok
| Try of tok * stmt_and_def list brace * catch list * finally list
| Echo of tok * expr comma_list * tok
| Globals of tok * global_var comma_list * tok
| StaticVars of tok * static_var comma_list * tok
| InlineHtml of string wrap
| Use of tok * use_filename * tok
| Unset of tok * lvalue comma_list paren * tok
| Declare of tok * declare comma_list paren * colon_stmt
(* nested funcs and classes are mostly used inside if() where the
* if() actually behaves like an ifdef in C.
*)
(* was in stmt_and_def before *)
| FuncDefNested of func_def
(* traits are actually not allowed here *)
| ClassDefNested of class_def
(* the body of a Switch, in its brace form or in its colon/endswitch form *)
and switch_case_list =
| CaseList of
tok (* { *) * tok option (* ; *) * case list * tok (* } *)
| CaseColonList of
tok (* : *) * tok option (* ; *) * case list *
tok (* endswitch *) * tok (* ; *)
and case =
| Case of tok * expr * tok * stmt_and_def list
| Default of tok * tok * stmt_and_def list
(* the elseif and else parts of If *)
and if_elseif = tok * expr paren * stmt
and if_else = (tok * stmt)
(* one of the three parts of the header of For *)
and for_expr = expr comma_list (* can be empty *)
(* what follows the 'as' of a Foreach *)
and foreach_pattern =
| ForeachVar of foreach_variable
| ForeachArrow of foreach_pattern * tok * foreach_pattern
| ForeachList of tok (* list *) * list_assign comma_list paren
and foreach_variable = is_ref * lvalue
(* the catch and finally parts of Try *)
and catch =
tok * (class_name * dname) paren * stmt_and_def list brace
and finally =
tok * stmt_and_def list brace
(* the operand of Use *)
and use_filename =
| UseDirect of string wrap
| UseParen of string wrap paren
(* a directive of Declare: a name and the value it is set to *)
and declare = ident * static_scalar_affect
(* the body of While, For, Foreach and Declare: either a single statement
* or the form introduced by a colon and closed by an endxxx keyword *)
and colon_stmt =
| SingleStmt of stmt
| ColonStmt of tok (* : *) * stmt_and_def list * tok (* endxxx *) * tok (* ; *)
(* the elseif and else parts of IfColon; unlike if_elseif and if_else they
* carry one more token and a list of statements *)
and new_elseif = tok * expr paren * tok * stmt_and_def list
and new_else = tok * tok * stmt_and_def list
(* stmt_and_def used to be a special type allowing Stmt or nested functions
* or classes but it was introducing yet another, not so useful, intermediate
* type.
*)
and stmt_and_def = stmt
(* ------------------------------------------------------------------------- *)
(* Function (and method) definition *)
(* ------------------------------------------------------------------------- *)
(* The definition of a function. The same record is used for methods
* (method_def is an alias of func_def) and for lambdas (see lambda_def);
* f_type tells which one it is.
*)
and func_def = {
f_attrs: attributes option;
f_tok: tok; (* function *)
f_type: function_type;
(* "async" always valid ; others only valid for methods *)
f_modifiers: modifier wrap list;
f_ref: is_ref;
(* can be a Name("__lambda", f_tok) when used for lambdas *)
f_name: ident;
f_tparams: type_params option;
(* the dots should be only at the end (unless in sgrep mode) *)
f_params: parameter comma_list_dots paren;
(* static-php-ext: *)
f_return_type: (tok (* : *) * tok option (* @ *) * hint_type) option;
(* the opening/closing brace can be (fakeInfo(), ';') for abstract methods *)
f_body: stmt_and_def list brace;
}
and function_type =
| FunctionRegular
| FunctionLambda
| MethodRegular
| MethodAbstract
(* a formal parameter of a function, method or lambda *)
and parameter = {
p_attrs: attributes option;
(* php-facebook-ext: implicit field via constructor parameter,
* this is always None except for constructors and the modifier
* can be only Public or Protected or Private (but never Static, etc).
*)
p_modifier: modifier wrap option;
(* php-facebook-ext: to not generate runtime errors if wrong type hint *)
p_soft_type: tok (* @ *) option;
p_type: hint_type option;
p_ref: is_ref;
p_name: dname;
p_default: static_scalar_affect option;
p_variadic: tok (* ... *) option;
}
(* NOTE(review): presumably Some tok, the tok being the &, when the entity
* is a reference, and None otherwise -- confirm against the parser *)
and is_ref = tok (* bool wrap ? *) option
(* the f_name in func_def should be a fake name *)
and lambda_def = (lexical_vars option * func_def)
and lexical_vars = tok (* use *) * lexical_var comma_list paren
and lexical_var = LexicalVar of is_ref * dname
(* todo? could factorize with func_def, but this will require many
* elements to be fake tokens, e.g. the parentheses for parameters
* when there is only one parameter, the brace and semicolon when the body
* is a simple expression
*)
and short_lambda_def = {
(* "async" is the only valid modifier *)
sl_modifiers: modifier wrap list;
sl_params: short_lambda_params;
sl_tok: tok (* ==> *) option; (* async { } doesn't use a ==> *)
sl_body: short_lambda_body;
}
and short_lambda_params =
| SLSingleParam of parameter
| SLParams of parameter comma_list_dots paren
| SLParamsOmitted (* for async { } lambdas *)
and short_lambda_body =
| SLExpr of expr
| SLBody of stmt_and_def list brace
(* ------------------------------------------------------------------------- *)
(* Constant definition *)
(* ------------------------------------------------------------------------- *)
(* The definition of a constant (see ConstantDef in toplevel). *)
and constant_def = {
(* the three tokens of the definition, in source order *)
cst_toks: tok (* const *) * tok (* = *) * tok (* ; *);
cst_name: ident;
cst_type: hint_type option;
cst_val: static_scalar;
}
(* ------------------------------------------------------------------------- *)
(* Class (and interface/trait) definition *)
(* ------------------------------------------------------------------------- *)
(* I used to have a class_def and interface_def because interface_def
* didn't allow certain forms of statements (methods with a body), but
* with the introduction of traits, it does not make that much sense
* to be so specific, so I factorized things. Classes/interfaces/traits
* are not that different; interfaces are really just abstract traits.
* We also now include enums, which share a bunch of machinery with classes.
* The c_type field tells which kind of definition this is.
*)
and class_def = {
c_attrs: attributes option;
c_type: class_type;
c_name: ident;
c_tparams: type_params option;
(* PHP uses single inheritance. Interfaces can also use 'extends'
* but we use the c_implements field for that (because it can be a list).
*)
c_extends: extend option;
(* For classes it's a list of interfaces, for interfaces a list of other
* interfaces it extends. Traits can also now implement interfaces.
*)
c_implements: interface option;
(* If this class is an enum, what is the underlying type (and
* constraint) of the enum? *)
c_enum_type: enum_type option;
(* The class_stmt for interfaces are restricted to only abstract methods.
* The class_stmt seems to be unrestricted for traits (it can even
* contain some 'use') *)
c_body: class_stmt list brace;
}
(* the tokens carried by each constructor are the keywords introducing
* the definition, in source order *)
and class_type =
| ClassRegular of tok (* class *)
| ClassAbstractFinal of tok * tok * tok (* abstract final class *)
| ClassFinal of tok * tok (* final class *)
| ClassAbstract of tok * tok (* abstract class *)
| Interface of tok (* interface *)
(* PHP 5.4 traits: http://php.net/manual/en/language.oop5.traits.php
* Allow to mixin behaviors and data so it's really just
* multiple inheritance with a cooler name.
* note: traits are allowed only at toplevel.
*)
| Trait of tok (* trait *)
| Enum of tok (* enum *)
(* see c_extends and c_implements above *)
and extend = tok * class_name
and interface = tok * class_name comma_list
(* A member of the body of a class, interface, trait or enum (see c_body). *)
and class_stmt =
(* This is abused to represent class constants in enums, so sometimes
* tok is actually fakeInfo. *)
| ClassConstants of
tok option (* abstract *) *
tok (* const *) *
hint_type option *
class_constant comma_list * tok (*;*)
| ClassVariables of
class_var_modifier *
(* static-php-ext: *)
hint_type option *
class_variable comma_list * tok (* ; *)
| Method of method_def
| XhpDecl of xhp_decl
(* php 5.4, 'use' can appear in classes/traits (but not interface) *)
| UseTrait of tok (*use*) * class_name comma_list *
(tok (* ; *), trait_rule list brace) Common.either
(* facebook-ext: 'require' can appear only in traits *)
| TraitConstraint of
tok (* require *) * trait_constraint_kind wrap * hint_type * tok (* ; *)
| ClassType of type_def
(* a name and its optional initial value *)
and class_constant = ident * static_scalar_affect option
and class_variable = dname * static_scalar_affect option
and class_var_modifier =
| NoModifiers of tok (* 'var' *)
| VModifiers of modifier wrap list
(* a few special names: __construct, __call, __callStatic
* ugly: f_body is an empty stmt_and_def for abstract method
* and the ';' is put for the info of the closing brace
* (and the opening brace is a fakeInfo).
*)
and method_def = func_def
(* used by f_modifiers, p_modifier, sl_modifiers and VModifiers; which
* ones are valid depends on the context, see the comments on those fields *)
and modifier =
| Public | Private | Protected
| Static | Abstract | Final | Async
(* The xhp-specific declarations found in a class body (see XhpDecl). *)
and xhp_decl =
| XhpAttributesDecl of
tok (* attribute *) * xhp_attribute_decl comma_list * tok (*;*)
(* there is normally only one 'children' declaration in a class *)
| XhpChildrenDecl of
tok (* children *) * xhp_children_decl * tok (*;*)
| XhpCategoriesDecl of
tok (* category *) * xhp_category_decl comma_list * tok (*;*)
and xhp_attribute_decl =
| XhpAttrInherit of xhp_tag wrap
| XhpAttrDecl of xhp_attribute_type * xhp_attr_name *
xhp_value_affect option * tok option (* is required *)
and xhp_attribute_type =
| XhpAttrType of hint_type
| XhpAttrVar of tok
| XhpAttrEnum of tok (* enum *) * constant comma_list brace
(* the default value of an xhp attribute declaration *)
and xhp_value_affect = tok (* = *) * static_scalar
(* Regexp-like syntax. The grammar actually restricts what kinds of
* regexps can be written. For instance pcdata must be nested. But
* here I simplified the type.
*)
and xhp_children_decl =
| XhpChild of xhp_tag wrap (* :x:frag *)
| XhpChildCategory of xhp_tag wrap (* %x:frag *)
| XhpChildAny of tok
| XhpChildEmpty of tok
| XhpChildPcdata of tok
| XhpChildSequence of xhp_children_decl * tok (*,*) * xhp_children_decl
| XhpChildAlternative of xhp_children_decl * tok (*|*) * xhp_children_decl
| XhpChildMul of xhp_children_decl * tok (* * *)
| XhpChildOption of xhp_children_decl * tok (* ? *)
| XhpChildPlus of xhp_children_decl * tok (* + *)
| XhpChildParen of xhp_children_decl paren
and xhp_category_decl = xhp_tag wrap (* %x:frag *)
(* The rules found in the braces following a UseTrait.
* Those are bad features ... no one should use them.
*)
and trait_rule =
| InsteadOf of name * tok * ident * tok (* insteadof *) *
class_name comma_list * tok (* ; *)
| As of (ident, name * tok * ident) Common.either * tok (* as *) *
modifier wrap list * ident option * tok (* ; *)
(* the kind of a TraitConstraint in class_stmt *)
and trait_constraint_kind =
| MustExtend
| MustImplement
(* the underlying type of an enum, see c_enum_type in class_def *)
and enum_type = {
e_tok: tok; (* : *)
e_base: hint_type;
e_constraint: (tok (* as *) * hint_type) option;
}
(* ------------------------------------------------------------------------- *)
(* Type definition *)
(* ------------------------------------------------------------------------- *)
(* facebook-ext: a type definition; used at the toplevel (TypeDef) and
* inside classes (ClassType in class_stmt).
*)
and type_def = {
t_tok: tok; (* type/newtype *)
t_name: ident;
t_tparams: type_params option;
t_tconstraint: (tok (* as *) * hint_type) option;
t_tokeq: tok; (* = *)
t_kind: type_def_kind;
t_sc: tok; (* ; *)
}
and type_def_kind =
| Alias of hint_type
| Newtype of hint_type
(* NOTE(review): presumably the kind used by ClassType, the type being
* optional there -- confirm against the parser *)
| ClassConstType of hint_type option
(* ------------------------------------------------------------------------- *)
(* Other declarations *)
(* ------------------------------------------------------------------------- *)
(* an element of a Globals statement *)
and global_var =
| GlobalVar of dname
| GlobalDollar of tok * r_variable
| GlobalDollarExpr of tok * expr brace
(* an element of a StaticVars statement: a name and its optional value *)
and static_var = dname * static_scalar_affect option
(* the '= value' part of a declaration *)
and static_scalar_affect = tok (* = *) * static_scalar
(* an element of NamespaceUse in toplevel;
* the qualified_ident can have a leading '\' *)
and namespace_use_rule =
| ImportNamespace of qualified_ident
| AliasNamespace of qualified_ident * tok (* as *) * ident
(* ------------------------------------------------------------------------- *)
(* User attributes, a.k.a annotations *)
(* ------------------------------------------------------------------------- *)
(* HPHP extension similar to http://en.wikipedia.org/wiki/Java_annotation
* Attributes can be attached to functions (f_attrs), parameters (p_attrs)
* and classes (c_attrs).
*)
and attribute =
| Attribute of string wrap
| AttributeWithArgs of string wrap * static_scalar comma_list paren
and attributes = attribute comma_list angle
(* ------------------------------------------------------------------------- *)
(* The toplevels elements *)
(* ------------------------------------------------------------------------- *)
(* For parsing reasons and aesthetics I think it's better to differentiate
* nested functions and toplevel functions.
* update: sure? ast_php_simple simplifies things.
* Also it's better to group the toplevel statements together (StmtList below),
* so that in the database later they share the same id.
*
* Note that nested functions are usually under an if(defined(...)) at
* the toplevel. There is no ifdef in PHP so they reuse 'if'.
*)
and toplevel =
| StmtList of stmt list
| FuncDef of func_def
| ClassDef of class_def
(* PHP 5.3, see http://us.php.net/const *)
| ConstantDef of constant_def
(* facebook extension *)
| TypeDef of type_def
(* PHP 5.3, see http://www.php.net/manual/en/language.namespaces.rules.php*)
(* the qualified_ident below can not have a leading '\' *)
| NamespaceDef of tok * qualified_ident * tok (* ; *)
(* when there is no qualified_ident, this means global scope *)
| NamespaceBracketDef of tok * qualified_ident option * toplevel list brace
| NamespaceUse of tok * namespace_use_rule comma_list * tok (* ; *)
(* old: | Halt of tok * unit paren * tok (* __halt__ ; *) *)
| NotParsedCorrectly of tok list (* when Flag.error_recovery = true *)
| FinalDef of tok (* EOF *)
(* a whole file: the list of its toplevel elements *)
and program = toplevel list
(* with tarzan *)
(* ------------------------------------------------------------------------- *)
(* Entity and any *)
(* ------------------------------------------------------------------------- *)
(* The goal of the entity type is to lift up important entities which
* are originally nested in the AST such as methods.
*
* history: was in ast_entity_php.ml before but better to put everything
* in one file.
*)
(* Sum type with one constructor per kind of entity; each constructor
 * simply wraps the corresponding AST definition. *)
type entity =
(* entities that also exist as toplevel elements *)
| FunctionE of func_def
| ClassE of class_def
| ConstantE of constant_def
| TypedefE of type_def
| StmtListE of stmt list
(* entities that are nested inside other definitions in the AST *)
| MethodE of method_def
| ClassConstantE of class_constant
(* a class variable paired with a list of modifiers *)
| ClassVariableE of class_variable * modifier list
| XhpAttrE of xhp_attribute_decl
(* a bare list of tokens (presumably anything not covered by the cases
 * above; confirm against the code building entities) *)
| MiscE of tok list
(* Sum type able to hold a value of many of the AST types defined in this
 * file, one constructor per wrapped type. Several constructors carry a
 * '2' suffix (presumably to avoid clashing with constructors of the same
 * name in other types of this file; confirm before renaming). *)
type any =
(* expressions, statements and whole-file constructs *)
| Expr of expr
| Stmt2 of stmt
| StmtAndDefs of stmt_and_def list
| Toplevel of toplevel
| Program of program
| Entity of entity
(* function call arguments and function parameters *)
| Argument of argument
| Arguments of argument comma_list
| Parameter of parameter
| Parameters of parameter comma_list_dots paren
(* a brace-delimited list of statements and definitions *)
| Body of stmt_and_def list brace
(* class members *)
| ClassStmt of class_stmt
| ClassConstant2 of class_constant
| ClassVariable of class_variable
(* statement sub-parts *)
| ListAssign of list_assign
| ColonStmt2 of colon_stmt
| Case2 of case
(* XHP constructs *)
| XhpAttribute of xhp_attribute
| XhpAttrValue of xhp_attr_value
| XhpHtml2 of xhp_html
| XhpChildrenDecl2 of xhp_children_decl
(* tokens, identifiers and type hints *)
| Info of tok
| InfoList of tok list
| Ident2 of ident
| Hint2 of hint_type
(* with tarzan *)
(*****************************************************************************)
(* Some constructors *)
(*****************************************************************************)
(* Return a fresh reference cell initialised to Scope_code.NoScope.
 * Every call allocates a new cell. *)
let noScope () =
  let initial_scope = Scope_code.NoScope in
  ref initial_scope
(* Build a token info that does not come from a real file: its token is
 * a FakeTokStr holding [str] together with [next_to] (None by default),
 * and no transformation is attached to it. *)
let fakeInfo ?(next_to=None) str =
  let fake_token = Parse_info.FakeTokStr (str, next_to) in
  { Parse_info.token = fake_token; transfo = Parse_info.NoTransfo }
(*****************************************************************************)
(* Wrappers *)
(*****************************************************************************)
(* Return the first component of a pair, dropping the second one. *)
let unwrap (value, _info) = value
(* Return the middle component of a triple, dropping the two outer ones. *)
let unparen (_left, inner, _right) = inner
(* Same projection as unparen, under a name suited to brace-delimited
 * values. *)
let unbrace (_left, inner, _right) = inner
(* Same projection as unparen, under a name suited to bracket-delimited
 * values. *)
let unbracket (_left, inner, _right) = inner
(* Keep only the payloads of the Left elements of [xs], in order; the
 * Right elements are dropped. *)
let uncomma xs =
  let keep_element = function
    | Right _separator -> None
    | Left elt -> Some elt
  in
  Common.map_filter keep_element xs
(* Keep only the payloads of the Left3 elements of [xs], in order; the
 * Middle3 and Right3 elements are dropped. *)
let uncomma_dots xs =
  let keep_element = function
    | Middle3 _ | Right3 _ -> None
    | Left3 elt -> Some elt
  in
  Common.map_filter keep_element xs
(* Extract the expression of a plain Arg argument.
 * Raises Failure when given an ArgRef or an ArgUnpack. *)
let unarg = function
  | ArgUnpack _ -> failwith "Found a ArgUnpack"
  | ArgRef _ -> failwith "Found a ArgRef"
  | Arg expr -> expr
(* Remove the separators from the argument list [xs], then partition the
 * arguments with Common.partition_either: plain Arg payloads are tagged
 * Left, ArgRef and ArgUnpack payloads are tagged Right. *)
let unargs xs =
  let classify = function
    | Arg e -> Left e
    | ArgRef (_tok, e) | ArgUnpack (_tok, e) -> Right e
  in
  Common.partition_either classify (uncomma xs)
(* Return the list of modifiers without their attached info: empty for
 * NoModifiers, the first component of each pair for VModifiers. *)
let unmodifiers = function
  | VModifiers mods -> List.map (fun (modifier, _info) -> modifier) mods
  | NoModifiers _ -> []
(* Apply [f] to the middle component of a triple, leaving the two outer
 * components untouched. *)
let map_paren f wrapped =
  let (opening, inner, closing) = wrapped in
  (opening, f inner, closing)
(* Apply [f] to the payload of every Left element of [xs]; Right elements
 * are rebuilt unchanged. Order and length are preserved. *)
let map_comma_list f xs =
  let map_element = function
    | Right separator -> Right separator
    | Left elt -> Left (f elt)
  in
  List.map map_element xs
(*****************************************************************************)
(* Abstract line *)
(*****************************************************************************)
(* When we have extended the AST to add some info about the tokens,
* such as their line number in the file, we can no longer use the
* OCaml '=' to compare AST elements. To overcome this problem and be
* able to use '=' again, we just have to get rid of all this extra
* information, i.e. to "abstract the line" (al) information.
*)
(* Return a copy of [info] whose token field is replaced by
 * Parse_info.Ab; the other fields are kept as they are. *)
let al_info info =
  let abstracted = Parse_info.Ab in
  { info with Parse_info.token = abstracted }
(*****************************************************************************)
(* Views *)
(*****************************************************************************)
(* examples:
* inline more static funcall in expr type or variable type
*)
(*****************************************************************************)
(* Helpers (could also be put in lib_parsing.ml) *)
(*****************************************************************************)
(* String form of an identifier: the string itself for a Name; for an
 * XhpName, the components joined with ":" and prefixed by ":". *)
let str_of_ident = function
  | XhpName (parts, _tok) -> ":" ^ Common.join ":" parts
  | Name (s, _tok) -> s
(* Return the info (second component) attached to an identifier,
 * whichever constructor it was built with. *)
let info_of_ident = function
  | Name (_, info) | XhpName (_, info) -> info
(* Return the first component of the pair carried by a DName. *)
let str_of_dname dname =
  let DName (s, _info) = dname in
  s
(* Return the second component (the info) of the pair carried by a
 * DName. *)
let info_of_dname dname =
  let DName (_s, info) = dname in
  info
(* Return the info of the first element of a qualified identifier: the
 * token itself for a QITok, the identifier's info for a QI.
 * Raises Impossible on the empty list. *)
let info_of_qualified_ident qu =
  match qu with
  | QITok tok :: _rest -> tok
  | QI id :: _rest -> info_of_ident id
  | [] -> raise Impossible
(* Return an info for a name: the carried token for Self, Parent and
 * LateStatic; for an XName, the info of its first element.
 * Raises Impossible when the XName list is empty. *)
let info_of_name = function
  | XName (QITok tok :: _) -> tok
  | XName (QI id :: _) -> info_of_ident id
  | XName [] -> raise Impossible
  | Self tok | Parent tok | LateStatic tok -> tok
(* Raised by str_of_name when given an XName that is not a single QI
 * element; carries the info of the first element of that name. *)
exception TodoNamespace of tok
(* todo? copy the one in cmf/uses_module_helpers.ml now? *)
(* String form of a name. Self, Parent and LateStatic yield the string of
 * their token; an XName made of a single QI yields that identifier's
 * string. Any other XName raises TodoNamespace. *)
let str_of_name = function
  | Self tok | Parent tok | LateStatic tok -> Parse_info.str_of_info tok
  | XName [QI id] -> str_of_ident id
  | XName qu ->
    let where = info_of_qualified_ident qu in
    raise (TodoNamespace where)
(* String form of a name, namespace parts included. Self, Parent and
 * LateStatic yield the string of their token. For an XName, every QITok
 * becomes a backslash, every QI becomes its identifier string, and the
 * pieces are concatenated without separator. *)
let str_of_name_namespace = function
  | XName parts ->
    let piece_to_string = function
      | QI id -> str_of_ident id
      | QITok _ -> "\\"
    in
    Common.join "" (List.map piece_to_string parts)
  | Self tok | Parent tok | LateStatic tok -> Parse_info.str_of_info tok
(* Return the name carried by a Hint, ignoring its second component.
 * Raises Impossible for any other constructor. *)
let name_of_class_name = function
  | Hint (name, _type_args) -> name
  | _ -> raise Impossible
(* String form of a class name: extract its name with name_of_class_name,
 * then convert it with str_of_name. Exceptions raised by either of the
 * two are propagated. *)
let str_of_class_name x =
  str_of_name (name_of_class_name x)
let ident_of_class_name x =
let name = name_of_class_name x in
match name with