-
Notifications
You must be signed in to change notification settings - Fork 3k
/
full_fidelity_ast.ml
3333 lines (3201 loc) · 127 KB
/
full_fidelity_ast.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
(**
* Copyright (c) 2016, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the "hack" directory of this source tree.
*
*)
module SyntaxError = Full_fidelity_syntax_error
open Prim_defs
open Hh_core
(* What we are lowering to *)
open Ast
(* Don't allow expressions to nest deeper than this to avoid stack overflow *)
let recursion_limit = 30000
(* Context of the file being parsed, as (hopefully some day read-only) state. *)
type env =
{ is_hh_file : bool
; codegen : bool
; systemlib_compat_mode : bool
; php5_compat_mode : bool
; elaborate_namespaces : bool
; include_line_comments : bool
; keep_errors : bool
; quick_mode : bool
; lower_coroutines : bool
; enable_hh_syntax : bool
; fail_open : bool
; parser_options : ParserOptions.t
; fi_mode : FileInfo.mode
; file : Relative_path.t
; stats : Stats_container.t option
; hacksperimental : bool
; top_level_statements : bool (* Whether we are (still) considering TLSs*)
(* Changing parts; should disappear in future. `mutable` saves allocations. *)
; mutable ignore_pos : bool
; mutable max_depth : int (* Filthy hack around OCaml bug *)
; mutable saw_yield : bool (* Information flowing back up *)
; mutable unsafes : ISet.t (* Offsets of UNSAFE_EXPR in trivia *)
; mutable saw_std_constant_redefinition: bool
(* Whether we've seen COMPILER_HALT_OFFSET. The value of COMPILER_HALT_OFFSET
defaults to 0 if HALT_COMPILER isn't called.
None -> COMPILER_HALT_OFFSET isn't in the source file
Some 0 -> COMPILER_HALT_OFFSET is in the source file, but HALT_COMPILER isn't
Some x -> COMPILER_HALT_OFFSET is in the source file,
HALT_COMPILER is at x bytes offset in the file.
*)
; saw_compiler_halt_offset : (int option) ref
; recursion_depth : int ref
}[@@deriving show]
let make_env
?(codegen = false )
?(systemlib_compat_mode = false )
?(php5_compat_mode = false )
?(elaborate_namespaces = true )
?(include_line_comments = false )
?(keep_errors = true )
?(ignore_pos = false )
?(quick_mode = false )
?(lower_coroutines = true )
?(enable_hh_syntax = false )
?(fail_open = true )
?(parser_options = ParserOptions.default )
?(fi_mode = FileInfo.Mpartial )
?(is_hh_file = false )
?stats
?(hacksperimental = false )
(file : Relative_path.t)
: env
= let parser_options = ParserOptions.with_hh_syntax_for_hhvm parser_options
(codegen && (enable_hh_syntax || is_hh_file)) in
{ is_hh_file
; codegen = codegen || systemlib_compat_mode
; systemlib_compat_mode
; php5_compat_mode
; elaborate_namespaces
; include_line_comments
; keep_errors
; quick_mode =
not codegen
&& (match fi_mode with
| FileInfo.Mdecl
| FileInfo.Mphp -> true
| _ -> quick_mode
)
; lower_coroutines
; enable_hh_syntax
; parser_options
; fi_mode
; fail_open
; file
; stats
; hacksperimental
; top_level_statements = true
; ignore_pos
; max_depth = 42
; saw_yield = false
; unsafes = ISet.empty
; saw_std_constant_redefinition = false
; saw_compiler_halt_offset = ref None
; recursion_depth = ref 0
}
type result =
{ fi_mode : FileInfo.mode
; is_hh_file : bool
; ast : Ast.program
; content : string
; file : Relative_path.t
; comments : (Pos.t * comment) list
} [@@deriving show]
module WithPositionedSyntax(Syntax : Positioned_syntax_sig.PositionedSyntax_S) = struct
(* What we're lowering from *)
open Syntax
type node = Syntax.t
module Token = Syntax.Token
module Trivia = Token.Trivia
module TriviaKind = Trivia.TriviaKind
module SyntaxKind = Full_fidelity_syntax_kind
module TK = Full_fidelity_token_kind
module SourceText = Trivia.SourceText
module NS = Namespaces
let drop_pstr : int -> pstring -> pstring = fun cnt (pos, str) ->
let len = String.length str in
pos, if cnt >= len then "" else String.sub str cnt (len - cnt)
let non_tls env = if not env.top_level_statements then env else
{ env with top_level_statements = false }
type +'a parser = node -> env -> 'a
type ('a, 'b) metaparser = 'a parser -> 'b parser
let underscore = Str.regexp "_"
let quoted = Str.regexp "[ \t\n\r\012]*\"\\(\\(.\\|\n\\)*\\)\""
let whitespace = Str.regexp "[ \t\n\r\012]+"
let hashbang = Str.regexp "^#!.*\n"
let ignore_error = Str.regexp "HH_\\(FIXME\\|IGNORE_ERROR\\)[ \\t\\n]*\\[?\\([0-9]+\\)\\]?"
let namespace_use = Str.regexp "[^\\\\]*$"
let mode_annotation = function
| FileInfo.Mphp -> FileInfo.Mdecl
| m -> m
let syntax_to_list include_separators node =
let rec aux acc syntax_list =
match syntax_list with
| [] -> acc
| h :: t ->
begin
match syntax h with
| ListItem { list_item; list_separator } ->
let acc = list_item :: acc in
let acc =
if include_separators then (list_separator :: acc ) else acc in
aux acc t
| _ -> aux (h :: acc) t
end in
match syntax node with
| Missing -> [ ]
| SyntaxList s -> List.rev (aux [] s)
| ListItem { list_item; list_separator } ->
if include_separators then [ list_item; list_separator ] else [ list_item ]
| _ -> [ node ]
let syntax_to_list_no_separators = syntax_to_list false
let pPos : Pos.t parser = fun node env ->
if env.ignore_pos
then Pos.none
else Option.value ~default:Pos.none (position_exclusive env.file node)
let raise_parsing_error env node msg =
if not env.quick_mode && env.keep_errors then
let p = pPos node env in
Errors.parsing_error (p, msg)
else if env.codegen && not env.lower_coroutines then
let p = (Option.value (position env.file node) ~default:Pos.none) in
let (s, e) = Pos.info_raw p in
let e = SyntaxError.make ~error_type:SyntaxError.ParseError s e msg in
raise @@ SyntaxError.ParserFatal (e, p)
else ()
(* HHVM starts range of function declaration from the 'function' keyword *)
let pFunction node env =
let p = pPos node env in
match syntax node with
| FunctionDeclaration { function_declaration_header = h; _ }
| MethodishDeclaration { methodish_function_decl_header = h; _ }
when env.codegen ->
begin match syntax h with
| FunctionDeclarationHeader { function_keyword = f; _ }
when not (is_missing f) ->
(* For continuation compilation, we end up with spans across files :-( *)
Pos.btw_nocheck (pPos f env) p
| _ -> p
end
| _ -> p
exception Lowerer_invariant_failure of string * string
let invariant_failure node msg env =
let pos = Pos.string (Pos.to_absolute (pPos node env)) in
raise (Lowerer_invariant_failure (pos, msg))
let scuba_table = Scuba.Table.of_name "hh_missing_lowerer_cases"
let log_missing ?(caught = false) ~(env:env) ~expecting node : unit =
EventLogger.log_if_initialized @@ fun () ->
let source = source_text node in
let start = start_offset node in
let end_ = end_offset node in
let pos = SourceText.relative_pos env.file source start end_ in
let file = Relative_path.to_absolute env.file in
let contents =
let context_size = 5000 in
let start = max 0 (start - context_size) in
let length = min (2 * context_size) (SourceText.length source - start) in
SourceText.sub source start length
in
let kind = SyntaxKind.to_string (Syntax.kind node) in
let line = Pos.line pos in
let column = Pos.start_cnum pos in
let synthetic = is_synthetic node in
Scuba.new_sample (Some scuba_table)
|> Scuba.add_normal "filename" file
|> Scuba.add_normal "expecting" expecting
|> Scuba.add_normal "contents" contents
|> Scuba.add_normal "found_kind" kind
|> Scuba.add_int "line" line
|> Scuba.add_int "column" column
|> Scuba.add_int "is_synthetic" (if synthetic then 1 else 0)
|> Scuba.add_int "caught" (if caught then 1 else 0)
|> EventLogger.log
exception API_Missing_syntax of string * env * node
let missing_syntax : ?fallback:'a -> string -> node -> env -> 'a =
fun ?fallback expecting node env ->
match fallback with
| Some x when env.fail_open ->
let () = log_missing ~env ~expecting node in
x
| _ -> raise (API_Missing_syntax (expecting, env, node))
let runP : 'a parser -> node -> env -> 'a = fun pThing thing env ->
try pThing thing env with
| API_Missing_syntax (s, env, n) ->
let pos = Pos.string (Pos.to_absolute (pPos n env)) in
let msg = Printf.sprintf
"missing case in %s.
- pos: %s
- unexpected: '%s'
- kind: %s
"
s
pos
(text n)
(SyntaxKind.to_string (kind n))
in
raise (Failure msg)
(* TODO: Cleanup this hopeless Noop mess *)
let mk_noop pos : stmt list -> stmt list = function
| [] -> [pos, Noop]
| s -> s
let mpStripNoop pThing node env = match pThing node env with
| [_, Noop] -> []
| stmtl -> stmtl
let mpOptional : ('a, 'a option) metaparser = fun p -> fun node env ->
match syntax node with
| Missing -> None
| _ -> Some (p node env)
let mpYielding : ('a, ('a * bool)) metaparser = fun p node env ->
let outer_saw_yield = env.saw_yield in
let () = env.saw_yield <- false in
let result = p node env in
let result = result, env.saw_yield in
let () = env.saw_yield <- outer_saw_yield in
result
type expr_location =
| TopLevel
| MemberSelect
| InDoubleQuotedString
| InBacktickedString
| InGlobalVar
| AsStatement
| RightOfAssignment
| RightOfReturn
let in_string l =
l = InDoubleQuotedString || l = InBacktickedString
let pos_qualified_name node env =
let aux p =
match syntax p with
| ListItem li -> (text li.list_item) ^ (text li.list_separator)
| _ -> text p in
let p = pPos node env in
let name =
match syntax node with
| QualifiedName {
qualified_name_parts = { syntax = SyntaxList l; _ };
} ->
String.concat "" @@ List.map ~f:aux l
| _ -> missing_syntax "qualified name" node env in
p, name
let rec pos_name node env =
match syntax node with
| QualifiedName _ -> pos_qualified_name node env
| SimpleTypeSpecifier { simple_type_specifier = s } -> pos_name s env
| _ ->
let name = text node in
let local_ignore_pos = env.ignore_pos in
(* Special case for __LINE__; never ignore position for that special name *)
if name = "__LINE__" then env.ignore_pos <- false;
if name = "__COMPILER_HALT_OFFSET__" then env.saw_compiler_halt_offset := Some 0;
let p = pPos node env in
env.ignore_pos <- local_ignore_pos;
p, name
let is_ret_by_ref node = not @@ is_missing node
let couldMap : 'a . f:'a parser -> 'a list parser = fun ~f -> fun node env ->
let rec synmap : 'a . 'a parser -> 'a list parser = fun f node env ->
match syntax node with
| SyntaxList l -> List.concat_map l ~f:(fun n -> go ~f n env)
| ListItem i -> [f i.list_item env]
| _ -> [f node env]
and go : 'a . f:'a parser -> 'a list parser = fun ~f -> function
| node when is_missing node -> fun _env -> []
| node -> synmap f node
in
go ~f node env
let as_list : node -> node list =
let strip_list_item = function
| { syntax = ListItem { list_item = i; _ }; _ } -> i
| x -> x
in function
| { syntax = SyntaxList ({syntax = ListItem _; _}::_ as synl); _ } ->
List.map ~f:strip_list_item synl
| { syntax = SyntaxList synl; _ } -> synl
| { syntax = Missing; _ } -> []
| syn -> [syn]
let token_kind : node -> TK.t option = function
| { syntax = Token t; _ } -> Some (Token.kind t)
| _ -> None
let pBop : (expr -> expr -> expr_) parser = fun node env lhs rhs ->
match token_kind node with
| Some TK.Equal -> Binop (Eq None, lhs, rhs)
| Some TK.Bar -> Binop (Bar, lhs, rhs)
| Some TK.Ampersand -> Binop (Amp, lhs, rhs)
| Some TK.Plus -> Binop (Plus, lhs, rhs)
| Some TK.Minus -> Binop (Minus, lhs, rhs)
| Some TK.Star -> Binop (Star, lhs, rhs)
| Some TK.Or ->
if not env.codegen then raise_parsing_error env node SyntaxError.do_not_use_or;
Binop (BArbar, lhs, rhs)
| Some TK.And ->
if not env.codegen then raise_parsing_error env node SyntaxError.do_not_use_and;
Binop (AMpamp, lhs, rhs)
| Some TK.Xor ->
if not env.codegen then raise_parsing_error env node SyntaxError.do_not_use_xor;
Binop (LogXor, lhs, rhs)
| Some TK.Carat -> Binop (Xor, lhs, rhs)
| Some TK.Slash -> Binop (Slash, lhs, rhs)
| Some TK.Dot -> Binop (Dot, lhs, rhs)
| Some TK.Percent -> Binop (Percent, lhs, rhs)
| Some TK.LessThan -> Binop (Lt, lhs, rhs)
| Some TK.GreaterThan -> Binop (Gt, lhs, rhs)
| Some TK.EqualEqual -> Binop (Eqeq, lhs, rhs)
| Some TK.LessThanEqual -> Binop (Lte, lhs, rhs)
| Some TK.GreaterThanEqual -> Binop (Gte, lhs, rhs)
| Some TK.StarStar -> Binop (Starstar, lhs, rhs)
| Some TK.ExclamationEqual -> Binop (Diff, lhs, rhs)
| Some TK.BarEqual -> Binop (Eq (Some Bar), lhs, rhs)
| Some TK.PlusEqual -> Binop (Eq (Some Plus), lhs, rhs)
| Some TK.MinusEqual -> Binop (Eq (Some Minus), lhs, rhs)
| Some TK.StarEqual -> Binop (Eq (Some Star), lhs, rhs)
| Some TK.StarStarEqual -> Binop (Eq (Some Starstar),lhs, rhs)
| Some TK.SlashEqual -> Binop (Eq (Some Slash), lhs, rhs)
| Some TK.DotEqual -> Binop (Eq (Some Dot), lhs, rhs)
| Some TK.PercentEqual -> Binop (Eq (Some Percent), lhs, rhs)
| Some TK.CaratEqual -> Binop (Eq (Some Xor), lhs, rhs)
| Some TK.AmpersandEqual -> Binop (Eq (Some Amp), lhs, rhs)
| Some TK.BarBar -> Binop (BArbar, lhs, rhs)
| Some TK.AmpersandAmpersand -> Binop (AMpamp, lhs, rhs)
| Some TK.LessThanLessThan -> Binop (Ltlt, lhs, rhs)
| Some TK.GreaterThanGreaterThan -> Binop (Gtgt, lhs, rhs)
| Some TK.EqualEqualEqual -> Binop (EQeqeq, lhs, rhs)
| Some TK.LessThanLessThanEqual -> Binop (Eq (Some Ltlt), lhs, rhs)
| Some TK.GreaterThanGreaterThanEqual -> Binop (Eq (Some Gtgt), lhs, rhs)
| Some TK.LessThanGreaterThan -> Binop (Diff, lhs, rhs)
| Some TK.ExclamationEqualEqual -> Binop (Diff2, lhs, rhs)
| Some TK.LessThanEqualGreaterThan -> Binop (Cmp, lhs, rhs)
| Some TK.QuestionQuestion -> Binop (QuestionQuestion, lhs, rhs)
| Some TK.QuestionQuestionEqual -> Binop (Eq (Some QuestionQuestion), lhs, rhs)
(* The ugly duckling; In the FFP, `|>` is parsed as a
* `BinaryOperator`, whereas the typed AST has separate constructors for
* Pipe and Binop. This is why we don't just project onto a
* `bop`, but a `expr -> expr -> expr_`.
*)
| Some TK.BarGreaterThan -> Pipe (lhs, rhs)
| Some TK.QuestionColon -> Eif (lhs, None, rhs)
(* TODO: Figure out why this fails silently when used in a pBlock; probably
just caught somewhere *)
| _ -> missing_syntax "binary operator" node env
let pImportFlavor : import_flavor parser = fun node env ->
match token_kind node with
| Some TK.Include -> Include
| Some TK.Require -> Require
| Some TK.Include_once -> IncludeOnce
| Some TK.Require_once -> RequireOnce
| _ -> missing_syntax "import flavor" node env
let pNullFlavor : og_null_flavor parser = fun node env ->
match token_kind node with
| Some TK.QuestionMinusGreaterThan -> OG_nullsafe
| Some TK.MinusGreaterThan -> OG_nullthrows
| _ -> missing_syntax "null flavor" node env
type modifiers = {
has_async: bool;
has_coroutine: bool;
kinds: kind list
}
let pModifiers check_modifier node env =
let f (has_async, has_coroutine, kinds) node =
let add_kind k =
check_modifier node;
k :: kinds
in
match token_kind node with
| Some TK.Final -> has_async, has_coroutine, add_kind Final
| Some TK.Static -> has_async, has_coroutine, add_kind Static
| Some TK.Abstract -> has_async, has_coroutine, add_kind Abstract
| Some TK.Private -> has_async, has_coroutine, add_kind Private
| Some TK.Public -> has_async, has_coroutine, add_kind Public
| Some TK.Protected -> has_async, has_coroutine, add_kind Protected
| Some TK.Var -> has_async, has_coroutine, add_kind Public
| Some TK.Async -> true, has_coroutine, kinds
| Some TK.Coroutine -> has_async, true, kinds
| _ -> missing_syntax "kind" node env in
let (has_async, has_coroutine, kinds) =
Core_list.fold_left ~init:(false, false, []) ~f (as_list node) in
{ has_async; has_coroutine; kinds = List.rev kinds }
let pKinds check_modifier node env =
(pModifiers check_modifier node env).kinds
let pParamKind : param_kind parser = fun node env ->
match token_kind node with
| Some TK.Inout -> Pinout
| _ -> missing_syntax "param kind" node env
(* TODO: Clean up string escaping *)
let prepString2 env : node list -> node list =
let is_double_quote_or_backtick ch = ch = '"' || ch = '`' in
let is_binary_string_header s =
(String.length s > 1) && (s.[0] = 'b') && (s.[1] = '"') in
let trimLeft = Token.trim_left in
let trimRight = Token.trim_right in
function
| ({ syntax = Token t; _ } as node::ss)
when (Token.width t) > 0 &&
((is_double_quote_or_backtick (Token.text t).[0])
|| is_binary_string_header (Token.text t)) ->
let rec unwind = function
| [{ syntax = Token t; _ }]
when (Token.width t) > 0 &&
is_double_quote_or_backtick ((Token.text t).[(Token.width t) - 1]) ->
let s = make_token (trimRight ~n:1 t) in
if width s > 0 then [s] else []
| x :: xs -> x :: unwind xs
| _ -> raise_parsing_error env node "Malformed String2 SyntaxList"; []
in
(* Trim the starting b and double quote *)
let left_trim = if (Token.text t).[0] = 'b' then 2 else 1 in
let s = make_token (trimLeft ~n:left_trim t) in
if width s > 0 then s :: unwind ss else unwind ss
| ({ syntax = Token t; _ } as node ::ss)
when (Token.width t) > 3 && String.sub (Token.text t) 0 3 = "<<<" ->
let rec unwind = function
| [{ syntax = Token t; _ }] when (Token.width t) > 0 ->
let content = Token.text t in
let len = (Token.width t) in
let n = len - (String.rindex_from content (len - 2) '\n') in
let s = make_token (trimRight ~n t) in
if width s > 0 then [s] else []
| x :: xs -> x :: unwind xs
| _ -> raise_parsing_error env node "Malformed String2 SyntaxList"; []
in
let content = Token.text t in
let n = (String.index content '\n') + 1 in
let s = make_token (trimLeft ~n t) in
if width s > 0 then s :: unwind ss else unwind ss
| x -> x (* unchanged *)
let extract_unquoted_string ~start ~len content =
try (* Using String.sub; Invalid_argument when str too short *)
if len >= 3 && String.sub content start 3 = "<<<" (* The heredoc case *)
then
(* These types of strings begin with an opening line containing <<<
* followed by a string to use as a terminator (which is optionally
* quoted) and end with a line containing only the terminator and a
* semicolon followed by a blank line. We need to drop the opening line
* as well as the blank line and preceding terminator line.
*)
let start_ = String.index_from content start '\n' + 1 in
let end_ = String.rindex_from content (start + len - 2) '\n' in
(* An empty heredoc, this way, will have start >= end *)
if start_ >= end_ then "" else String.sub content start_ (end_ - start_)
else
match String.get content start, String.get content (start + len - 1) with
| '"', '"' | '\'', '\'' | '`', '`' ->
String.sub content (start + 1) (len - 2)
| _ ->
if start = 0 && len = String.length content then
content
else
String.sub content start len
with Invalid_argument _ | Not_found -> content
let mkStr env node : (string -> string) -> string -> string = fun unescaper content ->
let content = if String.length content > 0 && content.[0] = 'b'
then String.sub content 1 (String.length content - 1) else content in
let len = String.length content in
let no_quotes = extract_unquoted_string ~start:0 ~len content in
try unescaper no_quotes with
| Php_escaping.Invalid_string _ ->
raise_parsing_error env
node (Printf.sprintf "Malformed string literal <<%s>>" no_quotes);
""
let unempty_str = function
| "''" | "\"\"" -> ""
| s -> s
let unesc_dbl s = unempty_str @@ Php_escaping.unescape_double s
let get_quoted_content s =
let open Str in
if string_match (quoted) s 0
then matched_group 1 s
else s
let unesc_xhp s =
Str.global_replace whitespace " " s
let unesc_xhp_attr s =
unesc_dbl @@ get_quoted_content s
type suspension_kind =
| SKSync
| SKAsync
| SKCoroutine
let mk_suspension_kind_ node env has_async has_coroutine =
match has_async, has_coroutine with
| false, false -> SKSync
| true, false -> SKAsync
| false, true -> SKCoroutine
| true, true ->
raise_parsing_error env node "Coroutine functions may not be async";
SKCoroutine
let mk_suspension_kind node env is_async is_coroutine =
mk_suspension_kind_ node env
(not (is_missing is_async))
(not (is_missing is_coroutine))
let mk_fun_kind suspension_kind yield =
match suspension_kind, yield with
| SKSync, true -> FGenerator
| SKAsync, true -> FAsyncGenerator
| SKSync, false -> FSync
| SKAsync, false -> FAsync
| SKCoroutine, _ -> FCoroutine
(* Yield in coroutine is not permitted, the error will be reported at NastCheck *)
let fun_template yielding node suspension_kind env =
let p = pFunction node env in
{ f_mode = mode_annotation env.fi_mode
; f_tparams = []
; f_constrs = []
; f_ret = None
; f_ret_by_ref = false
; f_name = p, ";anonymous"
; f_params = []
; f_body = []
; f_user_attributes = []
; f_fun_kind = mk_fun_kind suspension_kind yielding
; f_namespace = Namespace_env.empty env.parser_options
; f_span = p
; f_doc_comment = None
; f_static = false
}
let param_template node env =
{ param_hint = None
; param_is_reference = false
; param_is_variadic = false
; param_id = pos_name node env
; param_expr = None
; param_modifier = None
; param_callconv = None
; param_user_attributes = []
}
let pShapeFieldName : shape_field_name parser = fun name env ->
match syntax name with
| ScopeResolutionExpression
{ scope_resolution_qualifier; scope_resolution_name; _ } ->
SFclass_const
( pos_name scope_resolution_qualifier env
, pos_name scope_resolution_name env
)
| _ -> let p, n = pos_name name env in SFlit (p, mkStr env name unesc_dbl n)
let mpShapeExpressionField : ('a, (shape_field_name * 'a)) metaparser =
fun hintParser node env ->
match syntax node with
| FieldInitializer
{ field_initializer_name = name; field_initializer_value = ty; _ } ->
let name = pShapeFieldName name env in
let ty = hintParser ty env in
name, ty
| _ -> missing_syntax "shape field" node env
let mpShapeField : ('a, shape_field) metaparser =
fun hintParser node env ->
match syntax node with
| FieldSpecifier { field_question; field_name; field_type; _ } ->
let sf_optional = not (is_missing field_question) in
let sf_name = pShapeFieldName field_name env in
let sf_hint = hintParser field_type env in
{ sf_optional; sf_name; sf_hint }
| _ ->
let sf_name, sf_hint = mpShapeExpressionField hintParser node env in
(* Shape expressions can never have optional fields. *)
{ sf_optional = false; sf_name; sf_hint }
let mpClosureParameter : ('a, hint * param_kind option) metaparser =
fun hintParser node env ->
match syntax node with
| ClosureParameterTypeSpecifier
{ closure_parameter_call_convention
; closure_parameter_type
} ->
let cp_kind =
mpOptional pParamKind closure_parameter_call_convention env in
let cp_hint = hintParser closure_parameter_type env in
cp_hint, cp_kind
| _ -> missing_syntax "closure parameter" node env
(* In some cases, we need to unwrap an extra layer of Block due to lowering
* from CompoundStatement. This applies to `if`, `while` and other control flow
* statements which allow optional curly braces.
*
* In other words, we want these to be lowered into the same Ast
* `if ($b) { func(); }` and `if ($b) func();`
* rather than the left hand side one having an extra `Block` in the Ast
*)
let unwrap_extra_block (stmt : block) : block =
let de_noop = function
| [_, Noop] -> []
| stmts -> stmts
in
match stmt with
| [pos, Unsafe; _, Block b] -> (pos, Unsafe) :: de_noop b
| [_, Block b] -> de_noop b
| blk -> blk
let rec pHint : hint parser = fun node env ->
let rec pHint_ : hint_ parser = fun node env ->
match syntax node with
(* Dirty hack; CastExpression can have type represented by token *)
| Token _
| SimpleTypeSpecifier _
| QualifiedName _
-> Happly (pos_name node env, [])
| ShapeTypeSpecifier { shape_type_fields; shape_type_ellipsis; _ } ->
let si_allows_unknown_fields =
not (is_missing shape_type_ellipsis)
in
let si_shape_field_list =
couldMap ~f:(mpShapeField pHint) shape_type_fields env in
Hshape { si_allows_unknown_fields; si_shape_field_list }
| TupleTypeSpecifier { tuple_types; _ } ->
Htuple (couldMap ~f:pHint tuple_types env)
| KeysetTypeSpecifier { keyset_type_keyword = kw; keyset_type_type = ty; _ }
| VectorTypeSpecifier { vector_type_keyword = kw; vector_type_type = ty; _ }
| ClassnameTypeSpecifier {classname_keyword = kw; classname_type = ty; _ }
| TupleTypeExplicitSpecifier
{ tuple_type_keyword = kw
; tuple_type_types = ty
; _ }
| VarrayTypeSpecifier
{ varray_keyword = kw
; varray_type = ty
; _ }
| VectorArrayTypeSpecifier
{ vector_array_keyword = kw
; vector_array_type = ty
; _ }
-> Happly (pos_name kw env, couldMap ~f:pHint ty env)
| DarrayTypeSpecifier
{ darray_keyword = kw
; darray_key = key
; darray_value = value
; _ }
| MapArrayTypeSpecifier
{ map_array_keyword = kw
; map_array_key = key
; map_array_value = value
; _ } ->
Happly
( pos_name kw env
, pHint key env :: couldMap ~f:pHint value env
)
| DictionaryTypeSpecifier
{ dictionary_type_keyword = kw
; dictionary_type_members = members
; _ } -> Happly (pos_name kw env, couldMap ~f:pHint members env)
| GenericTypeSpecifier { generic_class_type; generic_argument_list } ->
Happly
( pos_name generic_class_type env
, match syntax generic_argument_list with
| TypeArguments { type_arguments_types; _ }
-> couldMap ~f:pHint type_arguments_types env
| _ -> missing_syntax "generic type arguments" generic_argument_list env
)
| NullableTypeSpecifier { nullable_type; _ } ->
Hoption (pHint nullable_type env)
| SoftTypeSpecifier { soft_type; _ } ->
Hsoft (pHint soft_type env)
| ClosureTypeSpecifier {
closure_parameter_list;
closure_return_type;
closure_coroutine; _} ->
let make_variadic_hint variadic_type =
if is_missing variadic_type
then Hvariadic (None)
else Hvariadic (Some (pHint variadic_type env))
in
let (param_list, variadic_hints) =
List.partition_map ~f:(fun x ->
match syntax x with
| VariadicParameter { variadic_parameter_type = vtype; _ } ->
`Snd (make_variadic_hint vtype)
| _ -> `Fst (mpClosureParameter pHint x env))
(as_list closure_parameter_list)
in
let hd_variadic_hint hints =
if List.length hints > 1 then begin
let msg = Printf.sprintf
"%d variadic parameters found. There should be no more than one."
(List.length hints)
in
invariant_failure node msg env
end;
match List.hd hints with
| Some h -> h
| None -> Hnon_variadic
in
let is_coroutine = not (is_missing closure_coroutine) in
let param_type_hints = List.map param_list fst in
let param_callconvs = List.map param_list snd in
Hfun
( is_coroutine
, param_type_hints
, param_callconvs
, hd_variadic_hint variadic_hints
, pHint closure_return_type env
)
| TypeConstant { type_constant_left_type; type_constant_right_type; _ } ->
let child = pos_name type_constant_right_type env in
(match pHint_ type_constant_left_type env with
| Haccess (b, c, cs) -> Haccess (b, c, cs @ [child])
| Happly (b, []) -> Haccess (b, child, [])
| _ -> missing_syntax "type constant base" node env
)
| _ -> missing_syntax "type hint" node env
in
pPos node env, pHint_ node env
type fun_hdr =
{ fh_suspension_kind : suspension_kind
; fh_name : pstring
; fh_constrs : (hint * constraint_kind * hint) list
; fh_type_parameters : tparam list
; fh_parameters : fun_param list
; fh_return_type : hint option
; fh_param_modifiers : fun_param list
; fh_ret_by_ref : bool
}
let empty_fun_hdr =
{ fh_suspension_kind = SKSync
; fh_name = Pos.none, "<ANONYMOUS>"
; fh_constrs = []
; fh_type_parameters = []
; fh_parameters = []
; fh_return_type = None
; fh_param_modifiers = []
; fh_ret_by_ref = false
}
let prevent_intrinsic_generic env node ty =
if not (is_missing ty) && not env.codegen then
raise_parsing_error env node SyntaxError.collection_intrinsic_generic
let rec pSimpleInitializer node env =
match syntax node with
| SimpleInitializer { simple_initializer_value; simple_initializer_equal } ->
pExpr simple_initializer_value env
| _ -> missing_syntax "simple initializer" node env
and pFunParamDefaultValue node env =
match syntax node with
| SimpleInitializer { simple_initializer_value; _ } ->
begin match syntax simple_initializer_value with
| ListExpression _ ->
raise_parsing_error env node (SyntaxError.invalid_default_argument "A list destructuring")
| YieldExpression _
| YieldFromExpression _ ->
raise_parsing_error env node (SyntaxError.invalid_default_argument "A yield")
| PrefixUnaryExpression {
prefix_unary_operator = { syntax = Token t; _ }; _ } when Token.kind t = TK.Await ->
raise_parsing_error env node (SyntaxError.invalid_default_argument "An await")
| _ -> () end;
mpOptional pExpr simple_initializer_value env
| _ -> None
and pFunParam : fun_param parser = fun node env ->
match syntax node with
| ParameterDeclaration
{ parameter_attribute
; parameter_visibility
; parameter_call_convention
; parameter_type
; parameter_name
; parameter_default_value
} ->
let is_reference, is_variadic, name =
match syntax parameter_name with
| DecoratedExpression
{ decorated_expression_decorator; decorated_expression_expression } ->
(* There is a chance that the expression might be nested with an
additional decorator, check this *)
begin match syntax decorated_expression_expression with
| DecoratedExpression
{ decorated_expression_decorator = nested_decorator
; decorated_expression_expression = nested_expression } ->
let decorator = text decorated_expression_decorator in
let nested_decorator = text nested_decorator in
decorator = "&" || nested_decorator = "&",
decorator = "..." || nested_decorator = "...",
nested_expression
| _ ->
let decorator = text decorated_expression_decorator in
decorator = "&", decorator = "...", decorated_expression_expression
end
| _ -> false, false, parameter_name
in
{ param_hint = mpOptional pHint parameter_type env
; param_is_reference = is_reference
; param_is_variadic = is_variadic
; param_id = pos_name name env
; param_expr = pFunParamDefaultValue parameter_default_value env
; param_user_attributes = pUserAttributes env parameter_attribute
; param_callconv =
mpOptional pParamKind parameter_call_convention env
(* implicit field via constructor parameter.
* This is always None except for constructors and the modifier
* can be only Public or Protected or Private.
*)
; param_modifier =
let rec go = function
| [] -> None
| x :: _ when List.mem [Private; Public; Protected] x -> Some x
| _ :: xs -> go xs
in
go (pKinds (fun _ -> ()) parameter_visibility env)
}
| VariadicParameter _
| Token _ when text node = "..."
-> { (param_template node env) with param_is_variadic = true }
| _ -> missing_syntax "function parameter" node env
and pUserAttribute : user_attribute list parser = fun node env ->
match syntax node with
| AttributeSpecification { attribute_specification_attributes; _ } ->
couldMap attribute_specification_attributes env ~f:begin function
| { syntax = Attribute { attribute_name; attribute_values; _}; _ } ->
fun env ->
{ ua_name = pos_name attribute_name env
; ua_params = couldMap ~f:pExpr attribute_values env
}
| node -> missing_syntax "attribute" node
end
| _ -> missing_syntax "attribute specification" node env
and pUserAttributes env attrs =
List.concat @@ couldMap ~f:pUserAttribute attrs env
and pAField : afield parser = fun node env ->
match syntax node with
| ElementInitializer { element_key; element_value; _ } ->
AFkvalue (pExpr element_key env, pExpr element_value env)
| _ -> AFvalue (pExpr node env)
and pString2: expr_location -> node list -> env -> expr list =
let rec convert_name_to_lvar location env n =
match syntax n with
| Token token when Token.kind token = TK.Name ->
let pos, name = pos_name n env in
let id = Lvar (pos, "$" ^ name) in
Some (pos, id)
| SubscriptExpression { subscript_receiver; subscript_index; _ } ->
begin match convert_name_to_lvar location env subscript_receiver with
| Some recv ->
let index = mpOptional (pExpr ~location) subscript_index env in
Some (pPos n env, Array_get (recv, index))
| _ -> None
end
| _ -> None in
let rec aux loc l env acc =
(* in PHP "${x}" in strings is treated as if it was written "$x",
here we recognize pattern: Dollar; EmbeddedBracedExpression { QName (Token.Name) }
produced by FFP and lower it into Lvar.
*)
match l with
| [] -> List.rev acc
| ({ syntax = Token token; _ })::
({ syntax = EmbeddedBracedExpression {
embedded_braced_expression_expression = e; _ }; _
} as expr_with_braces)::
tl when Token.kind token = TK.Dollar ->
let e =
begin match convert_name_to_lvar loc env e with
| Some e -> e
| None ->
let e = pExpr ~location:loc expr_with_braces env in
fst e, Dollar (fst e, BracedExpr e)
end in
aux loc tl env (e::acc)
| x::xs -> aux loc xs env ((pExpr ~location:loc x env)::acc)
in
fun loc l env -> aux loc l env []
and pExprL node env =
(pPos node env, Expr_list (couldMap ~f:pExpr node env))
(* TODO: this function is a hotspot, deep recursion on huge files, attempt more optimization *)
and pMember node env =
match syntax node with
| ElementInitializer { element_key; element_value; _ } ->
(pExpr element_key env, pExpr element_value env)
| _ -> missing_syntax "darray intrinsic expression element" node env
and pExpr ?location:(location=TopLevel) : expr parser = fun node env ->
let split_args_varargs arg_list =
match List.rev (as_list arg_list) with
| { syntax = DecoratedExpression
{ decorated_expression_decorator =
{ syntax = Token token; _ }
; decorated_expression_expression = e
}
; _
} :: xs when Token.kind token = TK.DotDotDot ->
let args = List.rev_map xs (fun x -> pExpr x env) in
let vararg = pExpr e env in
args, [vararg]
| _ ->
let args = couldMap ~f:pExpr arg_list env in
args, [] in
let rec pExpr_ : expr_ parser = fun node env ->
env.recursion_depth := !(env.recursion_depth) + 1;
if !(env.recursion_depth) > recursion_limit then
failwith "Expression recursion limit reached";
let pos = pPos node env in
let result = match syntax node with
| LambdaExpression {
lambda_async; lambda_coroutine; lambda_signature; lambda_body;
lambda_attribute_spec; _ } ->
let suspension_kind = mk_suspension_kind node env lambda_async lambda_coroutine in
let f_params, f_ret =
match syntax lambda_signature with