diff --git a/lang_php/parsing/ast_php.ml b/lang_php/parsing/ast_php.ml index 0dae46790..7b01bf34b 100644 --- a/lang_php/parsing/ast_php.ml +++ b/lang_php/parsing/ast_php.ml @@ -1,14 +1,14 @@ (*s: ast_php.ml *) (*s: Facebook copyright *) (* Yoann Padioleau - * + * * Copyright (C) 2009-2011 Facebook * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License * version 2.1 as published by the Free Software Foundation, with the * special exception on linking described in file license.txt. - * + * * This library is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the file @@ -79,9 +79,9 @@ and 'a wrap = 'a * tok (*x: AST info *) and 'a paren = tok * 'a * tok and 'a brace = tok * 'a * tok -and 'a bracket = tok * 'a * tok +and 'a bracket = tok * 'a * tok and 'a comma_list = ('a, tok (* the comma *)) Common.either list -and 'a comma_list_dots = +and 'a comma_list_dots = ('a, tok (* ... for sgrep *), tok (* the comma *)) Common.either3 list (*x: AST info *) (*s: tarzan annotation *) @@ -100,8 +100,8 @@ and 'a comma_list_dots = * and ; and + * forms of yield that hphp support (e.g. yield ; and * = yield ). One could then have a YieldReturn and YieldAssign * but this may change and none of the analysis in pfff need to * understand yield so for now just make it simple and add yield @@ -266,28 +266,28 @@ type expr = (*e: type exprbis hook *) (*s: type scalar and constant and encaps *) - and scalar = + and scalar = | C of constant | ClassConstant of qualifier * name - + | Guil of tok (* '"' or b'"' *) * encaps list * tok (* '"' *) - | HereDoc of - tok (* < < < EOF, or b < < < EOF *) * - encaps list * + | HereDoc of + tok (* < < < EOF, or b < < < EOF *) * + encaps list * tok (* EOF; *) (* | StringVarName??? *) (*s: type constant *) - and constant = + and constant = (*s: constant constructors *) | Int of string wrap | Double of string wrap (*x: constant constructors *) - (* see also Guil for interpolated strings + (* see also Guil for interpolated strings * The string does not contain the enclosing '"' or "'". * It does not contain either the possible 'b' prefix *) - | String of string wrap + | String of string wrap (*x: constant constructors *) | CName of name (* true, false, null, or defined constant *) (*x: constant constructors *) @@ -301,15 +301,15 @@ type expr = (*s: constant rest *) (*s: type cpp_directive *) (* http://php.net/manual/en/language.constants.predefined.php *) - and cpp_directive = + and cpp_directive = | Line | File | Dir - | ClassC | TraitC + | ClassC | TraitC | MethodC | FunctionC (*e: type cpp_directive *) (*e: constant rest *) (*e: type constant *) (*s: type encaps *) - and encaps = + and encaps = (*s: encaps constructors *) | EncapsString of string wrap (*x: encaps constructors *) @@ -327,29 +327,29 @@ type expr = (*e: type scalar and constant and encaps *) (*s: AST expression operators *) - and fixOp = Dec | Inc - and binaryOp = Arith of arithOp | Logical of logicalOp + and fixOp = Dec | Inc + and binaryOp = Arith of arithOp | Logical of logicalOp (*s: php concat operator *) | BinaryConcat (* . *) (*e: php concat operator *) - and arithOp = + and arithOp = | Plus | Minus | Mul | Div | Mod - | DecLeft | DecRight + | DecLeft | DecRight | And | Or | Xor - and logicalOp = - | Inf | Sup | InfEq | SupEq - | Eq | NotEq + and logicalOp = + | Inf | Sup | InfEq | SupEq + | Eq | NotEq (*s: php identity operators *) | Identical (* === *) | NotIdentical (* !== *) (*e: php identity operators *) | AndLog | OrLog | XorLog | AndBool | OrBool (* diff with AndLog ? short-circuit operators ? *) - and assignOp = AssignOpArith of arithOp + and assignOp = AssignOpArith of arithOp (*s: php assign concat operator *) | AssignConcat (* .= *) (*e: php assign concat operator *) - and unaryOp = + and unaryOp = | UnPlus | UnMinus | UnBang | UnTilde @@ -358,37 +358,37 @@ type expr = (*e: AST expression operators *) (*s: AST expression rest *) - and list_assign = + and list_assign = | ListVar of lvalue | ListList of tok * list_assign comma_list paren | ListEmpty (*x: AST expression rest *) - and array_pair = + and array_pair = | ArrayExpr of expr | ArrayRef of tok (* & *) * lvalue | ArrayArrowExpr of expr * tok (* => *) * expr | ArrayArrowRef of expr * tok (* => *) * tok (* & *) * lvalue (*x: AST expression rest *) - and class_name_reference = + and class_name_reference = | ClassNameRefStatic of class_name_or_kwd | ClassNameRefDynamic of lvalue * obj_prop_access list and obj_prop_access = tok (* -> *) * obj_property (*e: AST expression rest *) - and xhp_html = - | Xhp of xhp_tag wrap * xhp_attribute list * tok (* > *) * + and xhp_html = + | Xhp of xhp_tag wrap * xhp_attribute list * tok (* > *) * xhp_body list * xhp_tag option wrap | XhpSingleton of xhp_tag wrap * xhp_attribute list * tok (* /> *) and xhp_attribute = xhp_attr_name * tok (* = *) * xhp_attr_value and xhp_attr_name = string wrap (* e.g. task-bar *) - and xhp_attr_value = + and xhp_attr_value = | XhpAttrString of tok (* '"' *) * encaps list * tok (* '"' *) | XhpAttrExpr of expr brace (* sgrep: *) | SgrepXhpAttrValueMvar of string wrap - and xhp_body = + and xhp_body = | XhpText of string wrap | XhpExpr of expr brace | XhpNested of xhp_html @@ -402,7 +402,7 @@ and lvalue = (*s: type lvalue_info *) (*e: type lvalue_info *) (*s: lvaluebis constructors *) - | Var of dname * + | Var of dname * (*s: scope_php annotation *) Scope_php.phpscope ref (*e: scope_php annotation *) @@ -410,8 +410,8 @@ and lvalue = | This of tok (* xhp: normally we can not have a FunCall in the lvalue of VArrayAccess, * but with xhp we can. - * - * todo? a VArrayAccessSimple with Constant string in expr ? + * + * todo? a VArrayAccessSimple with Constant string in expr ? *) | VArrayAccess of lvalue * expr option bracket | VArrayAccessXhp of expr * expr option bracket @@ -420,7 +420,7 @@ and lvalue = | VBraceAccess of lvalue * expr brace (*x: lvaluebis constructors *) (* on the left of var *) - | Indirect of lvalue * indirect + | Indirect of lvalue * indirect (*x: lvaluebis constructors *) (* Note that even if A::$v['fld'] was parsed in the grammar * as a Qualifier(A, ArrayAccess($v, 'fld') we @@ -453,30 +453,30 @@ and lvalue = (*s: type lvalue aux *) and indirect = Dollar of tok (*x: type lvalue aux *) - and argument = + and argument = | Arg of expr | ArgRef of tok * w_variable (*x: type lvalue aux *) - and obj_access = + and obj_access = tok (* -> *) * obj_property * argument comma_list paren option - and obj_property = + and obj_property = | ObjProp of obj_dim | ObjPropVar of lvalue (* was originally var_without_obj *) - (* I would like to remove OName from here, as I inline most of them - * in the MethodCallSimple and ObjAccessSimple above, but they - * can also be mentionned in OArrayAccess in the obj_dim, so + (* I would like to remove OName from here, as I inline most of them + * in the MethodCallSimple and ObjAccessSimple above, but they + * can also be mentionned in OArrayAccess in the obj_dim, so * I keep it *) - and obj_dim = + and obj_dim = | OName of name | OBrace of expr brace | OArrayAccess of obj_dim * expr option bracket | OBraceAccess of obj_dim * expr brace (*e: type lvalue aux *) -(* semantic: those grammar rule names were used in the original PHP +(* semantic: those grammar rule names were used in the original PHP * lexer/parser but not enforced. It's just comments. *) and rw_variable = lvalue and r_variable = lvalue @@ -488,19 +488,19 @@ and w_variable = lvalue (* ------------------------------------------------------------------------- *) (*s: AST statement *) (* By introducing Lambda, expr and stmt are now mutually recursive *) -and stmt = +and stmt = (*s: stmt constructors *) | ExprStmt of expr * tok (* ; *) | EmptyStmt of tok (* ; *) (*x: stmt constructors *) | Block of stmt_and_def list brace (*x: stmt constructors *) - | If of tok * expr paren * stmt * + | If of tok * expr paren * stmt * (* elseif *) if_elseif list * (* else *) if_else option (*s: ifcolon *) - | IfColon of tok * expr paren * - tok * stmt_and_def list * new_elseif list * new_else option * + | IfColon of tok * expr paren * + tok * stmt_and_def list * new_elseif list * new_else option * tok * tok (* if(cond): * stmts; defs; @@ -511,12 +511,12 @@ and stmt = * endif; *) (*e: ifcolon *) | While of tok * expr paren * colon_stmt - | Do of tok * stmt * tok * expr paren * tok - | For of tok * tok * + | Do of tok * stmt * tok * expr paren * tok + | For of tok * tok * for_expr * tok * for_expr * tok * for_expr * - tok * + tok * colon_stmt | Switch of tok * expr paren * switch_case_list (*x: stmt constructors *) @@ -524,15 +524,15 @@ and stmt = * otherwise if it's a variable then it must be a foreach_variable *) | Foreach of tok * tok * expr * tok * foreach_var_either * - foreach_arrow option * tok * + foreach_arrow option * tok * colon_stmt (* example: foreach(expr as $lvalue) { colon_stmt } * foreach(expr as $foreach_varialbe => $lvalue) { colon_stmt} - *) + *) (*x: stmt constructors *) | Break of tok * expr option * tok | Continue of tok * expr option * tok - | Return of tok * expr option * tok + | Return of tok * expr option * tok (*x: stmt constructors *) | Throw of tok * expr * tok | Try of tok * stmt_and_def list brace * catch * catch list @@ -556,13 +556,13 @@ and stmt = | ClassDefNested of class_def (*s: AST statement rest *) - and switch_case_list = - | CaseList of + and switch_case_list = + | CaseList of tok (* { *) * tok option (* ; *) * case list * tok (* } *) - | CaseColonList of - tok (* : *) * tok option (* ; *) * case list * + | CaseColonList of + tok (* : *) * tok option (* ; *) * case list * tok (* endswitch *) * tok (* ; *) - and case = + and case = | Case of tok * expr * tok * stmt_and_def list | Default of tok * tok * stmt_and_def list @@ -574,16 +574,16 @@ and stmt = and foreach_variable = is_ref * lvalue and foreach_var_either = (foreach_variable, lvalue) Common.either (*x: AST statement rest *) - and catch = + and catch = tok * (fully_qualified_class_name * dname) paren * stmt_and_def list brace (*x: AST statement rest *) - and use_filename = + and use_filename = | UseDirect of string wrap | UseParen of string wrap paren (*x: AST statement rest *) and declare = name * static_scalar_affect (*x: AST statement rest *) - and colon_stmt = + and colon_stmt = | SingleStmt of stmt | ColonStmt of tok (* : *) * stmt_and_def list * tok (* endxxx *) * tok (* ; *) (*x: AST statement rest *) @@ -603,7 +603,7 @@ and func_def = { f_ref: is_ref; (* can be a Name("__lambda", fakeInfo()) when used for lambdas *) f_name: name; - f_params: parameter comma_list_dots paren; + f_params: parameter comma_list_dots paren; (* TODO: handle ... *) (* static-php-ext: *) f_return_type: hint_type option; (* the opening/closing brace can be (fakeInfo(), ';') for abstract methods *) @@ -611,7 +611,7 @@ and func_def = { (*s: f_type mutable field *) (*e: f_type mutable field *) } - and function_type = + and function_type = | FunctionRegular | FunctionLambda | MethodRegular @@ -625,7 +625,7 @@ and func_def = { p_default: static_scalar_affect option; } (*x: AST function definition rest *) - and hint_type = + and hint_type = | Hint of class_name_or_kwd (* only self/parent, no static *) | HintArray of tok (*x: AST function definition rest *) @@ -635,7 +635,7 @@ and func_def = { (*s: AST lambda definition *) (* the f_name in func_def should be a fake name *) and lambda_def = (lexical_vars option * func_def) - and lexical_vars = tok (* use *) * lexical_var comma_list paren + and lexical_vars = tok (* use *) * lexical_var comma_list paren and lexical_var = LexicalVar of is_ref * dname (* ------------------------------------------------------------------------- *) @@ -667,21 +667,21 @@ and class_def = { *) c_implements: interface option; (* The class_stmt for interfaces are restricted to only abstract methods. - * The class_stmt seems to be unrestricted for traits; can even + * The class_stmt seems to be unrestricted for traits; can even * have some 'use' *) c_body: class_stmt list brace; } (*s: type class_type *) - and class_type = + and class_type = | ClassRegular of tok (* class *) | ClassFinal of tok * tok (* final class *) | ClassAbstract of tok * tok (* abstract class *) | Interface of tok (* interface *) - (* PHP 5.4 traits: http://php.net/manual/en/language.oop5.traits.php + (* PHP 5.4 traits: http://php.net/manual/en/language.oop5.traits.php * Allow to mixin behaviors and data so it's really just * multiple inheritance with a cooler name. - * + * * note: traits are allowed only at toplevel. *) | Trait of tok (* trait *) @@ -694,10 +694,10 @@ and class_def = { (*e: type interface *) (*x: AST class definition *) (*x: AST class definition *) - and class_stmt = + and class_stmt = | ClassConstants of tok (* const *) * class_constant comma_list * tok (*;*) - | ClassVariables of - class_var_modifier * + | ClassVariables of + class_var_modifier * (* static-php-ext: *) hint_type option * class_variable comma_list * tok (* ; *) @@ -705,7 +705,7 @@ and class_def = { | XhpDecl of xhp_decl (* php 5.4, 'use' can appear in classes/traits (but not interface) *) - | UseTrait of tok (*use*) * name comma_list * + | UseTrait of tok (*use*) * name comma_list * (tok (* ; *), trait_rule list brace) Common.either (*s: class_stmt types *) @@ -713,7 +713,7 @@ and class_def = { (*x: class_stmt types *) and class_variable = dname * static_scalar_affect option (*x: class_stmt types *) - and class_var_modifier = + and class_var_modifier = | NoModifiers of tok (* 'var' *) | VModifiers of modifier wrap list (*x: class_stmt types *) @@ -724,25 +724,25 @@ and class_def = { *) and method_def = func_def (*x: class_stmt types *) - and modifier = + and modifier = | Public | Private | Protected | Static | Abstract | Final (*x: class_stmt types *) (*e: class_stmt types *) - and xhp_decl = - | XhpAttributesDecl of + and xhp_decl = + | XhpAttributesDecl of tok (* attribute *) * xhp_attribute_decl comma_list * tok (*;*) (* there is normally only one 'children' declaration in a class *) - | XhpChildrenDecl of + | XhpChildrenDecl of tok (* children *) * xhp_children_decl * tok (*;*) - | XhpCategoriesDecl of + | XhpCategoriesDecl of tok (* category *) * xhp_category_decl comma_list * tok (*;*) - and xhp_attribute_decl = + and xhp_attribute_decl = | XhpAttrInherit of xhp_tag wrap - | XhpAttrDecl of xhp_attribute_type * xhp_attr_name * + | XhpAttrDecl of xhp_attribute_type * xhp_attr_name * xhp_value_affect option * tok option (* is required *) - and xhp_attribute_type = + and xhp_attribute_type = | XhpAttrType of name (* e.g. float, bool, var, array, :foo *) | XhpAttrEnum of tok (* enum *) * constant comma_list brace and xhp_value_affect = tok (* = *) * static_scalar @@ -751,7 +751,7 @@ and class_def = { * regexps can be written. For instance pcdata must be nested. But * here I simplified the type. *) - and xhp_children_decl = + and xhp_children_decl = | XhpChild of xhp_tag wrap (* :x:frag *) | XhpChildCategory of xhp_tag wrap (* %x:frag *) @@ -780,7 +780,7 @@ and trait_rule = unit (* Other declarations *) (* ------------------------------------------------------------------------- *) (*s: AST other declaration *) -and global_var = +and global_var = | GlobalVar of dname | GlobalDollar of tok * r_variable | GlobalDollarExpr of tok * expr brace @@ -794,7 +794,7 @@ and static_var = dname * static_scalar_affect option * be an expr or a static_scalar. We don't need this "isomorphism". * I never leveraged the specificities of static_scalar (maybe a compiler * would, but my checker/refactorers/... don't). - * + * * Note that it's not 'type static_scalar = scalar' because static_scalar * actually allows arrays (why the heck they called it a scalar then ....) * and plus/minus which are only in expr. @@ -825,12 +825,12 @@ and stmt_and_def = stmt * nested function and toplevel functions. Also it's better to * group the toplevel statements together (StmtList below), so that * in the database later they share the same id. - * + * * Note that nested functions are usually under a if(defined(...)) at * the toplevel. There is no ifdef in PHP so they reuse if. *) (*s: AST toplevel *) -and toplevel = +and toplevel = (*s: toplevel constructors *) | StmtList of stmt list | FuncDef of func_def @@ -855,11 +855,11 @@ and toplevel = (*s: AST entity *) (* The goal of the entity type is to lift up important entities which * are originally nested in the AST such as methods. - * + * * history: was in ast_entity_php.ml before but better to put everything * in one file. *) -type entity = +type entity = | FunctionE of func_def | ClassE of class_def | ConstantE of constant_def @@ -875,7 +875,7 @@ type entity = | MiscE of tok list (*e: AST entity *) (*s: AST any *) -type any = +type any = | Lvalue of lvalue | Expr of expr | Stmt2 of stmt @@ -895,7 +895,7 @@ type any = | ListAssign of list_assign | ColonStmt2 of colon_stmt | Case2 of case - + | XhpAttribute of xhp_attribute | XhpAttrValue of xhp_attr_value | XhpHtml2 of xhp_html @@ -915,7 +915,7 @@ type any = (*****************************************************************************) let noScope () = ref (Scope_code.NoScope) -let fakeInfo ?(next_to=None) str = { +let fakeInfo ?(next_to=None) str = { token = FakeTokStr (str, next_to); comments = (); transfo = NoTransfo; @@ -949,18 +949,18 @@ let map_comma_list f xs = List.map (fun x -> ) xs -let unarg arg = +let unarg arg = match arg with | Arg e -> e | ArgRef _ -> failwith "Found a ArgRef" -let unargs xs = +let unargs xs = uncomma xs +> Common.partition_either (function | Arg e -> Left e | ArgRef (t, e) -> Right (e) ) -let unmodifiers class_vars = +let unmodifiers class_vars = match class_vars with | NoModifiers _ -> [] | VModifiers xs -> List.map unwrap xs @@ -979,7 +979,7 @@ let pinfo_of_info = Parse_info.pinfo_of_info (*x: ast_php.ml *) let rewrap_str = Parse_info.rewrap_str (*x: ast_php.ml *) -(* for error reporting *) +(* for error reporting *) let string_of_info x = Parse_info.string_of_info x let is_origintok = Parse_info.is_origintok @@ -991,7 +991,7 @@ let compare_pos ii1 ii2 = | FakeTokStr (s, Some (pi_orig, offset)) -> Virt (pi_orig, offset) | FakeTokStr _ - | Ab + | Ab -> failwith "get_pos: Ab or FakeTok" | ExpandedTok (pi_pp, pi_orig, offset) -> Virt (pi_orig, offset) @@ -1002,12 +1002,12 @@ let compare_pos ii1 ii2 = | (Real p1, Real p2) -> compare p1.Parse_info.charpos p2.Parse_info.charpos | (Virt (p1,_), Real p2) -> - if (compare p1.Parse_info.charpos p2.Parse_info.charpos) =|= (-1) - then (-1) + if (compare p1.Parse_info.charpos p2.Parse_info.charpos) =|= (-1) + then (-1) else 1 | (Real p1, Virt (p2,_)) -> - if (compare p1.Parse_info.charpos p2.Parse_info.charpos) =|= 1 - then 1 + if (compare p1.Parse_info.charpos p2.Parse_info.charpos) =|= 1 + then 1 else (-1) | (Virt (p1,o1), Virt (p2,o2)) -> let poi1 = p1.Parse_info.charpos in @@ -1030,25 +1030,25 @@ let compare_pos ii1 ii2 = * information, to "abstract those line" (al) information. *) -let al_info x = +let al_info x = { x with token = Ab } (*x: ast_php.ml *) (*****************************************************************************) (* Views *) (*****************************************************************************) -(* examples: +(* examples: * inline more static funcall in expr type or variable type - * + * *) (*x: ast_php.ml *) (*****************************************************************************) (* Helpers, could also be put in lib_parsing.ml instead *) (*****************************************************************************) -let name e = +let name e = match e with | Name x -> unwrap x - | XhpName (xs, _tok) -> + | XhpName (xs, _tok) -> ":" ^ (Common.join ":" xs) let str_of_name x = name x @@ -1056,8 +1056,8 @@ let dname (DName x) = unwrap x let str_of_dname x = dname x (*x: ast_php.ml *) -let info_of_name e = - match e with +let info_of_name e = + match e with | (Name (x,y)) -> y | (XhpName (x,y)) -> y let info_of_dname (DName (x,y)) = y diff --git a/lang_php/parsing/lexer_php.mll b/lang_php/parsing/lexer_php.mll index e8c349110..0814e6a28 100644 --- a/lang_php/parsing/lexer_php.mll +++ b/lang_php/parsing/lexer_php.mll @@ -2,24 +2,24 @@ { (*s: Facebook copyright *) (* Yoann Padioleau - * + * * Copyright (C) 2009-2012 Facebook * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License * version 2.1 as published by the Free Software Foundation, with the * special exception on linking described in file license.txt. - * + * * This library is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the file * license.txt for more details. *) (*e: Facebook copyright *) -open Common +open Common (*s: basic pfff module open and aliases *) -open Ast_php +open Ast_php module Ast = Ast_php module Flag = Flag_parsing_php @@ -39,7 +39,7 @@ open Parser_php (*****************************************************************************) (* Wrappers *) (*****************************************************************************) -let pr2, pr2_once = Common.mk_pr2_wrappers Flag.verbose_lexing +let pr2, pr2_once = Common.mk_pr2_wrappers Flag.verbose_lexing (*****************************************************************************) (* Helpers *) @@ -48,16 +48,16 @@ exception Lexical of string (*s: lexer helpers *) (* pad: hack around ocamllex to emulate the yyless of flex *) -let yyless n lexbuf = +let yyless n lexbuf = lexbuf.Lexing.lex_curr_pos <- lexbuf.Lexing.lex_curr_pos - n; let currp = lexbuf.Lexing.lex_curr_p in lexbuf.Lexing.lex_curr_p <- { currp with Lexing.pos_cnum = currp.Lexing.pos_cnum - n; } (*x: lexer helpers *) -let tok lexbuf = +let tok lexbuf = Lexing.lexeme lexbuf -let tokinfo lexbuf = +let tokinfo lexbuf = Parse_info.tokinfo_str_pos (Lexing.lexeme lexbuf) (Lexing.lexeme_start lexbuf) (*x: lexer helpers *) @@ -72,9 +72,9 @@ let case_str s = else String.lowercase s -let xhp_or_t_ident ii fii = - if !Flag.xhp_builtin - then fii ii +let xhp_or_t_ident ii fii = + if !Flag.xhp_builtin + then fii ii else T_IDENT(case_str (Ast.str_of_info ii), ii) let lang_ext_or_t_ident ii fii = @@ -90,11 +90,11 @@ let lang_ext_or_t_ident ii fii = * Note that PHP allows those keywords to be used in certain places, * for instance as object fields as in $o->while, so the transformation * from a LABEL to those keywords is done only in a few cases. - * + * * note: PHP is case insensitive so this hash table is used on * a lowercased string so don't put strings in uppercase below because * such keyword would never be reached! - * + * * coupling: if you add a new keyword, don't forget to also modify * the xhp_attr_name_atom grammar rule in parser_php.mly * @@ -113,14 +113,14 @@ let keyword_table = Common.hash_of_list [ "class_xdebug", (fun ii -> T_CLASS_XDEBUG ii); "resource_xdebug", (fun ii -> T_RESOURCE_XDEBUG ii); - (* Those tokens were not in the original PHP lexer. This allowed to - * have "self"/"parent" to be used at more places, e.g. as a function + (* Those tokens were not in the original PHP lexer. This allowed to + * have "self"/"parent" to be used at more places, e.g. as a function * name which is tolerated by PHP but should not IMHO. Those idents * have a special meaning and this should be reflected in the lexer, - * especially since PHP 5.3 which allows static:: in addition to - * self::, parent::. 'static' is a keyword so there is no reason + * especially since PHP 5.3 which allows static:: in addition to + * self::, parent::. 'static' is a keyword so there is no reason * to not make self/parent keywords too. - * + * * todo: should do something similar for $this. *) "self", (fun ii -> T_SELF ii); "parent", (fun ii -> T_PARENT ii); @@ -180,7 +180,7 @@ let keyword_table = Common.hash_of_list [ "unset", (fun ii -> T_UNSET ii); "isset", (fun ii -> T_ISSET ii); (*e: repetitive keywords table *) - "__line__", (fun ii -> T_LINE ii); + "__line__", (fun ii -> T_LINE ii); "__file__", (fun ii -> T_FILE ii); "__dir__", (fun ii -> T_DIR ii); "__function__", (fun ii ->T_FUNC_C ii); "__method__",(fun ii ->T_METHOD_C ii); "__class__", (fun ii -> T_CLASS_C ii);" __trait__", (fun ii ->T_TRAIT_C ii); @@ -211,7 +211,7 @@ let keyword_table = Common.hash_of_list [ (* "empty" is already a PHP keyword, see T_EMPTY *) "pcdata", (fun ii -> xhp_or_t_ident ii (fun x -> T_XHP_PCDATA x)); ] -let _ = assert ((Common.hkeys keyword_table) +> +let _ = assert ((Common.hkeys keyword_table) +> List.for_all (fun s -> s = String.lowercase s)) (*e: keywords_table hash *) @@ -226,7 +226,7 @@ let _ = assert ((Common.hkeys keyword_table) +> * mean different things. Indeed the PHP language in fact supports * multiple languages or "modes" inside the same script (which also * make emacs mode for such language harder to define). - * + * * Inside the PHP script code part, the quote is the start of a string * and there must be a corresponding quote ending the string. Inside * the HTML part of a PHP file it's just a character like any other @@ -235,13 +235,13 @@ let _ = assert ((Common.hkeys keyword_table) +> * as 'if' can again mean different things; when they are preceded by a * '->' they correspond to the possible name of a field, otherwise * they are special PHP keywords. - * + * * Because all of this, the lexer has multiple states which are * represented below and adjusted via some push/pop_mode function * below. Depending on the state the lexer behaves differently. *) -type state_mode = +type state_mode = (* aka HTML mode *) | INITIAL (* started with *) @@ -253,8 +253,8 @@ type state_mode = * | ST_DOC_COMMENT * | ST_ONE_LINE_COMMENT *) - (* started with ", finished with ". In most languages strings - * are a single tokens but PHP allow interpolation which means + (* started with ", finished with ". In most languages strings + * are a single tokens but PHP allow interpolation which means * a string can contain nested PHP variables or expressions. *) | ST_DOUBLE_QUOTES @@ -271,7 +271,7 @@ type state_mode = (* started with <<<'XXX', finished by XXX; *) | ST_START_NOWDOC of string - (* started with ' by transiting to ST_IN_XHP_TEXT, or really finished * by '/>'. *) @@ -285,25 +285,25 @@ type state_mode = (*s: lexer state global variables *) let default_state = INITIAL -let _mode_stack = +let _mode_stack = ref [default_state] (*x: lexer state global variables *) (* because ocamllex does not have the yyless feature, have to cheat. - * update: in fact can hack my own yyless so maybe should revisit + * update: in fact can hack my own yyless so maybe should revisit * this code. *) -let _pending_tokens = +let _pending_tokens = ref ([]: Parser_php.token list) -(* The logic to modify _last_non_whitespace_like_token is in the +(* The logic to modify _last_non_whitespace_like_token is in the * caller of the lexer, that is in Parse_php.tokens. * Used for XHP. *) -let _last_non_whitespace_like_token = +let _last_non_whitespace_like_token = ref (None: Parser_php.token option) (*e: lexer state global variables *) (*s: lexer state global reinitializer *) -let reset () = +let reset () = _mode_stack := [default_state]; (*s: auxillary reset lexing actions *) _pending_tokens := []; @@ -313,10 +313,10 @@ let reset () = (*e: lexer state global reinitializer *) (*s: lexer state function hepers *) -let rec current_mode () = - try +let rec current_mode () = + try Common.top !_mode_stack - with Failure("hd") -> + with Failure("hd") -> pr2("LEXER: mode_stack is empty, defaulting to INITIAL"); reset(); current_mode () @@ -324,20 +324,20 @@ let rec current_mode () = let push_mode mode = Common.push2 mode _mode_stack let pop_mode () = ignore(Common.pop2 _mode_stack) -(* What is the semantic of BEGIN() in flex ? start from scratch with empty - * stack ? +(* What is the semantic of BEGIN() in flex ? start from scratch with empty + * stack ? *) -let set_mode mode = +let set_mode mode = pop_mode(); push_mode mode; () (* Here is an example of state transition. Given a php file like: * - * foobar; ?> - * + * foobar; ?> + * * we start with the stack in [INITIAL]. The transitions are then: - * + * * ' [IN_SCRIPTING], via set_mode() * ' ' -> [IN_SCRIPTING] * 'return' -> [IN_SCRIPTING] @@ -352,33 +352,33 @@ let set_mode mode = * ';' -> [IN_SCRIPTING] * ' ' -> [IN_SCRIPTING] * '?> -> [INITIAL], via set_mode() - * + * *) (*x: lexer state function hepers *) -let push_token tok = +let push_token tok = _pending_tokens := tok::!_pending_tokens (*e: lexer state function hepers *) (*e: lexer state trick helpers *) -(* xhp: the function below is used to disambiguate the use +(* xhp: the function below is used to disambiguate the use * of ":" and "%" as either a way to start an XHP identifier or as * a binary operator. Note that we use a whitelist approach * for detecting ':' as a binary operator whereas HPHP and * XHPAST use a whitelist approach for detecting ':' as the * start of an XHP identifier. - * - * How to know the following lists of tokens is correct ? - * We should compute FOLLOW(tok) for all tokens and check + * + * How to know the following lists of tokens is correct ? + * We should compute FOLLOW(tok) for all tokens and check * if "%" or ":" can be in it ? *) -let is_in_binary_operator_position last_tok = +let is_in_binary_operator_position last_tok = match last_tok with | Some ( - (* if we are after a number or any kind of scalar, then it's ok to + (* if we are after a number or any kind of scalar, then it's ok to * have a binary operator *) T_LNUMBER _ | T_DNUMBER _ - | T_CONSTANT_ENCAPSED_STRING _ | TGUIL _ | TBACKQUOTE _ + | T_CONSTANT_ENCAPSED_STRING _ | TGUIL _ | TBACKQUOTE _ (* same for ']' or ')'; anything that "terminates" an expression *) | TCBRA _ | TCPAR _ @@ -413,11 +413,11 @@ let HNUM = "0x"['0'-'9''a'-'f''A'-'F']+ * LITERAL_DOLLAR matches unescaped $ that aren't followed by a label character * or a { and therefore will be taken literally. The case of literal $ before * a variable or "${" is handled in a rule for each string type - * + * * TODO: \x7f-\xff */ *) -let DOUBLE_QUOTES_LITERAL_DOLLAR = +let DOUBLE_QUOTES_LITERAL_DOLLAR = ("$"+([^'a'-'z''A'-'Z''_''$''"''\\' '{']|('\\' ANY_CHAR))) let BACKQUOTE_LITERAL_DOLLAR = ("$"+([^'a'-'z''A'-'Z''_''$''`''\\' '{']|('\\' ANY_CHAR))) @@ -431,10 +431,10 @@ let BACKQUOTE_LITERAL_DOLLAR = * that the next line doesn't contain a possible ending label */ *) -let DOUBLE_QUOTES_CHARS = - ("{"*([^'$''"''\\''{']| +let DOUBLE_QUOTES_CHARS = + ("{"*([^'$''"''\\''{']| ("\\" ANY_CHAR))| DOUBLE_QUOTES_LITERAL_DOLLAR) -let BACKQUOTE_CHARS = +let BACKQUOTE_CHARS = ("{"*([^'$' '`' '\\' '{']|('\\' ANY_CHAR))| BACKQUOTE_LITERAL_DOLLAR) (*x: regexp aliases *) (*x: regexp aliases *) @@ -455,15 +455,15 @@ rule st_in_scripting = parse (* spacing/comments *) (* ----------------------------------------------------------------------- *) (*s: comments rules *) - | "/*" { - let info = tokinfo lexbuf in + | "/*" { + let info = tokinfo lexbuf in let com = st_comment lexbuf in T_COMMENT(info +> tok_add_s com) } | "/**/" { T_COMMENT(tokinfo lexbuf) } | "/**" { (* RESET_DOC_COMMENT(); *) - let info = tokinfo lexbuf in + let info = tokinfo lexbuf in let com = st_comment lexbuf in T_DOC_COMMENT(info +> tok_add_s com) } @@ -504,24 +504,24 @@ rule st_in_scripting = parse | ">>=" { T_SR_EQUAL(tokinfo lexbuf) } | ".=" { T_CONCAT_EQUAL(tokinfo lexbuf) } - | "==" { T_IS_EQUAL(tokinfo lexbuf) } + | "==" { T_IS_EQUAL(tokinfo lexbuf) } | "!=" { T_IS_NOT_EQUAL(tokinfo lexbuf) } - | "===" { T_IS_IDENTICAL(tokinfo lexbuf) } + | "===" { T_IS_IDENTICAL(tokinfo lexbuf) } | "!==" { T_IS_NOT_IDENTICAL(tokinfo lexbuf) } | "<>" { T_IS_NOT_EQUAL(tokinfo lexbuf) } - | "<=" { T_IS_SMALLER_OR_EQUAL(tokinfo lexbuf) } - | ">=" { T_IS_GREATER_OR_EQUAL(tokinfo lexbuf) } + | "<=" { T_IS_SMALLER_OR_EQUAL(tokinfo lexbuf) } + | ">=" { T_IS_GREATER_OR_EQUAL(tokinfo lexbuf) } - | "<" { TSMALLER(tokinfo lexbuf) } - | ">" { TGREATER(tokinfo lexbuf) } + | "<" { TSMALLER(tokinfo lexbuf) } + | ">" { TGREATER(tokinfo lexbuf) } - | "&&" { T_BOOLEAN_AND(tokinfo lexbuf) } - | "||" { T_BOOLEAN_OR(tokinfo lexbuf) } + | "&&" { T_BOOLEAN_AND(tokinfo lexbuf) } + | "||" { T_BOOLEAN_OR(tokinfo lexbuf) } - | "<<" { T_SL(tokinfo lexbuf) } + | "<<" { T_SL(tokinfo lexbuf) } | ">>" { T_SR(tokinfo lexbuf) } - | "&" { TAND(tokinfo lexbuf) } + | "&" { TAND(tokinfo lexbuf) } | "|" { TOR(tokinfo lexbuf) } | "^" { TXOR(tokinfo lexbuf) } @@ -534,11 +534,11 @@ rule st_in_scripting = parse | "xor" { T_LOGICAL_XOR(tokinfo lexbuf) } (*e: repetitive symbol rules *) (*x: symbol rules *) - (* Flex/Bison allow to use single characters directly as-is in the grammar - * by adding this in the lexer: - * - * {TOKENS} { return yytext[0];} - * + (* Flex/Bison allow to use single characters directly as-is in the grammar + * by adding this in the lexer: + * + * {TOKENS} { return yytext[0];} + * * We don't, so we have transformed all those tokens in proper tokens with * a name in the parser, and return them in the lexer. *) @@ -558,42 +558,42 @@ rule st_in_scripting = parse | ":" { TCOLON(tokinfo lexbuf) } | "?" { TQUESTION(tokinfo lexbuf) } - (* semantic grep *) + (* semantic grep or vargs extension *) | "..." { TDOTS(tokinfo lexbuf) } (*x: symbol rules *) (* we may come from a st_looking_for_xxx context, like in string * interpolation, so seeing a } we pop_mode! *) - | '}' { - pop_mode (); + | '}' { + pop_mode (); (* RESET_DOC_COMMENT(); ??? *) TCBRACE(tokinfo lexbuf) } - | '{' { - push_mode ST_IN_SCRIPTING; + | '{' { + push_mode ST_IN_SCRIPTING; TOBRACE(tokinfo lexbuf) } (*x: symbol rules *) | ("->" as sym) (WHITESPACEOPT as white) (LABEL as label) { - (* TODO: The ST_LOOKING_FOR_PROPERTY state does not work for now because + (* TODO: The ST_LOOKING_FOR_PROPERTY state does not work for now because * it requires a yyless(1) which is not available in ocamllex (or is it ?) * So have to cheat and use instead the pending_token with push_token. - * - * buggy: push_mode ST_LOOKING_FOR_PROPERTY; + * + * buggy: push_mode ST_LOOKING_FOR_PROPERTY; *) let info = tokinfo lexbuf in - + let syminfo = rewrap_str sym info in let parse_info = Parse_info.parse_info_of_info info in - let pos_after_sym = + let pos_after_sym = parse_info.Parse_info.charpos + String.length sym in let pos_after_white = pos_after_sym + String.length white in let whiteinfo = Parse_info.tokinfo_str_pos white pos_after_sym in let lblinfo = Parse_info.tokinfo_str_pos label pos_after_white in - + push_token (T_IDENT (case_str label, lblinfo)); (* todo: could be newline ... *) push_token (TSpaces (whiteinfo)); @@ -611,9 +611,9 @@ rule st_in_scripting = parse (*x: symbol rules *) (* XHP "elements". - * + * * In XHP the ":" and "%" characters are used to identify - * XHP tags, e.g. :x:frag. There is some possible ambiguity though + * XHP tags, e.g. :x:frag. There is some possible ambiguity though * with their others use in PHP: ternary expr and cases for ":" and * the modulo binary operator for "%". It is legal in PHP to do * e?1:null; or case 1:null. We thus can not blindly considerate ':null' @@ -630,11 +630,11 @@ rule st_in_scripting = parse * lexical level. *) | ":" (XHPTAG as tag) { - if !Flag.xhp_builtin && + if !Flag.xhp_builtin && not (is_in_binary_operator_position !_last_non_whitespace_like_token) then let xs = Common.split ":" tag in - T_XHP_COLONID_DEF (xs, tokinfo lexbuf) + T_XHP_COLONID_DEF (xs, tokinfo lexbuf) else begin yyless (String.length tag) lexbuf; TCOLON(tokinfo lexbuf) @@ -646,28 +646,28 @@ rule st_in_scripting = parse not (is_in_binary_operator_position !_last_non_whitespace_like_token) then let xs = Common.split ":" tag in - T_XHP_PERCENTID_DEF (xs, tokinfo lexbuf) + T_XHP_PERCENTID_DEF (xs, tokinfo lexbuf) else begin yyless (String.length tag) lexbuf; TMOD(tokinfo lexbuf) end } - (* xhp: we need to disambiguate the different use of '<' to know whether + (* xhp: we need to disambiguate the different use of '<' to know whether * we are in a position where an XHP construct can be started. Knowing * what was the previous token seems enough; no need to hack the * grammar to have a global shared by the lexer and parser. - * + * * We could maybe even return a TSMALLER in both cases and still - * not generate any conflict in the grammar, but it feels cleaner to + * not generate any conflict in the grammar, but it feels cleaner to * generate a different token, because we will really change the lexing * mode when we will see a '>' which makes the parser enter in the * ST_IN_XHP_TEXT state where it's ok to write "I don't like you" * in which the quote does not need to be ended. - * + * * note: no leading ":" for the tag when in "use" position. *) - | "<" (XHPTAG as tag) { + | "<" (XHPTAG as tag) { let xs = Common.split ":" tag in @@ -676,7 +676,7 @@ rule st_in_scripting = parse * are possibly before a XHP construct ? trial-and-error ? * Usually having a '<' after a punctuation means XHP. * Indeed '<' is a binary operator which excepts scalar. - * + * * todo? TCPAR ? no, because it's ok to do (1) < (2) ! *) | Some ( @@ -684,7 +684,7 @@ rule st_in_scripting = parse | T_ECHO _ | T_PRINT _ | T_CLONE _ | TSEMICOLON _ | TCOMMA _ | TOBRACE _ | TCBRACE _ - | T_RETURN _ + | T_RETURN _ | TEQ _ | T_CONCAT_EQUAL _ | T_DOUBLE_ARROW _ | TQUESTION _ | TCOLON _ @@ -693,7 +693,7 @@ rule st_in_scripting = parse when !Flag.xhp_builtin -> push_mode (ST_IN_XHP_TAG xs); T_XHP_OPEN_TAG(xs, tokinfo lexbuf) - | _ -> + | _ -> yyless (String.length tag) lexbuf; TSMALLER(tokinfo lexbuf) } @@ -701,8 +701,8 @@ rule st_in_scripting = parse | "@required" { let s = tok lexbuf in if !Flag.xhp_builtin - then T_XHP_REQUIRED (tokinfo lexbuf) - else begin + then T_XHP_REQUIRED (tokinfo lexbuf) + else begin yyless (String.length s - 1) lexbuf; T__AT(tokinfo lexbuf) end @@ -726,23 +726,23 @@ rule st_in_scripting = parse | LABEL { let info = tokinfo lexbuf in let s = tok lexbuf in - match Common.optionise (fun () -> + match Common.optionise (fun () -> (* PHP is case insensitive ... it's ok to write IF(...) { ... } *) Hashtbl.find keyword_table (String.lowercase s)) with | Some f -> f info (* was called T_STRING in original grammar *) | None -> - T_IDENT (case_str s, info) + T_IDENT (case_str s, info) } (* Could put a special rule for "$this", but there are multiple places here * where we can generate a T_VARIABLE, and we can have even expressions - * like ${this}, so it is simpler to do the "this-analysis" in the grammar, + * like ${this}, so it is simpler to do the "this-analysis" in the grammar, * later when we generate a Var or This. *) | "$" (LABEL as s) { - T_VARIABLE(case_str s, tokinfo lexbuf) + T_VARIABLE(case_str s, tokinfo lexbuf) } (*e: keyword and ident rules *) @@ -751,19 +751,19 @@ rule st_in_scripting = parse (* Constant *) (* ----------------------------------------------------------------------- *) (*s: constant rules *) - | LNUM - { + | LNUM + { (* more? cf original lexer *) let s = tok lexbuf in let ii = tokinfo lexbuf in - try + try let _ = int_of_string s in T_LNUMBER(s, ii) - with Failure _ -> + with Failure _ -> T_DNUMBER(s, (*float_of_string s,*) ii) } - | HNUM + | HNUM { (* more? cf orginal lexer *) T_DNUMBER(tok lexbuf, tokinfo lexbuf) @@ -779,42 +779,42 @@ rule st_in_scripting = parse (* ----------------------------------------------------------------------- *) (*s: strings rules *) (* - * The original PHP lexer does a few things to make the - * difference at parsing time between static strings (which do not + * The original PHP lexer does a few things to make the + * difference at parsing time between static strings (which do not * contain any interpolation) and dynamic strings. So some regexps * below are quite hard to understand ... but apparently it works. * When the lexer thinks it's a dynamic strings, it let the grammar * do most of the hard work. See the rules using TGUIL in the grammar * (and here in the lexer). - * + * * The optional 'b' at the beginning is for binary strings. - * + * * /* * ("{"*|"$"* ) handles { or $ at the end of a string (or the entire * contents) - * - * + * + * * int bprefix = (yytext[0] != '"') ? 1 : 0; * zend_scan_escape_string(zendlval, yytext+bprefix+1, yyleng-bprefix-2, '"' TSRMLS_CC); - */ + */ *) - + (* static strings *) - | 'b'? (['"'] ((DOUBLE_QUOTES_CHARS* ("{"*|"$"* )) as s) ['"']) + | 'b'? (['"'] ((DOUBLE_QUOTES_CHARS* ("{"*|"$"* )) as s) ['"']) { T_CONSTANT_ENCAPSED_STRING(s, tokinfo lexbuf) } | 'b'? (['\''] (([^'\'' '\\']|('\\' ANY_CHAR))* as s) ['\'']) - { + { (* more? cf original lexer *) T_CONSTANT_ENCAPSED_STRING(s, tokinfo lexbuf) } (*x: strings rules *) (* dynamic strings *) - | ['"'] { + | ['"'] { push_mode ST_DOUBLE_QUOTES; TGUIL(tokinfo lexbuf) } - | ['`'] { + | ['`'] { push_mode ST_BACKQUOTE; TBACKQUOTE(tokinfo lexbuf) } @@ -836,45 +836,45 @@ rule st_in_scripting = parse (* Misc *) (* ----------------------------------------------------------------------- *) (*s: misc rules *) - (* ugly, they could have done that in the grammar ... or maybe it was - * because it could lead to some ambiguities ? + (* ugly, they could have done that in the grammar ... or maybe it was + * because it could lead to some ambiguities ? *) - | "(" TABS_AND_SPACES ("int"|"integer") TABS_AND_SPACES ")" + | "(" TABS_AND_SPACES ("int"|"integer") TABS_AND_SPACES ")" { T_INT_CAST(tokinfo lexbuf) } - | "(" TABS_AND_SPACES ("real"|"double"|"float") TABS_AND_SPACES ")" + | "(" TABS_AND_SPACES ("real"|"double"|"float") TABS_AND_SPACES ")" { T_DOUBLE_CAST(tokinfo lexbuf) } - | "(" TABS_AND_SPACES "string" TABS_AND_SPACES ")" + | "(" TABS_AND_SPACES "string" TABS_AND_SPACES ")" { T_STRING_CAST(tokinfo lexbuf); } - | "(" TABS_AND_SPACES "binary" TABS_AND_SPACES ")" + | "(" TABS_AND_SPACES "binary" TABS_AND_SPACES ")" { T_STRING_CAST(tokinfo lexbuf); } - | "(" TABS_AND_SPACES "array" TABS_AND_SPACES ")" + | "(" TABS_AND_SPACES "array" TABS_AND_SPACES ")" { T_ARRAY_CAST(tokinfo lexbuf); } - | "(" TABS_AND_SPACES "object" TABS_AND_SPACES ")" + | "(" TABS_AND_SPACES "object" TABS_AND_SPACES ")" { T_OBJECT_CAST(tokinfo lexbuf); } - | "(" TABS_AND_SPACES ("bool"|"boolean") TABS_AND_SPACES ")" + | "(" TABS_AND_SPACES ("bool"|"boolean") TABS_AND_SPACES ")" { T_BOOL_CAST(tokinfo lexbuf); } (* PHP is case insensitive for many things *) - | "(" TABS_AND_SPACES "Array" TABS_AND_SPACES ")" + | "(" TABS_AND_SPACES "Array" TABS_AND_SPACES ")" { T_ARRAY_CAST(tokinfo lexbuf); } - | "(" TABS_AND_SPACES "Object" TABS_AND_SPACES ")" + | "(" TABS_AND_SPACES "Object" TABS_AND_SPACES ")" { T_OBJECT_CAST(tokinfo lexbuf); } - | "(" TABS_AND_SPACES ("Bool"|"Boolean") TABS_AND_SPACES ")" + | "(" TABS_AND_SPACES ("Bool"|"Boolean") TABS_AND_SPACES ")" { T_BOOL_CAST(tokinfo lexbuf); } - | "(" TABS_AND_SPACES ("unset") TABS_AND_SPACES ")" + | "(" TABS_AND_SPACES ("unset") TABS_AND_SPACES ")" { T_UNSET_CAST(tokinfo lexbuf); } (*x: misc rules *) - | "?>" + | "?>" { (* because of XHP and my token merger: - * old: | "")NEWLINE? + * old: | "")NEWLINE? * see tests/xhp/pb_cant_merge2.php *) match current_mode () with @@ -883,14 +883,14 @@ rule st_in_scripting = parse (*/* implicit ';' at php-end tag */*) (* todo? ugly, could instead generate a FakeToken or * ExpandedToken, but then some code later may assume - * right now that all tokens from the lexer are + * right now that all tokens from the lexer are * origin tokens, so may be hard to change. - * + * * old: (T_CLOSE_TAG(tokinfo lexbuf)) * note that T_CLOSE_TAG was skipped anyway in Parse_php.parse_php *) TSEMICOLON(tokinfo lexbuf) - + | ST_IN_SCRIPTING2 -> set_mode INITIAL; T_CLOSE_TAG_OF_ECHO(tokinfo lexbuf) @@ -903,8 +903,8 @@ rule st_in_scripting = parse (* ----------------------------------------------------------------------- *) (*s: semi repetitive st_in_scripting rules for eof and error handling *) | eof { EOF (tokinfo lexbuf +> Ast.rewrap_str "") } - | _ { - if !Flag.verbose_lexing + | _ { + if !Flag.verbose_lexing then pr2_once ("LEXER:unrecognised symbol, in token rule:"^tok lexbuf); TUnknown (tokinfo lexbuf) } @@ -920,7 +920,7 @@ and initial = parse | "" + | "" { (* XXX if short_tags normally otherwise T_INLINE_HTML *) (* pr2 "BAD USE OF Ast.rewrap_str "") } - | _ (* ANY_CHAR *) { - if !Flag.verbose_lexing + | _ (* ANY_CHAR *) { + if !Flag.verbose_lexing then pr2_once ("LEXER:unrecognised symbol, in token rule:"^tok lexbuf); TUnknown (tokinfo lexbuf) } @@ -1019,14 +1019,14 @@ and st_var_offset = parse | "$" (LABEL as s) { T_VARIABLE(case_str s, tokinfo lexbuf) } | LABEL { T_IDENT(case_str (tok lexbuf), tokinfo lexbuf) } - | "]" { + | "]" { pop_mode(); TCBRA(tokinfo lexbuf); } (*s: repetitive st_var_offset rules for error handling *) | eof { EOF (tokinfo lexbuf +> Ast.rewrap_str "") } - | _ { - if !Flag.verbose_lexing + | _ { + if !Flag.verbose_lexing then pr2_once ("LEXER:unrecognised symbol, in st_var_offset rule:"^tok lexbuf); TUnknown (tokinfo lexbuf) } @@ -1040,9 +1040,9 @@ and st_var_offset = parse and st_double_quotes = parse - | DOUBLE_QUOTES_CHARS+ { - T_ENCAPSED_AND_WHITESPACE(tok lexbuf, tokinfo lexbuf) - } + | DOUBLE_QUOTES_CHARS+ { + T_ENCAPSED_AND_WHITESPACE(tok lexbuf, tokinfo lexbuf) + } (* todo? was in original scanner ? *) | "{" { T_ENCAPSED_AND_WHITESPACE(tok lexbuf, tokinfo lexbuf) } @@ -1052,7 +1052,7 @@ and st_double_quotes = parse (*x: encapsulated dollar stuff rules *) | "$" (LABEL as s) "[" { let info = tokinfo lexbuf in - + let varinfo = rewrap_str ("$" ^ s) info in let charpos_info = Ast.pos_of_info varinfo in let pos_after_label = charpos_info + String.length ("$" ^ s) in @@ -1066,7 +1066,7 @@ and st_double_quotes = parse | "$" { T_ENCAPSED_AND_WHITESPACE(tok lexbuf, tokinfo lexbuf) } (*x: encapsulated dollar stuff rules *) - | "{$" { + | "{$" { yyless 1 lexbuf; push_mode ST_IN_SCRIPTING; T_CURLY_OPEN(tokinfo lexbuf); @@ -1078,7 +1078,7 @@ and st_double_quotes = parse } (*e: encapsulated dollar stuff rules *) - | ['"'] { + | ['"'] { (* was originally set_mode ST_IN_SCRIPTING, but with XHP * the context for a double quote may not be anymore always * ST_IN_SCRIPTING @@ -1088,8 +1088,8 @@ and st_double_quotes = parse } (*s: repetitive st_double_quotes rules for error handling *) | eof { EOF (tokinfo lexbuf +> Ast.rewrap_str "") } - | _ { - if !Flag.verbose_lexing + | _ { + if !Flag.verbose_lexing then pr2_once ("LEXER:unrecognised symbol, in st_double_quotes rule:"^tok lexbuf); TUnknown (tokinfo lexbuf) } @@ -1099,10 +1099,10 @@ and st_double_quotes = parse (* ----------------------------------------------------------------------- *) (*s: rule st_backquote *) (* mostly copy paste of st_double_quotes; just the end regexp is different *) -and st_backquote = parse +and st_backquote = parse - | BACKQUOTE_CHARS+ { - T_ENCAPSED_AND_WHITESPACE(tok lexbuf, tokinfo lexbuf) + | BACKQUOTE_CHARS+ { + T_ENCAPSED_AND_WHITESPACE(tok lexbuf, tokinfo lexbuf) } (*s: encapsulated dollar stuff rules *) @@ -1110,7 +1110,7 @@ and st_backquote = parse (*x: encapsulated dollar stuff rules *) | "$" (LABEL as s) "[" { let info = tokinfo lexbuf in - + let varinfo = rewrap_str ("$" ^ s) info in let charpos_info = Ast.pos_of_info varinfo in let pos_after_label = charpos_info + String.length ("$" ^ s) in @@ -1124,7 +1124,7 @@ and st_backquote = parse | "$" { T_ENCAPSED_AND_WHITESPACE(tok lexbuf, tokinfo lexbuf) } (*x: encapsulated dollar stuff rules *) - | "{$" { + | "{$" { yyless 1 lexbuf; push_mode ST_IN_SCRIPTING; T_CURLY_OPEN(tokinfo lexbuf); @@ -1136,15 +1136,15 @@ and st_backquote = parse } (*e: encapsulated dollar stuff rules *) - | ['`'] { + | ['`'] { set_mode ST_IN_SCRIPTING; - TBACKQUOTE(tokinfo lexbuf) - } + TBACKQUOTE(tokinfo lexbuf) + } (*s: repetitive st_backquote rules for error handling *) | eof { EOF (tokinfo lexbuf +> Ast.rewrap_str "") } - | _ { - if !Flag.verbose_lexing + | _ { + if !Flag.verbose_lexing then pr2_once ("LEXER:unrecognised symbol, in st_backquote rule:"^tok lexbuf); TUnknown (tokinfo lexbuf) } @@ -1155,14 +1155,14 @@ and st_backquote = parse (*s: rule st_start_heredoc *) (* As heredoc have some of the semantic of double quote strings, again some * rules from st_double_quotes are copy pasted here. - * - * todo? the rules below are not what was in the original Zend lexer, + * + * todo? the rules below are not what was in the original Zend lexer, * but the original lexer was doing very complicated stuff ... *) and st_start_heredoc stopdoc = parse | (LABEL as s) (";"? as semi) (['\n' '\r'] as space) { - let info = tokinfo lexbuf in + let info = tokinfo lexbuf in let lbl_info = rewrap_str s info in @@ -1170,20 +1170,20 @@ and st_start_heredoc stopdoc = parse let pos_after_label = pos + String.length s in let pos_after_semi = pos_after_label + String.length semi in - let colon_info = + let colon_info = Parse_info.tokinfo_str_pos semi pos_after_label in - let space_info = + let space_info = Parse_info.tokinfo_str_pos (string_of_char space) pos_after_semi in - + if s = stopdoc then begin set_mode ST_IN_SCRIPTING; push_token (TNewline (space_info)); if semi = ";" then push_token (TSEMICOLON (colon_info)); - + T_END_HEREDOC(lbl_info) - end else + end else T_ENCAPSED_AND_WHITESPACE(tok lexbuf, tokinfo lexbuf) } @@ -1197,7 +1197,7 @@ and st_start_heredoc stopdoc = parse (*x: encapsulated dollar stuff rules *) | "$" (LABEL as s) "[" { let info = tokinfo lexbuf in - + let varinfo = rewrap_str ("$" ^ s) info in let charpos_info = Ast.pos_of_info varinfo in let pos_after_label = charpos_info + String.length ("$" ^ s) in @@ -1214,7 +1214,7 @@ and st_start_heredoc stopdoc = parse | ['\n' '\r'] { TNewline (tokinfo lexbuf) } (*x: encapsulated dollar stuff rules *) - | "{$" { + | "{$" { yyless 1 lexbuf; push_mode ST_IN_SCRIPTING; T_CURLY_OPEN(tokinfo lexbuf); @@ -1228,8 +1228,8 @@ and st_start_heredoc stopdoc = parse (*s: repetitive st_start_heredoc rules for error handling *) | eof { EOF (tokinfo lexbuf +> Ast.rewrap_str "") } - | _ { - if !Flag.verbose_lexing + | _ { + if !Flag.verbose_lexing then pr2_once ("LEXER:unrecognised symbol, in st_start_heredoc rule:"^tok lexbuf); TUnknown (tokinfo lexbuf) } @@ -1242,7 +1242,7 @@ and st_start_heredoc stopdoc = parse and st_start_nowdoc stopdoc = parse | (LABEL as s) (";"? as semi) (['\n' '\r'] as space) { - let info = tokinfo lexbuf in + let info = tokinfo lexbuf in let lbl_info = rewrap_str s info in @@ -1250,11 +1250,11 @@ and st_start_nowdoc stopdoc = parse let pos_after_label = pos + String.length s in let pos_after_semi = pos_after_label + String.length semi in - let colon_info = + let colon_info = Parse_info.tokinfo_str_pos semi pos_after_label in - let space_info = + let space_info = Parse_info.tokinfo_str_pos (string_of_char space) pos_after_semi in - + if s = stopdoc then begin set_mode ST_IN_SCRIPTING; @@ -1263,7 +1263,7 @@ and st_start_nowdoc stopdoc = parse then push_token (TSEMICOLON (colon_info)); (* reuse same token than for heredocs *) T_END_HEREDOC(lbl_info) - end else + end else T_ENCAPSED_AND_WHITESPACE(tok lexbuf, tokinfo lexbuf) } | [^ '\n' '\r']+ { @@ -1275,8 +1275,8 @@ and st_start_nowdoc stopdoc = parse } | eof { EOF (tokinfo lexbuf +> Ast.rewrap_str "") } - | _ { - if !Flag.verbose_lexing + | _ { + if !Flag.verbose_lexing then pr2_once ("LEXER:unrecognised symbol, in st_start_nowdoc rule:"^tok lexbuf); TUnknown (tokinfo lexbuf) } @@ -1285,28 +1285,28 @@ and st_start_nowdoc stopdoc = parse (*****************************************************************************) (* Rules for XHP *) (*****************************************************************************) -(* XHP lexing states and rules *) +(* XHP lexing states and rules *) and st_in_xhp_tag current_tag = parse - (* The original XHP parser have some special handlings of + (* The original XHP parser have some special handlings of * whitespace and enforce to use certain whitespace at * certain places. Not sure I need to enforce this too. * Simpler to ignore whitespaces. - * + * * todo? factorize with st_in_scripting rule? *) | [' ' '\t']+ { TSpaces(tokinfo lexbuf) } | ['\n' '\r'] { TNewline(tokinfo lexbuf) } - | "/*" { - let info = tokinfo lexbuf in + | "/*" { + let info = tokinfo lexbuf in let com = st_comment lexbuf in T_COMMENT(info +> tok_add_s com) } | "/**/" { T_COMMENT(tokinfo lexbuf) } | "/**" { (* RESET_DOC_COMMENT(); *) - let info = tokinfo lexbuf in + let info = tokinfo lexbuf in let com = st_comment lexbuf in T_DOC_COMMENT(info +> tok_add_s com) } @@ -1322,32 +1322,32 @@ and st_in_xhp_tag current_tag = parse | "=" { TEQ(tokinfo lexbuf) } (* not sure if XHP strings needs the interpolation support *) - | ['"'] { + | ['"'] { push_mode ST_DOUBLE_QUOTES; TGUIL(tokinfo lexbuf) } | "{" { - push_mode ST_IN_SCRIPTING; + push_mode ST_IN_SCRIPTING; TOBRACE(tokinfo lexbuf) } (* a singleton tag *) - | "/>" { + | "/>" { pop_mode (); - T_XHP_SLASH_GT (tokinfo lexbuf) + T_XHP_SLASH_GT (tokinfo lexbuf) } - (* When we see a ">", it means it's just the end of + (* When we see a ">", it means it's just the end of * the opening tag. Transit to IN_XHP_TEXT. *) - | ">" { + | ">" { set_mode (ST_IN_XHP_TEXT current_tag); T_XHP_GT (tokinfo lexbuf) } | eof { EOF (tokinfo lexbuf +> Ast.rewrap_str "") } - | _ { - if !Flag.verbose_lexing + | _ { + if !Flag.verbose_lexing then pr2_once ("LEXER:unrecognised symbol, in XHP tag:"^tok lexbuf); TUnknown (tokinfo lexbuf) } @@ -1356,14 +1356,14 @@ and st_in_xhp_tag current_tag = parse and st_in_xhp_text current_tag = parse (* a nested xhp construct *) - | "<" (XHPTAG as tag) { + | "<" (XHPTAG as tag) { let xs = Common.split ":" tag in push_mode (ST_IN_XHP_TAG xs); T_XHP_OPEN_TAG(xs, tokinfo lexbuf) } - | "<" "/" (XHPTAG as tag) ">" { + | "<" "/" (XHPTAG as tag) ">" { let xs = Common.split ":" tag in if (xs <> current_tag) then begin @@ -1376,7 +1376,7 @@ and st_in_xhp_text current_tag = parse } (* shortcut for closing tag ? *) - | "<" "/" ">" { + | "<" "/" ">" { (* no check :( *) pop_mode (); T_XHP_CLOSE_TAG(None, tokinfo lexbuf) @@ -1384,7 +1384,7 @@ and st_in_xhp_text current_tag = parse (* PHP interpolation. How the user can produce a { ? &;something ? *) | "{" { - push_mode ST_IN_SCRIPTING; + push_mode ST_IN_SCRIPTING; TOBRACE(tokinfo lexbuf) } @@ -1393,8 +1393,8 @@ and st_in_xhp_text current_tag = parse | eof { EOF (tokinfo lexbuf +> Ast.rewrap_str "") } - | _ { - if !Flag.verbose_lexing + | _ { + if !Flag.verbose_lexing then pr2_once ("LEXER:unrecognised symbol, in XHP text:"^tok lexbuf); TUnknown (tokinfo lexbuf) } @@ -1403,16 +1403,16 @@ and st_in_xhp_text current_tag = parse (* Rule comment *) (*****************************************************************************) (*s: rule st_comment *) -and st_comment = parse +and st_comment = parse | "*/" { tok lexbuf } (* noteopti: *) - | [^'*']+ { let s = tok lexbuf in s ^ st_comment lexbuf } + | [^'*']+ { let s = tok lexbuf in s ^ st_comment lexbuf } | "*" { let s = tok lexbuf in s ^ st_comment lexbuf } (*s: repetitive st_comment rules for error handling *) | eof { pr2 "LEXER: end of file in comment"; "*/"} - | _ { + | _ { let s = tok lexbuf in pr2 ("LEXER: unrecognised symbol in comment:"^s); s ^ st_comment lexbuf @@ -1424,25 +1424,25 @@ and st_comment = parse and st_one_line_comment = parse | "?"|"%"|">" { let s = tok lexbuf in s ^ st_one_line_comment lexbuf } | ([^'\n' '\r' '?''%''>']* as start) (ANY_CHAR as x) - { + { (match x with - | '?' | '%' | '>' -> + | '?' | '%' | '>' -> yyless 1 lexbuf; start ^ st_one_line_comment lexbuf (* end of recursion when new line or other character *) - | '\n' -> + | '\n' -> (* don't want the newline to be part of the comment *) yyless 1 lexbuf; start | c -> start ^ String.make 1 c ) } - | NEWLINE { + | NEWLINE { (* don't want the newline to be part of the comment *) yyless 1 lexbuf; "" } - | "?>" { + | "?>" { (* "%>" is only when use asp_tags *) yyless 2 lexbuf; "" @@ -1450,8 +1450,8 @@ and st_one_line_comment = parse (*s: repetitive st_one_line_comment rules for error handling *) | eof { pr2 "LEXER: end of file in comment"; "*/" } - | _ { - if !Flag.verbose_lexing + | _ { + if !Flag.verbose_lexing then pr2_once ("LEXER:unrecognised symbol, in st_one_line_comment rule:"^tok lexbuf); tok lexbuf } diff --git a/lang_php/parsing/parser_php.mly b/lang_php/parsing/parser_php.mly index 2d639d24a..604ced08f 100644 --- a/lang_php/parsing/parser_php.mly +++ b/lang_php/parsing/parser_php.mly @@ -1,14 +1,14 @@ /*(*s: parser_php.mly *)*/ /*(*s: Facebook copyright2 *)*/ /* Yoann Padioleau - * + * * Copyright (C) 2009-2012 Facebook * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License * version 2.1 as published by the Free Software Foundation, with the * special exception on linking described in file license.txt. - * + * * This library is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the file @@ -19,14 +19,14 @@ /*(*s: GRAMMAR prelude *)*/ %{ (* src: ocamlyaccified from zend_language_parser.y in Zend PHP source code. - * updates: + * updates: * - extended to deal with XHP based on XHP bison grammar. * - added support for a few PHP 5.3 extensions (e.g. lambda, const), but * not namespace. * - added support for yield (facebook extension). * - added support for a few PHP 5.4 extensions (e.g. traits, short array). * - added support for generics (another facebook extensions). - * + * /*(*s: Zend copyright *)*/ * +----------------------------------------------------------------------+ * | Zend Engine | @@ -47,7 +47,7 @@ /*(*e: Zend copyright *)*/ * /* Id: zend_language_parser.y 263383 2008-07-24 11:47:14Z dmitry */ * LALR shift/reduce conflicts and how they are resolved: - * - 2 shift/reduce conflicts due to the dangeling elseif/else ambiguity. + * - 2 shift/reduce conflicts due to the dangeling elseif/else ambiguity. * Solved by shift. * %pure_parser * %expect 2 @@ -87,7 +87,7 @@ module Ast = Ast_php /*(*2 The normal tokens *)*/ /*(*-----------------------------------------*)*/ /*(*s: GRAMMAR normal tokens *)*/ -%token +%token T_LNUMBER T_DNUMBER /*(* T_IDENT is for a regular ident and T_VARIABLE is for a dollar ident. * Note that with XHP if you want to add a rule using T_IDENT, you should @@ -104,13 +104,13 @@ module Ast = Ast_php /*(*2 Keyword tokens *)*/ /*(*-----------------------------------------*)*/ -%token +%token T_IF T_ELSE T_ELSEIF T_ENDIF T_DO T_WHILE T_ENDWHILE T_FOR T_ENDFOR T_FOREACH T_ENDFOREACH T_SWITCH T_ENDSWITCH T_CASE T_DEFAULT T_BREAK T_CONTINUE T_RETURN T_TRY T_CATCH T_THROW T_EXIT T_DECLARE T_ENDDECLARE T_USE T_GLOBAL T_AS T_FUNCTION T_CONST T_VAR -/*(* ugly: because of my hack around the implicit echo when use +%token T_OBJECT_OPERATOR T_DOUBLE_ARROW T_OPEN_TAG T_CLOSE_TAG T_OPEN_TAG_WITH_ECHO T_CLOSE_TAG_OF_ECHO T_START_HEREDOC T_END_HEREDOC @@ -140,28 +140,28 @@ module Ast = Ast_php TCOLCOL /*(* pad: was declared as left/right, without a token decl in orig gram *)*/ TCOLON TCOMMA TDOT TBANG TTILDE TQUESTION - TOBRA + TOBRA TPLUS TMINUS TMUL TDIV TMOD TAND TOR TXOR - TEQ + TEQ /*(* now also used for types/generics, as in vector *)*/ TSMALLER TGREATER T_PLUS_EQUAL T_MINUS_EQUAL T_MUL_EQUAL T_DIV_EQUAL - T_CONCAT_EQUAL T_MOD_EQUAL + T_CONCAT_EQUAL T_MOD_EQUAL T_AND_EQUAL T_OR_EQUAL T_XOR_EQUAL T_SL_EQUAL T_SR_EQUAL T_INC T_DEC - T_BOOLEAN_OR T_BOOLEAN_AND + T_BOOLEAN_OR T_BOOLEAN_AND /*(* T_SR is (ab)used for types/generics, as in vector> *)*/ T_SL T_SR T_IS_SMALLER_OR_EQUAL T_IS_GREATER_OR_EQUAL - T_BOOL_CAST T_INT_CAST T_DOUBLE_CAST T_STRING_CAST T_ARRAY_CAST T_OBJECT_CAST + T_BOOL_CAST T_INT_CAST T_DOUBLE_CAST T_STRING_CAST T_ARRAY_CAST T_OBJECT_CAST T_UNSET_CAST T_IS_IDENTICAL T_IS_NOT_IDENTICAL T_IS_EQUAL T_IS_NOT_EQUAL T__AT /*(* was declared implicitely because was using directly the character *)*/ TOPAR TCPAR TOBRACE TCBRACE TCBRA TBACKQUOTE -/*(* ugly: because of my hack around the implicit ';' when use ?>, +/*(* ugly: because of my hack around the implicit ';' when use ?>, * this TSEMICOLON might have a string different than ';' *)*/ TSEMICOLON @@ -208,7 +208,7 @@ module Ast = Ast_php %token T_XHP_ATTR T_XHP_TEXT /*(* xhp keywords. If you add one don't forget to update the 'ident' rule. *)*/ -%token +%token T_XHP_ATTRIBUTE T_XHP_CHILDREN T_XHP_CATEGORY T_XHP_ENUM T_XHP_REQUIRED T_XHP_ANY /*(* T_XHP_EMPTY is T_EMPTY *)*/ @@ -244,7 +244,7 @@ module Ast = Ast_php %left TMUL TDIV TMOD %right TBANG %nonassoc T_INSTANCEOF -%right TTILDE T_INC T_DEC T_INT_CAST T_DOUBLE_CAST T_STRING_CAST T_ARRAY_CAST T_OBJECT_CAST T_BOOL_CAST T_UNSET_CAST +%right TTILDE T_INC T_DEC T_INT_CAST T_DOUBLE_CAST T_STRING_CAST T_ARRAY_CAST T_OBJECT_CAST T_BOOL_CAST T_UNSET_CAST %right T__AT %right TOBRA %nonassoc T_NEW T_CLONE @@ -313,10 +313,10 @@ unticked_statement: /* (* todo: this is commented because it is not really used and it generates - * some conflicts now that type_hint is not anymore - * type_hint: ident { ... } but + * some conflicts now that type_hint is not anymore + * type_hint: ident { ... } but * type_hint: class_name_or_selfparent { ... } - * + * * | type_hint variable TSEMICOLON { ... } * | type_hint variable TEQ expr TSEMICOLON { ... } * Right now the only places where we allow types are for parameters @@ -326,31 +326,31 @@ unticked_statement: | TOBRACE inner_statement_list TCBRACE { Block($1,$2,$3) } - | T_IF TOPAR expr TCPAR statement elseif_list else_single + | T_IF TOPAR expr TCPAR statement elseif_list else_single { If($1,($2,$3,$4),$5,$6,$7) } - | T_IF TOPAR expr TCPAR TCOLON - inner_statement_list new_elseif_list new_else_single - T_ENDIF TSEMICOLON + | T_IF TOPAR expr TCPAR TCOLON + inner_statement_list new_elseif_list new_else_single + T_ENDIF TSEMICOLON { IfColon($1,($2,$3,$4),$5,$6,$7,$8,$9,$10) } - | T_WHILE TOPAR expr TCPAR while_statement + | T_WHILE TOPAR expr TCPAR while_statement { While($1,($2,$3,$4),$5) } - | T_DO statement T_WHILE TOPAR expr TCPAR TSEMICOLON + | T_DO statement T_WHILE TOPAR expr TCPAR TSEMICOLON { Do($1,$2,$3,($4,$5,$6),$7) } | T_FOR TOPAR for_expr TSEMICOLON for_expr TSEMICOLON for_expr TCPAR - for_statement + for_statement { For($1,$2,$3,$4,$5,$6,$7,$8,$9) } - | T_SWITCH TOPAR expr TCPAR switch_case_list + | T_SWITCH TOPAR expr TCPAR switch_case_list { Switch($1,($2,$3,$4),$5) } | T_FOREACH TOPAR variable T_AS - foreach_variable foreach_optional_arg TCPAR - foreach_statement + foreach_variable foreach_optional_arg TCPAR + foreach_statement { Foreach($1,$2,mk_e (Lv $3),$4,Left $5,$6,$7,$8) } | T_FOREACH TOPAR expr_without_variable T_AS - variable foreach_optional_arg TCPAR - foreach_statement + variable foreach_optional_arg TCPAR + foreach_statement { Foreach($1,$2,$3,$4,Right $5,$6,$7,$8) } | T_BREAK TSEMICOLON { Break($1,None,$2) } @@ -363,9 +363,9 @@ unticked_statement: | T_RETURN variable TSEMICOLON { Return ($1,Some (mk_e (Lv $2)), $3)} | T_TRY TOBRACE inner_statement_list TCBRACE - T_CATCH TOPAR fully_qualified_class_name T_VARIABLE TCPAR - TOBRACE inner_statement_list TCBRACE - additional_catches + T_CATCH TOPAR fully_qualified_class_name T_VARIABLE TCPAR + TOBRACE inner_statement_list TCBRACE + additional_catches { let try_block = ($2,$3,$4) in let catch_block = ($10, $11, $12) in let catch = ($5, ($6, ($7, DName $8), $9), catch_block) in @@ -376,7 +376,7 @@ unticked_statement: | T_ECHO echo_expr_list TSEMICOLON { Echo($1,$2,$3) } | T_INLINE_HTML { InlineHtml($1) } - | T_OPEN_TAG_WITH_ECHO expr T_CLOSE_TAG_OF_ECHO { + | T_OPEN_TAG_WITH_ECHO expr T_CLOSE_TAG_OF_ECHO { (* todo? cheat ..., ugly, the 2 tokens will have a wrong string *) Echo ($1, [Left $2], $3) } @@ -387,7 +387,7 @@ unticked_statement: | T_UNSET TOPAR unset_variables TCPAR TSEMICOLON { Unset($1,($2,$3,$4),$5) } | T_USE use_filename TSEMICOLON { Use($1,$2,$3) } - | T_DECLARE TOPAR declare_list TCPAR declare_statement + | T_DECLARE TOPAR declare_list TCPAR declare_statement { Declare($1,($2,$3,$4),$5) } /*(*x: GRAMMAR statement *)*/ @@ -407,19 +407,19 @@ foreach_variable: is_reference variable { ($1, $2) } switch_case_list: | TOBRACE case_list TCBRACE { CaseList($1,None,$2,$3) } | TOBRACE TSEMICOLON case_list TCBRACE { CaseList($1, Some $2, $3, $4) } - | TCOLON case_list T_ENDSWITCH TSEMICOLON + | TCOLON case_list T_ENDSWITCH TSEMICOLON { CaseColonList($1,None,$2, $3, $4) } - | TCOLON TSEMICOLON case_list T_ENDSWITCH TSEMICOLON + | TCOLON TSEMICOLON case_list T_ENDSWITCH TSEMICOLON { CaseColonList($1, Some $2, $3, $4, $5) } | T_XHP_COLONID_DEF { failwith_xhp_ambiguity_colon (snd $1) } case_list: case_list_rev { List.rev $1 } case_list_rev: - | /*(*empty*)*/ { [] } - | case_list_rev T_CASE expr case_separator inner_statement_list + | /*(*empty*)*/ { [] } + | case_list_rev T_CASE expr case_separator inner_statement_list { Case($2,$3,$4,$5)::$1 } - | case_list_rev T_DEFAULT case_separator inner_statement_list + | case_list_rev T_DEFAULT case_separator inner_statement_list { Default($2,$3,$4)::$1 } case_separator: @@ -439,7 +439,7 @@ for_statement: | TCOLON inner_statement_list T_ENDFOR TSEMICOLON { ColonStmt($1,$2,$3,$4) } foreach_statement: - | statement { SingleStmt $1 } + | statement { SingleStmt $1 } | TCOLON inner_statement_list T_ENDFOREACH TSEMICOLON { ColonStmt($1,$2,$3,$4)} declare_statement: @@ -453,7 +453,7 @@ elseif_list: new_elseif_list: | /*(*empty*)*/ { [] } - | new_elseif_list T_ELSEIF TOPAR expr TCPAR TCOLON inner_statement_list + | new_elseif_list T_ELSEIF TOPAR expr TCPAR TCOLON inner_statement_list { $1 ++ [$2,($3,$4,$5),$6,$7] } @@ -467,8 +467,8 @@ new_else_single: additional_catch: - | T_CATCH TOPAR fully_qualified_class_name T_VARIABLE TCPAR - TOBRACE inner_statement_list TCBRACE + | T_CATCH TOPAR fully_qualified_class_name T_VARIABLE TCPAR + TOBRACE inner_statement_list TCBRACE { let catch_block = ($6, $7, $8) in let catch = ($1, ($2, ($3, DName $4), $5), catch_block) in catch @@ -490,9 +490,9 @@ static_var_list: static_var_list_rev { List.rev $1 } static_var_list_rev: | T_VARIABLE { [Left (DName $1, None)] } | T_VARIABLE TEQ static_scalar { [Left (DName $1, Some ($2, $3)) ] } - | static_var_list_rev TCOMMA T_VARIABLE + | static_var_list_rev TCOMMA T_VARIABLE { Left (DName $3, None)::Right $2::$1 } - | static_var_list_rev TCOMMA T_VARIABLE TEQ static_scalar + | static_var_list_rev TCOMMA T_VARIABLE TEQ static_scalar { Left (DName $3, Some ($4, $5))::Right $2::$1 } unset_variable: variable { $1 } @@ -508,16 +508,16 @@ use_filename: /*(* PHP 5.3 *)*/ constant_declaration_statement: - | T_CONST T_IDENT TEQ static_scalar TSEMICOLON + | T_CONST T_IDENT TEQ static_scalar TSEMICOLON { ($1, Name $2, $3, $4, $5) } - | T_CONST ext_type_hint T_IDENT TEQ static_scalar TSEMICOLON + | T_CONST ext_type_hint T_IDENT TEQ static_scalar TSEMICOLON { ($1, Name $3, $4, $5, $6) } /*(*************************************************************************)*/ /*(*1 Function declaration *)*/ /*(*************************************************************************)*/ /*(*s: GRAMMAR function declaration *)*/ -function_declaration_statement: +function_declaration_statement: | unticked_function_declaration_statement { $1 } /*(* can not factorize with a 'attributes_opt' rule otherwise get shift/reduce * conflicts. Indeed reading a T_FUNCTION one can not decide between the @@ -531,10 +531,10 @@ function_declaration_statement: unticked_function_declaration_statement: T_FUNCTION is_reference ident type_params_opt - TOPAR parameter_list TCPAR + TOPAR parameter_list TCPAR return_type_opt TOBRACE inner_statement_list TCBRACE - { + { let params = ($5, $6, $7) in let body = ($9, $10, $11) in ({ f_tok = $1; f_ref = $2; f_name = Name $3; f_params = params; @@ -546,31 +546,31 @@ unticked_function_declaration_statement: /*(*x: GRAMMAR function declaration *)*/ /*(* can not factorize, otherwise shift/reduce conflict *)*/ non_empty_parameter_list: - | type_hint_opt T_VARIABLE + | type_hint_opt T_VARIABLE { let p = mk_param $1 $2 in [Left3 p] } - | type_hint_opt TAND T_VARIABLE + | type_hint_opt TAND T_VARIABLE { let p = mk_param $1 $3 in [Left3 {p with p_ref = Some $2}] } | type_hint_opt T_VARIABLE TEQ static_scalar { let p = mk_param $1 $2 in [Left3 {p with p_default = Some ($3,$4)}] } | type_hint_opt TAND T_VARIABLE TEQ static_scalar - { let p = mk_param $1 $3 in + { let p = mk_param $1 $3 in [Left3 {p with p_ref = Some $2; p_default = Some ($4, $5)}] } - /*(* sgrep_ext: *)*/ - | TDOTS - { sgrep_guard ([Middle3 $1]) } - | non_empty_parameter_list TCOMMA TDOTS - { sgrep_guard ($1 ++ [Right3 $2; Middle3 $3]) } + /*(* varargs extension *)*/ + | TDOTS + { [Middle3 $1] } + | non_empty_parameter_list TCOMMA TDOTS + { $1 ++ [Right3 $2; Middle3 $3] } /*(*s: repetitive non_empty_parameter_list *)*/ - | non_empty_parameter_list TCOMMA type_hint_opt T_VARIABLE + | non_empty_parameter_list TCOMMA type_hint_opt T_VARIABLE { let p = mk_param $3 $4 in $1 ++ [Right3 $2; Left3 p] } - | non_empty_parameter_list TCOMMA type_hint_opt TAND T_VARIABLE + | non_empty_parameter_list TCOMMA type_hint_opt TAND T_VARIABLE { let p = mk_param $3 $5 in $1 ++ [Right3 $2; Left3 {p with p_ref = Some $4}] } - | non_empty_parameter_list TCOMMA type_hint_opt T_VARIABLE TEQ static_scalar + | non_empty_parameter_list TCOMMA type_hint_opt T_VARIABLE TEQ static_scalar { let p = mk_param $3 $4 in $1 ++ [Right3 $2; Left3 {p with p_default = Some ($5,$6)}] } - | non_empty_parameter_list TCOMMA type_hint_opt TAND T_VARIABLE TEQ static_scalar - { let p = mk_param $3 $5 in + | non_empty_parameter_list TCOMMA type_hint_opt TAND T_VARIABLE TEQ static_scalar + { let p = mk_param $3 $5 in $1 ++ [Right3 $2; Left3 {p with p_ref = Some $4; p_default = Some ($6, $7)}] } @@ -584,7 +584,7 @@ is_reference: /*(* PHP 5.3 *)*/ lexical_vars: | /*(*empty*)*/ { None } - | T_USE TOPAR lexical_var_list TCPAR { + | T_USE TOPAR lexical_var_list TCPAR { Some ($1, ($2, ($3 +> List.map (function | Right info -> Right info | Left (a,b) -> Left (LexicalVar (a,b)))), $4)) } @@ -600,26 +600,26 @@ lexical_var_list: /*(*1 Class declaration *)*/ /*(*************************************************************************)*/ /*(*s: GRAMMAR class declaration *)*/ -class_declaration_statement: +class_declaration_statement: | unticked_class_declaration_statement { $1 } | attributes unticked_class_declaration_statement { $2 } unticked_class_declaration_statement: | class_entry_type class_name type_params_opt extends_from implements_list - TOBRACE class_statement_list TCBRACE - { { c_type = $1; c_name = $2;c_extends = $4; + TOBRACE class_statement_list TCBRACE + { { c_type = $1; c_name = $2;c_extends = $4; c_implements = $5; c_body = $6, $7, $8; - } + } } | interface_entry class_name type_params_opt interface_extends_list - TOBRACE class_statement_list TCBRACE - { { c_type = Interface $1; c_name = $2; c_extends = None; + TOBRACE class_statement_list TCBRACE + { { c_type = Interface $1; c_name = $2; c_extends = None; (* we use c_implements for interface extension because * it can be a list. ugly? *) - c_implements = $4; c_body = $5, $6, $7; } + c_implements = $4; c_body = $5, $6, $7; } } trait_declaration_statement: @@ -628,13 +628,13 @@ trait_declaration_statement: trait_declaration_statement_aux: | T_TRAIT class_name type_params_opt - TOBRACE class_statement_list TCBRACE + TOBRACE class_statement_list TCBRACE { (* TODO: store $3, right now the info is thrown away! *) { c_type = Trait $1; c_name = $2; c_extends = None; c_implements = None; c_body = ($4, $5, $6) } } /*(*x: GRAMMAR class declaration *)*/ -class_name: +class_name: | ident { Name $1 } /*(*s: class_name grammar rule hook *)*/ /*(* xhp: an XHP element def *)*/ @@ -647,7 +647,7 @@ class_entry_type: | T_ABSTRACT T_CLASS { ClassAbstract ($1, $2) } | T_FINAL T_CLASS { ClassFinal ($1, $2) } -interface_entry: +interface_entry: | T_INTERFACE { $1 } /*(*x: GRAMMAR class declaration *)*/ extends_from: @@ -677,38 +677,38 @@ trait_list: /*(*----------------------------*)*/ class_statement: - | T_CONST class_constant_declaration TSEMICOLON + | T_CONST class_constant_declaration TSEMICOLON { ClassConstants($1, $2, $3) } - | T_CONST ext_type_hint class_constant_declaration TSEMICOLON + | T_CONST ext_type_hint class_constant_declaration TSEMICOLON { ClassConstants($1, $3, $4) } - | variable_modifiers class_variable_declaration TSEMICOLON + | variable_modifiers class_variable_declaration TSEMICOLON { ClassVariables($1, None, $2, $3) } - | variable_modifiers ext_type_hint class_variable_declaration TSEMICOLON - { - ClassVariables($1, $2, $3, $4) + | variable_modifiers ext_type_hint class_variable_declaration TSEMICOLON + { + ClassVariables($1, $2, $3, $4) } | method_declaration { $1 } - | attributes method_declaration { $2 } + | attributes method_declaration { $2 } - | T_XHP_ATTRIBUTE xhp_attribute_decls TSEMICOLON + | T_XHP_ATTRIBUTE xhp_attribute_decls TSEMICOLON { XhpDecl (XhpAttributesDecl ($1, $2, $3)) } - | T_XHP_CHILDREN xhp_children_decl TSEMICOLON + | T_XHP_CHILDREN xhp_children_decl TSEMICOLON { XhpDecl (XhpChildrenDecl ($1, $2, $3)) } - | T_XHP_CATEGORY xhp_category_list TSEMICOLON + | T_XHP_CATEGORY xhp_category_list TSEMICOLON { XhpDecl (XhpCategoriesDecl ($1, $2, $3)) } /*(* php 5.4 traits *)*/ - | T_USE trait_list TSEMICOLON + | T_USE trait_list TSEMICOLON { UseTrait ($1, $2, Left $3) } - | T_USE trait_list TOBRACE trait_rules TCBRACE + | T_USE trait_list TOBRACE trait_rules TCBRACE { UseTrait ($1, $2, Right ($3, $4, $5)) } -method_declaration: +method_declaration: method_modifiers T_FUNCTION is_reference method_name type_params_opt TOPAR parameter_list TCPAR return_type_opt - method_body + method_body { let body, function_type = $10 in Method ({ f_tok = $2; f_ref = $3; f_name = Name $4; f_params = ($6, $7, $8); f_return_type = $9; @@ -717,16 +717,16 @@ method_declaration: } /*(* ugly, php allows method names which should be IMHO reserved keywords *)*/ -method_name: +method_name: | ident { $1 } | T_PARENT { "parent", $1 } | T_SELF { "self", $1 } /*(*x: GRAMMAR class declaration *)*/ class_constant_declaration: - | ident TEQ static_scalar + | ident TEQ static_scalar { [Left ((Name $1), ($2, $3))] } - | class_constant_declaration TCOMMA ident TEQ static_scalar + | class_constant_declaration TCOMMA ident TEQ static_scalar { $1 ++ [Right $2; Left ((Name $3, ($4, $5)))] } @@ -741,9 +741,9 @@ class_variable_declaration: | T_VARIABLE TEQ static_scalar { [Left (DName $1, Some ($2, $3))] } /*(*s: repetitive class_variable_declaration with comma *)*/ - | class_variable_declaration TCOMMA T_VARIABLE + | class_variable_declaration TCOMMA T_VARIABLE { $1 ++ [Right $2; Left (DName $3, None)] } - | class_variable_declaration TCOMMA T_VARIABLE TEQ static_scalar + | class_variable_declaration TCOMMA T_VARIABLE TEQ static_scalar { $1 ++ [Right $2; Left (DName $3, Some ($4, $5))] } /*(*e: repetitive class_variable_declaration with comma *)*/ /*(*x: GRAMMAR class declaration *)*/ @@ -763,21 +763,21 @@ method_body: /*(* mostly a copy paste of the original XHP grammar *)*/ xhp_attribute_decl: - | T_XHP_COLONID_DEF + | T_XHP_COLONID_DEF { XhpAttrInherit $1 } | xhp_attribute_decl_type xhp_attr_name xhp_attribute_default - xhp_attribute_is_required + xhp_attribute_is_required { XhpAttrDecl ($1, ((Ast.str_of_info $2, $2)), $3, $4) } -/*(* In the original grammar each types, e.g. float/string/bool/... - * had their special token. I abuse T_IDENT here, except for +/*(* In the original grammar each types, e.g. float/string/bool/... + * had their special token. I abuse T_IDENT here, except for * enum which needs a special grammar rule. *)*/ xhp_attribute_decl_type: | class_name { XhpAttrType $1 } | T_VAR { XhpAttrType (Name (Ast.str_of_info $1, $1)) } | T_ARRAY { XhpAttrType (Name (Ast.str_of_info $1, $1)) } - | T_XHP_ENUM TOBRACE xhp_enum_list TCBRACE + | T_XHP_ENUM TOBRACE xhp_enum_list TCBRACE { XhpAttrEnum ($1, ($2, $3, $4)) } xhp_attribute_default: @@ -797,11 +797,11 @@ xhp_attr_name: /*(* ugly, but harder to lex foo-name as a single token without * introducing lots of ambiguities. It's ok for :foo:bar but not * for attribute name. - * + * * todo? could check that there is no whitespace between those * tokens. *)*/ - | xhp_attr_name TMINUS xhp_attr_name_atom + | xhp_attr_name TMINUS xhp_attr_name_atom { let s = Ast.str_of_info $1 ^ Ast.str_of_info $2 ^ Ast.str_of_info $3 in Ast.rewrap_str s $1 } @@ -813,12 +813,12 @@ xhp_attr_name_atom: /*(* Just like it's ok (but not good IMHO) to use XHP keywords in place * of regular PHP idents, it's ok to use PHP keywords in place * of XHP attribute names (but again not good IMHO). - * + * * The list of tokens below are all identifier-like keywords mentioned in * the 'keyword tokens' section at the beginning of this file * (which roughly correspond to the tokens in Lexer_php.keywords_table). * There is no conflict introducing this big list of tokens. - * + * * todo? emit a warning when the user use PHP keywords for XHP attribute ? *)*/ | T_ECHO { $1 } | T_PRINT { $1 } | T_IF { $1 } | T_ELSE { $1 } @@ -837,7 +837,7 @@ xhp_attr_name_atom: | T_LINE { $1 } | T_FILE { $1 } | T_LOGICAL_OR { $1 } | T_LOGICAL_AND { $1 } | T_LOGICAL_XOR { $1 } | T_NEW { $1 } | T_CLONE { $1 } | T_INSTANCEOF { $1 } | T_INCLUDE { $1 } | T_INCLUDE_ONCE { $1 } | T_REQUIRE { $1 } - | T_REQUIRE_ONCE { $1 } | T_EVAL { $1 } | T_SELF { $1 } | T_PARENT { $1 } + | T_REQUIRE_ONCE { $1 } | T_EVAL { $1 } | T_SELF { $1 } | T_PARENT { $1 } | T_TRAIT { $1 } | T_INSTEADOF { $1 } | T_TRAIT_C { $1 } /*(*----------------------------*)*/ @@ -853,14 +853,14 @@ xhp_children_decl: | xhp_children_paren_expr { $1 } xhp_children_paren_expr: - | TOPAR xhp_children_decl_expr TCPAR - { XhpChildParen ($1, $2, $3) } - | TOPAR xhp_children_decl_expr TCPAR TMUL - { XhpChildMul (XhpChildParen ($1, $2, $3), $4) } - | TOPAR xhp_children_decl_expr TCPAR TQUESTION - { XhpChildOption (XhpChildParen ($1, $2, $3), $4) } - | TOPAR xhp_children_decl_expr TCPAR TPLUS - { XhpChildPlus (XhpChildParen ($1, $2, $3), $4) } + | TOPAR xhp_children_decl_expr TCPAR + { XhpChildParen ($1, $2, $3) } + | TOPAR xhp_children_decl_expr TCPAR TMUL + { XhpChildMul (XhpChildParen ($1, $2, $3), $4) } + | TOPAR xhp_children_decl_expr TCPAR TQUESTION + { XhpChildOption (XhpChildParen ($1, $2, $3), $4) } + | TOPAR xhp_children_decl_expr TCPAR TPLUS + { XhpChildPlus (XhpChildParen ($1, $2, $3), $4) } xhp_children_decl_expr: | xhp_children_paren_expr { $1 } @@ -869,9 +869,9 @@ xhp_children_decl_expr: | xhp_children_decl_tag TQUESTION { XhpChildOption ($1, $2) } | xhp_children_decl_tag TPLUS { XhpChildPlus ($1, $2) } - | xhp_children_decl_expr TCOMMA xhp_children_decl_expr + | xhp_children_decl_expr TCOMMA xhp_children_decl_expr { XhpChildSequence ($1, $2, $3) } - | xhp_children_decl_expr TOR xhp_children_decl_expr + | xhp_children_decl_expr TOR xhp_children_decl_expr { XhpChildAlternative ($1, $2, $3) } xhp_children_decl_tag: @@ -960,7 +960,7 @@ non_empty_ext_type_hint_list: /*(* Do not confuse type_parameters and type_arguments. Type parameters * can only be simple identifiers, as in class Foo { ... }, - * and are used in a 'definition' context, whereas type arguments + * and are used in a 'definition' context, whereas type arguments * can be complex types, as in class X extends Foo>, * and are used in a 'use' context. *)*/ @@ -1006,55 +1006,55 @@ expr: | r_variable { mk_e (Lv $1) } | expr_without_variable { $1 } -expr_without_variable: +expr_without_variable: | expr_without_variable_bis { mk_e $1 } /*(* xhp: This extension fix a deficiency in the original PHP grammar * which does not allow to do things like foo()[2]. Array accesses * in PHP were allowed only for variables. * - * The rule below generates 3 s/r conflicts. + * The rule below generates 3 s/r conflicts. * * I thought I could remove some conflicts by putting the rule * closer to the funcall_call rule, which does not * generate any conflict, but it then can not handle * code like $o->foo()['foo']. - * - * I then thought I could put it closer to variable2, + * + * I then thought I could put it closer to variable2, * but it must be 'variable' not 'variable2' otherwise * it can not process code like foo()['fld1']['fld2']. - * + * * In fact it can not be put close to 'variable' either because * then it can not parse code like ($this->getAssocData($user))[0]; * So the right place is here, in expr_without_variable. *)*/ | expr TOBRA dim_offset TCBRA - { + { match $1 with - (* Lv corresponds to Lvalue which includes function calls so - * foo()[1] will be translated into a + (* Lv corresponds to Lvalue which includes function calls so + * foo()[1] will be translated into a * VArrayAccess(FunCallSimple(...), 1). *) | Lv v -> let var = (VArrayAccess (v, ($2, $3, $4))) in mk_e (Lv var) - (* The 'lvalue' type was originally restricted to variables and + (* The 'lvalue' type was originally restricted to variables and * function/method calls. It could not cope with - * VArrayAccess of arbitrary expressions such as + * VArrayAccess of arbitrary expressions such as * "array(1,2,3)" which are ConsArray of the 'expr' type. - * + * * So for the general case we could have transformed * such array access into calls to __xhp_idx(), just like * in the original XHP parser, and introduced some fake tokens * in the Ast. But having fake tokens break some * of the assumptions we have later when we build the database of code - * so it's simpler to introduce a new constructor, + * so it's simpler to introduce a new constructor, * VarrayAccessXhp that can accept expressions. This forces us * to add extra code to handle yet another constructor, but * the __xhp_idx() solution would also force us, to have precise * analysis, to special case such function calls anyway. - * + * * An alternative would be to rethink ast_php.ml and * merge the 'expr' and 'lvalue' types together. *) @@ -1071,7 +1071,7 @@ expr_without_variable_bis: | variable TEQ expr { Assign($1,$2,$3) } | variable TEQ TAND variable { AssignRef($1,$2,$3,$4) } - | variable TEQ TAND T_NEW class_name_reference ctor_arguments + | variable TEQ TAND T_NEW class_name_reference ctor_arguments { AssignNew($1,$2,$3,$4,$5,$6) } @@ -1130,17 +1130,17 @@ expr_without_variable_bis: | TTILDE expr { Unary((UnTilde,$1),$2) } - | T_LIST TOPAR assignment_list TCPAR TEQ expr + | T_LIST TOPAR assignment_list TCPAR TEQ expr { AssignList($1,($2,$3,$4),$5,$6) } - | T_ARRAY TOPAR array_pair_list TCPAR + | T_ARRAY TOPAR array_pair_list TCPAR { ArrayLong($1,($2,$3,$4)) } | TOBRA array_pair_list TCBRA { ArrayShort($1, $2, $3) } - | T_NEW class_name_reference ctor_arguments + | T_NEW class_name_reference ctor_arguments { New($1,$2,$3) } | T_CLONE expr { Clone($1,$2) } - | expr T_INSTANCEOF class_name_reference + | expr T_INSTANCEOF class_name_reference { InstanceOf($1,$2,$3) } | expr TQUESTION expr TCOLON expr { CondExpr($1,$2,Some $3,$4,$5) } @@ -1152,11 +1152,11 @@ expr_without_variable_bis: * ambiguities. See tests/xhp_pb_but_ok/colon_ambiguity*.php * I don't want to solve those ambiguities but I can at least print a * useful parsing error message with those fake rules. - * - * Everywhere in this grammar where we use TCOLON we should add + * + * Everywhere in this grammar where we use TCOLON we should add * an error rule similar to the one below. *)*/ - | expr TQUESTION expr T_XHP_COLONID_DEF + | expr TQUESTION expr T_XHP_COLONID_DEF { failwith_xhp_ambiguity_colon (snd $4) } | T_BOOL_CAST expr { Cast((BoolTy,$1),$2) } @@ -1176,8 +1176,8 @@ expr_without_variable_bis: | TBACKQUOTE encaps_list TBACKQUOTE { BackQuote($1,$2,$3) } /*(* PHP 5.3 *)*/ | T_FUNCTION is_reference TOPAR parameter_list TCPAR return_type_opt - lexical_vars - TOBRACE inner_statement_list TCBRACE + lexical_vars + TOBRACE inner_statement_list TCBRACE { let params = ($3, $4, $5) in let body = ($8, $9, $10) in Lambda ($7, { f_tok = $1;f_ref = $2;f_params = params; f_body = body; @@ -1197,7 +1197,7 @@ expr_without_variable_bis: /*(* sgrep_ext: *)*/ | TDOTS { sgrep_guard (SgrepExprDots $1) } - /*(* xhp: do not put in 'expr', otherwise can't have xhp + /*(* xhp: do not put in 'expr', otherwise can't have xhp * in function arguments *)*/ | xhp_html { XhpHtml $1 } @@ -1226,9 +1226,9 @@ scalar: | ident { C (CName (Name $1)) } | class_constant { ClassConstant (fst $1, snd $1) } - | TGUIL encaps_list TGUIL + | TGUIL encaps_list TGUIL { Guil ($1, $2, $3)} - | T_START_HEREDOC encaps_list T_END_HEREDOC + | T_START_HEREDOC encaps_list T_END_HEREDOC { HereDoc ($1, $2, $3) } /*(* generated by lexer for special case of ${beer}s. So it's really @@ -1251,7 +1251,7 @@ static_scalar: /* compile-time evaluated scalars */ | TOBRA static_array_pair_list TCBRA { ArrayShort($1, $2, $3) } /*(* todo? ensure encaps_list contains only constant strings? *)*/ - | T_START_HEREDOC encaps_list T_END_HEREDOC + | T_START_HEREDOC encaps_list T_END_HEREDOC { Sc (HereDoc ($1, $2, $3)) } /*(*s: static_scalar grammar rule hook *)*/ /* xdebug TODO AST */ @@ -1266,45 +1266,45 @@ common_scalar: | T_CONSTANT_ENCAPSED_STRING { String($1) } - | T_LINE { PreProcess(Line, $1) } + | T_LINE { PreProcess(Line, $1) } | T_FILE { PreProcess(File, $1) } | T_DIR { PreProcess(Dir, $1) } | T_CLASS_C { PreProcess(ClassC, $1) } | T_TRAIT_C { PreProcess(TraitC, $1)} | T_FUNC_C { PreProcess(FunctionC, $1) }|T_METHOD_C { PreProcess(MethodC, $1)} /*(*s: common_scalar grammar rule hook *)*/ - | T_CLASS_XDEBUG class_name TOBRACE class_statement_list TCBRACE { + | T_CLASS_XDEBUG class_name TOBRACE class_statement_list TCBRACE { XdebugClass ($2, $4) } - | T_CLASS_XDEBUG class_name TOBRACE TDOTS TCBRACE { + | T_CLASS_XDEBUG class_name TOBRACE TDOTS TCBRACE { XdebugClass ($2, []) } - | T_CLASS_XDEBUG class_name TOBRACE TDOTS TSEMICOLON TCBRACE { + | T_CLASS_XDEBUG class_name TOBRACE TDOTS TSEMICOLON TCBRACE { XdebugClass ($2, []) } | T_RESOURCE_XDEBUG { XdebugResource } /*(*e: common_scalar grammar rule hook *)*/ -class_constant: +class_constant: | qualifier ident { $1, (Name $2) } -static_class_constant: class_constant { $1 } +static_class_constant: class_constant { $1 } /*(*x: GRAMMAR scalar *)*/ /*(* can not factorize, otherwise shift/reduce conflict *)*/ static_array_pair_list: static_array_pair_list_rev { List.rev $1 } non_empty_static_array_pair_list_rev: - | static_scalar + | static_scalar { [Left (ArrayExpr $1)] } | static_scalar T_DOUBLE_ARROW static_scalar { [Left (ArrayArrowExpr ($1,$2,$3))]} /*(*s: repetitive non_empty_static_array_pair_list *)*/ | non_empty_static_array_pair_list_rev TCOMMA - static_scalar + static_scalar { Left (ArrayExpr $3)::Right $2::$1 } | non_empty_static_array_pair_list_rev TCOMMA - static_scalar T_DOUBLE_ARROW static_scalar + static_scalar T_DOUBLE_ARROW static_scalar { Left (ArrayArrowExpr ($3,$4,$5))::Right $2::$1 } /*(*e: repetitive non_empty_static_array_pair_list *)*/ /*(*e: GRAMMAR scalar *)*/ @@ -1317,14 +1317,14 @@ non_empty_static_array_pair_list_rev: variable: variable2 { variable2_to_lvalue $1 } /*(*x: GRAMMAR variable *)*/ variable2: - | base_variable_with_function_calls + | base_variable_with_function_calls { Variable ($1,[]) } - | base_variable_with_function_calls - T_OBJECT_OPERATOR object_property method_or_not + | base_variable_with_function_calls + T_OBJECT_OPERATOR object_property method_or_not variable_properties { Variable ($1, ($2, $3, $4)::$5) } /*(* sgrep_ext: *)*/ - | T_IDENT T_OBJECT_OPERATOR object_property method_or_not + | T_IDENT T_OBJECT_OPERATOR object_property method_or_not variable_properties /*(* contains more T_OBJECT_OPERATOR *)*/ { sgrep_guard (raise Todo) } @@ -1333,11 +1333,11 @@ base_variable_with_function_calls: | function_call { $1 } base_variable: - | variable_without_objects + | variable_without_objects { None, $1 } - | qualifier variable_without_objects /*(*static_member*)*/ + | qualifier variable_without_objects /*(*static_member*)*/ { Some (Left3 $1), $2 } - | variable_class_name TCOLCOL variable_without_objects + | variable_class_name TCOLCOL variable_without_objects { Some (Right3 ($1, $2)), $3 } @@ -1397,7 +1397,7 @@ non_empty_function_call_parameter_list: /*(*s: repetitive non_empty_function_call_parameter_list *)*/ | non_empty_function_call_parameter_list TCOMMA variable { $1 ++ [Right $2; Left (Arg (mk_e (Lv $3)))] } - | non_empty_function_call_parameter_list TCOMMA expr_without_variable + | non_empty_function_call_parameter_list TCOMMA expr_without_variable { $1 ++ [Right $2; Left (Arg ($3))] } | non_empty_function_call_parameter_list TCOMMA TAND w_variable { $1 ++ [Right $2; Left (ArgRef($3,$4))] } @@ -1422,13 +1422,13 @@ non_empty_array_pair_list_rev: | expr T_DOUBLE_ARROW TAND w_variable { [Left (ArrayArrowRef($1,$2,$3,$4))] } /*(*s: repetitive non_empty_array_pair_list *)*/ - | non_empty_array_pair_list_rev TCOMMA expr + | non_empty_array_pair_list_rev TCOMMA expr { Left (ArrayExpr $3)::Right $2::$1 } - | non_empty_array_pair_list_rev TCOMMA TAND w_variable + | non_empty_array_pair_list_rev TCOMMA TAND w_variable { Left (ArrayRef ($3,$4))::Right $2::$1 } - | non_empty_array_pair_list_rev TCOMMA expr T_DOUBLE_ARROW expr + | non_empty_array_pair_list_rev TCOMMA expr T_DOUBLE_ARROW expr { Left (ArrayArrowExpr($3,$4,$5))::Right $2::$1 } - | non_empty_array_pair_list_rev TCOMMA expr T_DOUBLE_ARROW TAND w_variable + | non_empty_array_pair_list_rev TCOMMA expr T_DOUBLE_ARROW TAND w_variable { Left (ArrayArrowRef($3,$4,$5,$6))::Right $2::$1 } /*(*e: repetitive non_empty_array_pair_list *)*/ /*(*x: GRAMMAR variable *)*/ @@ -1437,7 +1437,7 @@ non_empty_array_pair_list_rev: /*(*2 XHP embeded html *)*/ /*(*----------------------------*)*/ xhp_html: - | T_XHP_OPEN_TAG xhp_attributes T_XHP_GT xhp_children T_XHP_CLOSE_TAG + | T_XHP_OPEN_TAG xhp_attributes T_XHP_GT xhp_children T_XHP_CLOSE_TAG { Xhp ($1, $2, $3, $4, $5) } | T_XHP_OPEN_TAG xhp_attributes T_XHP_SLASH_GT { XhpSingleton ($1, $2, $3) } @@ -1477,16 +1477,16 @@ exit_expr: /*(*************************************************************************)*/ /*(*s: GRAMMAR namespace *)*/ -ident: +ident: | T_IDENT { $1 } /*(* xhp: it is ok to use XHP keywords in place where regular PHP names * are expected as in 'function children($x) { ... }'. - * - * We extend here the grammar to support those "overloading". An - * alternative would be to extend the lexer to only lex XHP keywords + * + * We extend here the grammar to support those "overloading". An + * alternative would be to extend the lexer to only lex XHP keywords * in certain context, but this would force to share some states between * the lexer and parser. - * + * * todo? emit a warning when the user use XHP keywords for regular idents ? *)*/ | T_XHP_ATTRIBUTE { Ast.str_of_info $1, $1 } @@ -1518,7 +1518,7 @@ class_name_or_selfparent: * This is currently equivalent to 'class_name' but adding * namespace at some point may change that. *)*/ -fully_qualified_class_name: +fully_qualified_class_name: | ident { Name $1 } /*(*s: fully_qualified_class_name grammar rule hook *)*/ /*(* xhp: an XHP element use *)*/ @@ -1543,8 +1543,8 @@ class_name_reference: dynamic_class_name_reference: | base_variable_bis { ($1, []) } - | base_variable_bis - T_OBJECT_OPERATOR object_property + | base_variable_bis + T_OBJECT_OPERATOR object_property dynamic_class_name_variable_properties { ($1, ($2, $3)::$4) } @@ -1567,7 +1567,7 @@ object_property: | object_dim_list { ObjProp $1 } | variable_without_objects_bis { ObjPropVar $1 } -variable_without_objects_bis: variable_without_objects +variable_without_objects_bis: variable_without_objects { vwithoutobj_to_variable $1 } /*(* quite similar to reference_variable, but without the '$' *)*/ @@ -1594,14 +1594,14 @@ dynamic_class_name_variable_property: T_OBJECT_OPERATOR object_property /*(*s: GRAMMAR encaps *)*/ encaps: | T_ENCAPSED_AND_WHITESPACE { EncapsString $1 } - | T_VARIABLE + | T_VARIABLE { let refvar = (Var2 (DName $1, Ast.noScope())) in let basevar = None, ([], refvar) in let basevarbis = BaseVar basevar in let var = Variable (basevarbis, []) in EncapsVar (variable2_to_lvalue var) } - | T_VARIABLE TOBRA encaps_var_offset TCBRA + | T_VARIABLE TOBRA encaps_var_offset TCBRA { let refvar = (Var2 (DName $1, Ast.noScope())) in let dimoffset = Some (mk_e $3) in let refvar = VArrayAccess2(refvar, ($2, dimoffset, $4)) in @@ -1610,7 +1610,7 @@ encaps: let var = Variable (basevarbis, []) in EncapsVar (variable2_to_lvalue var) } - | T_VARIABLE T_OBJECT_OPERATOR T_IDENT + | T_VARIABLE T_OBJECT_OPERATOR T_IDENT { let refvar = (Var2 (DName $1, Ast.noScope())) in let basevar = None, ([], refvar) in let basevarbis = BaseVar basevar in @@ -1622,13 +1622,13 @@ encaps: /*(* for ${beer}s. Note that this rule does not exist in the original PHP * grammar. Instead only the case with a TOBRA after the T_STRING_VARNAME - * is covered. The case with only a T_STRING_VARNAME is handled + * is covered. The case with only a T_STRING_VARNAME is handled * originally in the scalar rule, but it does not makes sense to me * as it's really more a variable than a scaler. So for now I have * defined this rule. maybe it's too restrictive, we'll see. *)*/ - | T_DOLLAR_OPEN_CURLY_BRACES T_STRING_VARNAME TCBRACE - { + | T_DOLLAR_OPEN_CURLY_BRACES T_STRING_VARNAME TCBRACE + { (* this is not really a T_VARIABLE, bit it's still conceptually * a variable so we build it almost like above *) @@ -1639,11 +1639,11 @@ encaps: EncapsDollarCurly ($1, variable2_to_lvalue var, $3) } - | T_DOLLAR_OPEN_CURLY_BRACES T_STRING_VARNAME TOBRA expr TCBRA TCBRACE + | T_DOLLAR_OPEN_CURLY_BRACES T_STRING_VARNAME TOBRA expr TCBRA TCBRACE { let refvar = (Var2 (DName $2, Ast.noScope())) in let dimoffset = Some ($4) in let refvar = VArrayAccess2(refvar, ($3, dimoffset, $5)) in - + let basevar = None, ([], refvar) in let basevarbis = BaseVar basevar in let var = Variable (basevarbis, []) in @@ -1656,8 +1656,8 @@ encaps: | T_DOLLAR_OPEN_CURLY_BRACES expr TCBRACE { EncapsExpr ($1, $2, $3) } /*(*x: GRAMMAR encaps *)*/ encaps_var_offset: - | T_IDENT { - (* It looks like an ident but as we are in encaps_var_offset, + | T_IDENT { + (* It looks like an ident but as we are in encaps_var_offset, * php allows array access inside strings to omit the quote * around fieldname, so it's actually really a Constant (String) * rather than an ident, as we usually do for other T_IDENT @@ -1666,7 +1666,7 @@ encaps_var_offset: let cst = String $1 in (* will not have enclosing "'" as usual *) Sc (C cst) } - | T_VARIABLE { + | T_VARIABLE { let refvar = (Var2 (DName $1, Ast.noScope())) in let basevar = None, ([], refvar) in let basevarbis = BaseVar basevar in @@ -1674,7 +1674,7 @@ encaps_var_offset: Lv (variable2_to_lvalue var) } | T_NUM_STRING { - (* the original php lexer does not return some numbers for + (* the original php lexer does not return some numbers for * offset of array access inside strings. Not sure why ... * TODO? *) @@ -1708,7 +1708,7 @@ variable_properties: | /*(*empty*)*/ { [] } dynamic_class_name_variable_properties: - | dynamic_class_name_variable_properties dynamic_class_name_variable_property + | dynamic_class_name_variable_properties dynamic_class_name_variable_property { $1 ++ [$2] } | /*(*empty*)*/ { [] } @@ -1736,7 +1736,7 @@ non_empty_additional_catches: /*(*s: repetitive xxx and non_empty_xxx *)*/ method_modifiers: | /*(*empty*)*/ { [] } - | non_empty_member_modifiers { $1 } + | non_empty_member_modifiers { $1 } non_empty_member_modifiers: | member_modifier { [$1] } diff --git a/tests/php/parsing/generics_test_02.php b/tests/php/parsing/generics_test_02.php new file mode 100644 index 000000000..692323e54 --- /dev/null +++ b/tests/php/parsing/generics_test_02.php @@ -0,0 +1,37 @@ +(vector $list, int $idx):X { + return $list->d[$idx]; +} + +function pair(X $x, Y $y):(X,Y) { return array($x, $y); } + +function car((X,?Y) $pair):X { + return $pair[0]; +} + +interface Face { +} + +class vector { function __construct($x) { $this->d = $x; } } + +function vector(/*...*/):vector { + return new vector(func_get_args()); +} + +class Foo implements Face { + const string BLEH = "b"; +} + +$blork = pair('c', '-'); + +function right_shift_hack(Foo,Foo>>,Foo>>>,Foo>>>> $bonk, (function(Foo,Bar):C) $d) { +} + +$a = vector('a','aa','aaa'); +$d = (function():UNICORNS{return 'd';}); +echo vidx($a, 0), Foo::BLEH, car($blork), $d(); + +abcd; diff --git a/tests/php/parsing/hh.php b/tests/php/parsing/hh.php new file mode 100644 index 000000000..8b5238ea9 --- /dev/null +++ b/tests/php/parsing/hh.php @@ -0,0 +1,3 @@ +