Skip to content

Commit

Permalink
ignore hack keywords in php files
Browse files Browse the repository at this point in the history
Summary:
Hack keywords in PHP files should be understood as simple identifiers. For example:
```
class inout {}
```
The simplest way to do that is to make the lexer aware of what it is lexing: PHP or Hack. This is done using a (for now very small) `Lexer_env` module.
The generated `Full_fidelity_token_kind` does the lexing of keywords in function `from_string`. We change the generator so that this function now accepts an `is_hack` argument. We also change the `Full_fidelity_schema` so that developers now have to specify whether a given keyword is "hack_only" or "php_and_hack".

Reviewed By: jamesjwu

Differential Revision: D8735588

fbshipit-source-id: 06474fd994885dce3f1ea9fed695bf5ba7bfac33
  • Loading branch information
CatherineGasnier authored and fredemmott committed Jul 19, 2018
1 parent 9db89dc commit 16a8703
Show file tree
Hide file tree
Showing 36 changed files with 1,764 additions and 423 deletions.
30 changes: 28 additions & 2 deletions hphp/hack/src/generate_full_fidelity.ml
Expand Up @@ -2151,11 +2151,37 @@ module GenerateFFTokenKind = struct
let to_kind_declaration x =
sprintf " | %s\n" x.token_kind

(* Appends one guard column to [guards], the guard text accumulated so far
   for a generated match arm, and returns the extended text.

   [cond] and the optional [else_cond] each pair a flag with the guard
   expression to emit when that flag is set; [cond] takes precedence when
   both flags are set.  When neither flag is set the column is filled with
   blanks instead, so every generated arm has the same width.

   The first real guard on an arm (i.e. [guards] is empty or all blanks) is
   introduced by "when"; later ones are chained with "&&". *)
let add_guard_or_pad :
  cond:(bool * string) -> ?else_cond:(bool * string) -> string -> string =
  fun ~cond:(use_guard, guard_text) ?else_cond guards ->
    let (use_else, else_text) =
      match else_cond with
      | None -> (false, "")
      | Some flagged_guard -> flagged_guard in
    (* Both candidate guards occupy a column as wide as the longer of them. *)
    let width = max (String.length guard_text) (String.length else_text) in
    let widen text = text ^ String.make (width - String.length text) ' ' in
    let all_blank text =
      String.equal text (String.make (String.length text) ' ') in
    let keyword =
      if not (use_guard || use_else) then " "
      else if all_blank guards then "when "
      else "&& " in
    let column =
      if use_guard then widen guard_text
      else if use_else then widen else_text
      else String.make width ' ' in
    guards ^ keyword ^ column ^ " "

(* Renders the match arm of the generated [from_string] function for the
   fixed-text token [x]: the escaped token text as a string pattern, blank
   padding computed from [given_text_width] so the arrows line up, an optional
   guard, and [Some <token_kind>] as the result.

   The guard comes from [add_guard_or_pad]: tokens flagged [is_xhp] are
   guarded by "(is_hack || allow_xhp)", otherwise tokens flagged [hack_only]
   are guarded by "is_hack"; tokens with neither flag get blank padding in
   place of a guard. *)
let to_from_string x =
let token_text = escape_token_text x.token_text in
let spacer_width = given_text_width - String.length token_text in
let spacer = String.make spacer_width ' ' in
sprintf " | \"%s\"%s -> Some %s\n" token_text spacer x.token_kind
let guards = add_guard_or_pad ""
~cond:(x.is_xhp, "(is_hack || allow_xhp)")
~else_cond:(x.hack_only, "is_hack") in
sprintf " | \"%s\"%s %s-> Some %s\n" token_text spacer guards x.token_kind

let to_to_string x =
let token_text = escape_token_text x.token_text in
Expand All @@ -2174,7 +2200,7 @@ KIND_DECLARATIONS_GIVEN_TEXT (* Variable text tokens *)
KIND_DECLARATIONS_VARIABLE_TEXT
[@@deriving show]
let from_string keyword =
let from_string keyword ~is_hack ~allow_xhp =
match keyword with
| \"true\" -> Some BooleanLiteral
| \"false\" -> Some BooleanLiteral
Expand Down
4 changes: 4 additions & 0 deletions hphp/hack/src/hh_single_compile.ml
Expand Up @@ -13,6 +13,7 @@ open Sys_utils
module P = Printf
module SyntaxError = Full_fidelity_syntax_error
module SourceText = Full_fidelity_source_text
module Lex = Full_fidelity_lexer
module Logger = HackcEventLogger

(*****************************************************************************)
Expand Down Expand Up @@ -277,11 +278,14 @@ let parse_text compiler_options popt fn text =
not (Hhbc_options.source_mapping !Hhbc_options.compiler_options) in
let enable_hh_syntax =
Hhbc_options.enable_hiphop_syntax !Hhbc_options.compiler_options in
let enable_xhp =
Hhbc_options.enable_xhp !Hhbc_options.compiler_options in
let php5_compat_mode =
not (Hhbc_options.enable_uniform_variable_syntax !Hhbc_options.compiler_options) in
let hacksperimental =
Hhbc_options.hacksperimental !Hhbc_options.compiler_options in
let systemlib_compat_mode = Emit_env.is_systemlib () in
Lex.Env.set ~force_hh:enable_hh_syntax ~enable_xhp;
let env = Full_fidelity_ast.make_env
~parser_options:popt
~ignore_pos
Expand Down
39 changes: 30 additions & 9 deletions hphp/hack/src/parser/full_fidelity_lexer.ml
Expand Up @@ -12,13 +12,29 @@ module TokenKind = Full_fidelity_token_kind
module SourceText = Full_fidelity_source_text
module SyntaxError = Full_fidelity_syntax_error

(* Lexer-wide settings that decide whether Hack-only keywords and XHP tokens
   are recognised.  The state lives in module-level references, so it is
   shared by every lexer created in the process. *)
module Env = struct

  (* Options supplied by the embedding tool through [set]. *)
  let force_hh_opt = ref false
  let enable_xhp_opt = ref false

  (* Whether the file being lexed is a Hack file; starts out as [true] and is
     changed through [set_is_hh_file]. *)
  let is_hh_file = ref true

  (* Records whether the file currently being lexed is a Hack file. *)
  let set_is_hh_file flag = is_hh_file := flag

  (* Stores the two options; neither touches [is_hh_file]. *)
  let set ~force_hh ~enable_xhp =
    enable_xhp_opt := enable_xhp;
    force_hh_opt := force_hh

  (* True when the current file is Hack or Hack syntax has been forced. *)
  let is_hh () = !force_hh_opt || !is_hh_file

  (* XHP is always available in Hack; otherwise only when explicitly enabled. *)
  let enable_xhp () = !enable_xhp_opt || is_hh ()

end

module Lexer : sig
type t = {
text : SourceText.t;
start : int; (* Both start and offset are absolute offsets in the text. *)
offset : int;
errors : SyntaxError.t list;
hacksperimental : bool
hacksperimental : bool;
} [@@deriving show]
val make : ?hacksperimental:bool -> SourceText.t -> t
val make_at : ?hacksperimental:bool -> SourceText.t -> int -> t
Expand All @@ -44,7 +60,7 @@ end = struct
start : int; (* Both start and offset are absolute offsets in the text. *)
offset : int;
errors : SyntaxError.t list;
hacksperimental : bool (* write-once: record updates should not update this field *)
hacksperimental : bool; (* write-once: record updates should not update this field *)
} [@@deriving show]

let make ?(hacksperimental = false) text =
Expand Down Expand Up @@ -1397,25 +1413,26 @@ let as_case_insensitive_keyword text =
non-lower versions in our codebase. *)
let lower = String.lowercase_ascii text in
match lower with
| "__halt_compiler" | "abstract" | "and" | "array" | "as" | "bool" | "boolean" | "break"
| "__halt_compiler" | "abstract" | "and" | "array" | "as" | "bool" | "boolean" | "break"
| "callable"
| "case" | "catch" | "class" | "clone" | "const" | "continue" | "declare" | "default"
| "die" | "do" | "echo" | "else" | "elseif" | "empty" | "enddeclare" | "endfor"
| "endforeach" | "endif" | "endswitch" | "endwhile" | "eval" | "exit" | "extends" | "false"
| "final" | "finally" | "for" | "foreach" | "function" | "global" | "goto" | "if"
| "implements" | "include" | "include_once" | "inout" | "instanceof" | "insteadof" | "int"
| "integer"
| "implements" | "include" | "include_once" | "instanceof" | "insteadof" | "int" | "integer"
| "interface" | "isset" | "list" | "namespace" | "new" | "null" | "or" | "parent"
| "print" | "private" | "protected" | "public" | "require" | "require_once"
| "return" | "self" | "static" | "string" | "switch" | "throw" | "trait"
| "try" | "true" | "unset" | "use" | "using" | "var" | "void" | "while"
| "try" | "true" | "unset" | "use" | "var" | "void" | "while"
| "xor" | "yield" -> lower
| "inout" | "using" when Env.is_hh () -> lower
| _ -> text

let as_keyword kind lexer =
if kind = TokenKind.Name then
let text = as_case_insensitive_keyword (current_text lexer) in
match TokenKind.from_string text with
let is_hack = Env.is_hh () and allow_xhp = Env.enable_xhp () in
match TokenKind.from_string text ~is_hack ~allow_xhp with
| Some TokenKind.Let when (not (hacksperimental lexer)) -> TokenKind.Name
| Some keyword -> keyword
| _ -> TokenKind.Name
Expand Down Expand Up @@ -1608,8 +1625,12 @@ let skip_to_end_of_markup lexer ~is_leading_section =
let ch1 = peek_char lexer 1 in
let ch2 = peek_char lexer 2 in
match ch0, ch1, ch2 with
| ('H' | 'h'), ('H' | 'h'), _ -> make_long_tag lexer 2
| ('P' | 'p'), ('H' | 'h'), ('P' | 'p') -> make_long_tag lexer 3
| ('H' | 'h'), ('H' | 'h'), _ ->
Env.set_is_hh_file true;
make_long_tag lexer 2
| ('P' | 'p'), ('H' | 'h'), ('P' | 'p') ->
Env.set_is_hh_file false;
make_long_tag lexer 3
| '=', _, _ ->
begin
(* skip = *)
Expand Down
4 changes: 4 additions & 0 deletions hphp/hack/src/parser/full_fidelity_lexer.mli
Expand Up @@ -7,6 +7,10 @@
*
*)

(** Lexer-wide options controlling Hack keyword and XHP recognition. *)
module Env : sig
(** [set ~force_hh ~enable_xhp] stores the two options for subsequent lexing.
    [force_hh] makes the lexer treat input as Hack regardless of the file's
    own kind; [enable_xhp] allows XHP tokens even outside Hack. *)
val set : force_hh:bool -> enable_xhp:bool -> unit
end

module WithToken : functor (Token : Lexable_token_sig.LexableToken_S) -> sig
type t [@@deriving show]
type string_literal_kind =
Expand Down

0 comments on commit 16a8703

Please sign in to comment.