Permalink
Browse files

cmf: optimize loading autoload file, part1

Summary:
cmf currently takes 7.4s to parse the autoload file
(see cmf -profile). This patch introduces a new parsing
function for PHP that leaves out many features which are not
necessary for what we do on the autoload file.
With it, parsing the huge autoload file takes 1.5s.

There are other optimizations we could do:
 - cache the parsed AST on the disk
 - not use the PHP parser at all and make a parsing function
   specialized for the autoload file
 - use julien's fbstrict parser (dunno if faster on such a file)
 - use hiphop parser
 - ...

The next patch will modify check_module to use this new function.

Test Plan:
$ time ~/pfff/pfff.opt -profile -parse_php_fast autoload_map.php
1.5s

Reviewers: pieter, zoel

Reviewed By: pieter

CC: julienv, andrewparoski, jakubv, ptarjan

Differential Revision: https://phabricator.fb.com/D658432

Task ID: 1915724
  • Loading branch information...
aryx committed Dec 14, 2012
1 parent 61f3158 commit cd490d3e515d7303e45263d3f167759386e40e24
Showing with 75 additions and 0 deletions.
  1. +67 −0 lang_php/parsing/parse_php.ml
  2. +3 −0 lang_php/parsing/parse_php.mli
  3. +5 −0 lang_php/parsing/test_parsing_php.ml
@@ -545,4 +545,71 @@ let (class_def_of_string: string -> Ast_php.class_def) = fun s ->
let lexbuf = Lexing.from_string s in
Parser_php.class_declaration_statement basic_lexer_skip_comments lexbuf
+(* The default PHP parser function stores position information for all
+ * tokens, builds some Parse_php.info_items for each toplevel entity, and
+ * does other things which are useful most of the time for some analysis
+ * but which start to really slow down parsing for huge (generated) PHP
+ * files. Enter parse_fast(), which disables most of those things.
+ *
+ * Note that it may not parse all PHP code correctly, so use with caution.
+ *
+ * Takes the path of the PHP file to parse and returns the result of
+ * Parser_php.main. On a syntax error the offending lexeme is logged and
+ * Parsing.Parse_error is re-raised. The input channel is released on
+ * every exit path, including when the lexer or the parser raises.
+ *)
+let parse_fast file =
+  let chan = open_in file in
+  (* Everything after open_in is wrapped so that the channel is also
+   * released on failure paths (lexer exceptions, parse errors), not only
+   * after a successful parse.
+   *)
+  try
+    let lexbuf = Lexing.from_channel chan in
+    Lexer_php.reset();
+    Lexer_php._mode_stack := [Lexer_php.INITIAL];
+
+    let rec php_next_token lexbuf =
+      let tok =
+        (* for yyless emulation: tokens pushed back by the lexer are
+         * served before reading anything new from lexbuf.
+         *)
+        match !Lexer_php._pending_tokens with
+        | x::xs ->
+            Lexer_php._pending_tokens := xs;
+            x
+        | [] ->
+            (* dispatch on the current lexer mode *)
+            (match Lexer_php.current_mode () with
+            | Lexer_php.INITIAL ->
+                Lexer_php.initial lexbuf
+            | Lexer_php.ST_IN_SCRIPTING ->
+                Lexer_php.st_in_scripting lexbuf
+            | Lexer_php.ST_IN_SCRIPTING2 ->
+                Lexer_php.st_in_scripting lexbuf
+            | Lexer_php.ST_DOUBLE_QUOTES ->
+                Lexer_php.st_double_quotes lexbuf
+            | Lexer_php.ST_BACKQUOTE ->
+                Lexer_php.st_backquote lexbuf
+            | Lexer_php.ST_LOOKING_FOR_PROPERTY ->
+                Lexer_php.st_looking_for_property lexbuf
+            | Lexer_php.ST_LOOKING_FOR_VARNAME ->
+                Lexer_php.st_looking_for_varname lexbuf
+            | Lexer_php.ST_VAR_OFFSET ->
+                Lexer_php.st_var_offset lexbuf
+            | Lexer_php.ST_START_HEREDOC s ->
+                Lexer_php.st_start_heredoc s lexbuf
+            | Lexer_php.ST_START_NOWDOC s ->
+                Lexer_php.st_start_nowdoc s lexbuf
+            | Lexer_php.ST_IN_XHP_TAG current_tag ->
+                Lexer_php.st_in_xhp_tag current_tag lexbuf
+            | Lexer_php.ST_IN_XHP_TEXT current_tag ->
+                Lexer_php.st_in_xhp_text current_tag lexbuf
+            )
+      in
+      (* comments, whitespace and open/close tags are never handed to the
+       * grammar: skip them and fetch the next token instead.
+       *)
+      match tok with
+      | Parser_php.T_COMMENT _ | Parser_php.T_DOC_COMMENT _
+      | Parser_php.TSpaces _ | Parser_php.TNewline _
+      | Parser_php.TCommentPP _
+      | Parser_php.T_OPEN_TAG _
+      | Parser_php.T_CLOSE_TAG _ ->
+          php_next_token lexbuf
+      | _ -> tok
+    in
+    let res =
+      try Parser_php.main php_next_token lexbuf
+      with Parsing.Parse_error ->
+        pr2 (spf "parsing error in php fast parser: %s"
+               (Lexing.lexeme lexbuf));
+        raise Parsing.Parse_error
+    in
+    close_in chan;
+    res
+  with e ->
+    (* close_in_noerr: a failing close must not mask the original error *)
+    close_in_noerr chan;
+    raise e
+
(*e: parse_php.ml *)
@@ -34,6 +34,9 @@ val ast_and_tokens:
val parse_any:
Common.filename -> Ast_php.any
+val parse_fast:
+ Common.filename -> Ast_php.program (* Faster, reduced-feature variant of the
+ * PHP parser meant for huge (generated) PHP files. It may not parse all
+ * PHP code correctly, so use with caution. On a syntax error it logs the
+ * offending lexeme and re-raises Parsing.Parse_error.
+ *)
+
(*s: extra parse function signature *)
val xdebug_expr_of_string: string -> Ast_php.expr
val class_def_of_string: string -> Ast_php.class_def
@@ -158,6 +158,9 @@ let test_parse_xdebug_expr s =
let _e = Parse_php.xdebug_expr_of_string s in
raise Todo
+(* Command-line action behind -parse_php_fast: runs Parse_php.parse_fast
+ * on [file] and throws the resulting AST away, so only exceptions raised
+ * while parsing are observable.
+ *)
+let test_parse_php_fast file =
+  ignore (Parse_php.parse_fast file)
(*****************************************************************************)
(* Main entry for Arg *)
(*****************************************************************************)
@@ -187,6 +190,8 @@ let actions () = [
Common.mk_action_1_arg test_tokens_php;
(*e: test_parsing_php actions *)
+ "-parse_php_fast", " <file>",
+ Common.mk_action_1_arg test_parse_php_fast;
"-unparse_php", " <file>",
Common.mk_action_1_arg test_unparse_php;
"-pretty_print_php", " <file>",

0 comments on commit cd490d3

Please sign in to comment.