Skip to content
This repository has been archived by the owner on Jun 4, 2019. It is now read-only.

Commit

Permalink
cmf: optimize loading autoload file, part1
Browse files Browse the repository at this point in the history
Summary:
It currently takes cmf 7.4s to parse the autoload file
(see cmf -profile). This patch introduces a new parsing
function for PHP which removes many features that are not
necessary for what we do on the autoload file.
It now takes 1.5s to parse the huge autoload file.

There are other optimizations we could do:
 - cache the parsed AST on the disk
 - not use the PHP parser at all and make a parsing function
   specialized for the autoload file
 - use julien's fbstrict parser (dunno if faster on such a file)
 - use hiphop parser
 - ...

The next patch will modify check_module to use this new function.

Test Plan:
$ time ~/pfff/pfff.opt -profile -parse_php_fast autoload_map.php
1.5s

Reviewers: pieter, zoel

Reviewed By: pieter

CC: julienv, andrewparoski, jakubv, ptarjan

Differential Revision: https://phabricator.fb.com/D658432

Task ID: 1915724
  • Loading branch information
pad committed Dec 14, 2012
1 parent 61f3158 commit cd490d3
Show file tree
Hide file tree
Showing 3 changed files with 75 additions and 0 deletions.
67 changes: 67 additions & 0 deletions lang_php/parsing/parse_php.ml
Expand Up @@ -545,4 +545,71 @@ let (class_def_of_string: string -> Ast_php.class_def) = fun s ->
let lexbuf = Lexing.from_string s in
Parser_php.class_declaration_statement basic_lexer_skip_comments lexbuf

(* The default PHP parsing function stores position information for every
 * token, builds some Parse_php.info_items for each toplevel entity, and
 * does other things which are useful for most analyses but which really
 * slow down parsing of huge (generated) PHP files.
 * Enter parse_fast(), which disables most of those things: it feeds
 * Parser_php.main directly from the Lexer_php rules and drops comment,
 * whitespace, newline and open/close-tag tokens before the grammar sees
 * them.
 * Note that it may not parse all PHP code correctly, so use with caution.
 *
 * Raises Parsing.Parse_error, after logging the current lexeme with pr2,
 * when the grammar rejects the input. The input channel is closed whether
 * Parser_php.main returns or raises.
 *)
let parse_fast file =
  let chan = open_in file in
  let lexbuf = Lexing.from_channel chan in
  Lexer_php.reset();
  Lexer_php._mode_stack := [Lexer_php.INITIAL];

  (* Token supplier handed to the generated parser: picks the lexer rule
   * matching the current lexer mode and skips tokens the grammar does
   * not need. *)
  let rec php_next_token lexbuf =
    let tok =
      (* for yyless emulation: serve queued tokens before lexing more *)
      match !Lexer_php._pending_tokens with
      | x::xs ->
          Lexer_php._pending_tokens := xs;
          x
      | [] ->
          (match Lexer_php.current_mode () with
          | Lexer_php.INITIAL ->
              Lexer_php.initial lexbuf
          | Lexer_php.ST_IN_SCRIPTING ->
              Lexer_php.st_in_scripting lexbuf
          (* ST_IN_SCRIPTING2 is lexed with the same rule as ST_IN_SCRIPTING *)
          | Lexer_php.ST_IN_SCRIPTING2 ->
              Lexer_php.st_in_scripting lexbuf
          | Lexer_php.ST_DOUBLE_QUOTES ->
              Lexer_php.st_double_quotes lexbuf
          | Lexer_php.ST_BACKQUOTE ->
              Lexer_php.st_backquote lexbuf
          | Lexer_php.ST_LOOKING_FOR_PROPERTY ->
              Lexer_php.st_looking_for_property lexbuf
          | Lexer_php.ST_LOOKING_FOR_VARNAME ->
              Lexer_php.st_looking_for_varname lexbuf
          | Lexer_php.ST_VAR_OFFSET ->
              Lexer_php.st_var_offset lexbuf
          | Lexer_php.ST_START_HEREDOC s ->
              Lexer_php.st_start_heredoc s lexbuf
          | Lexer_php.ST_START_NOWDOC s ->
              Lexer_php.st_start_nowdoc s lexbuf
          | Lexer_php.ST_IN_XHP_TAG current_tag ->
              Lexer_php.st_in_xhp_tag current_tag lexbuf
          | Lexer_php.ST_IN_XHP_TEXT current_tag ->
              Lexer_php.st_in_xhp_text current_tag lexbuf
          )
    in
    match tok with
    (* tokens filtered out before reaching the grammar *)
    | Parser_php.T_COMMENT _ | Parser_php.T_DOC_COMMENT _
    | Parser_php.TSpaces _ | Parser_php.TNewline _
    | Parser_php.TCommentPP _
    | Parser_php.T_OPEN_TAG _
    | Parser_php.T_CLOSE_TAG _ ->
        php_next_token lexbuf
    | _ -> tok
  in
  let res =
    try Parser_php.main php_next_token lexbuf
    with exn ->
      (* Do not leak the file descriptor when lexing or parsing fails;
       * close_in_noerr so a close failure cannot mask the real error. *)
      close_in_noerr chan;
      (match exn with
      | Parsing.Parse_error ->
          pr2 (spf "parsing error in php fast parser: %s"
                 (Lexing.lexeme lexbuf))
      | _ -> ());
      raise exn
  in
  close_in chan;
  res

(*e: parse_php.ml *)
3 changes: 3 additions & 0 deletions lang_php/parsing/parse_php.mli
Expand Up @@ -34,6 +34,9 @@ val ast_and_tokens:
val parse_any:
Common.filename -> Ast_php.any

(* Faster, stripped-down parsing function intended for huge (generated)
 * PHP files. Per the implementation notes it may not parse all PHP code
 * correctly, so use with caution.
 * Raises Parsing.Parse_error when the input is rejected by the grammar.
 *)
val parse_fast:
Common.filename -> Ast_php.program

(*s: extra parse function signature *)
val xdebug_expr_of_string: string -> Ast_php.expr
val class_def_of_string: string -> Ast_php.class_def
Expand Down
5 changes: 5 additions & 0 deletions lang_php/parsing/test_parsing_php.ml
Expand Up @@ -158,6 +158,9 @@ let test_parse_xdebug_expr s =
let _e = Parse_php.xdebug_expr_of_string s in
raise Todo

(* Action behind the -parse_php_fast flag: run the fast parser on [file]
 * and throw the resulting AST away; any exception it raises propagates. *)
let test_parse_php_fast file =
  ignore (Parse_php.parse_fast file)
(*****************************************************************************)
(* Main entry for Arg *)
(*****************************************************************************)
Expand Down Expand Up @@ -187,6 +190,8 @@ let actions () = [
Common.mk_action_1_arg test_tokens_php;
(*e: test_parsing_php actions *)

"-parse_php_fast", " <file>",
Common.mk_action_1_arg test_parse_php_fast;
"-unparse_php", " <file>",
Common.mk_action_1_arg test_unparse_php;
"-pretty_print_php", " <file>",
Expand Down

0 comments on commit cd490d3

Please sign in to comment.