Skip to content
This repository has been archived by the owner on Jun 4, 2019. It is now read-only.

Commit

Permalink
Browse files Browse the repository at this point in the history
Reuse the AST so multiple pattern matching is more efficient.
  • Loading branch information
Mike Shema committed Apr 3, 2015
1 parent 7734663 commit 39b0933
Show file tree
Hide file tree
Showing 3 changed files with 89 additions and 83 deletions.
42 changes: 22 additions & 20 deletions lang_php/matcher/sgrep_php.ml
Expand Up @@ -65,38 +65,26 @@ let parse str =
(* Main entry point *)
(*****************************************************************************)

let sgrep ?(case_sensitive=false) ~hook pattern file =
let ast =
try
Parse_php.parse_program file
with Parse_php.Parse_error _err ->
(* we usually do sgrep on a set of files or directories,
* so we don't want on error in one file to stop the
* whole process.
*)
Common.pr2 (spf "warning: parsing problem in %s" file);
[]
in

let sgrep_ast ?(case_sensitive=false) ~hook pattern ast =
(* coupling: copy paste with lang_php/matcher/spatch_php.ml
* coupling: copy paste with sgrep_lint
*)
let hook =
let hook =
match pattern with
| Expr (XhpHtml xhp) ->
{ V.default_visitor with
V.kxhp_html = (fun (k, _) x ->
let matches_with_env =
let matches_with_env =
Matching_php.match_xhp_xhp xhp x
in
in
if matches_with_env = []
then k x
else begin
(* could also recurse to find nested matching inside
* the matched code itself.
*)
let matched_tokens = Lib_parsing_php.ii_of_any (XhpHtml2 x) in
matches_with_env +> List.iter (fun env ->
matches_with_env +> List.iter (fun env ->
hook env matched_tokens
)
end
Expand All @@ -106,7 +94,7 @@ let sgrep ?(case_sensitive=false) ~hook pattern file =
| Expr pattern_expr ->
{ V.default_visitor with
V.kexpr = (fun (k, _) x ->
let matches_with_env =
let matches_with_env =
Matching_php.match_e_e pattern_expr x
in
if matches_with_env = []
Expand All @@ -126,7 +114,7 @@ let sgrep ?(case_sensitive=false) ~hook pattern file =
| Stmt2 pattern ->
{ V.default_visitor with
V.kstmt = (fun (k, _) x ->
let matches_with_env =
let matches_with_env =
Matching_php.match_st_st pattern x
in
if matches_with_env = []
Expand Down Expand Up @@ -163,11 +151,25 @@ let sgrep ?(case_sensitive=false) ~hook pattern file =
);
}

| _ -> failwith (spf "pattern not yet supported:" ^
| _ -> failwith (spf "pattern not yet supported:" ^
Export_ast_php.ml_pattern_string_of_any pattern)
in
(* opti ? dont analyze func if no constant in it ?*)
Common.save_excursion Php_vs_php.case_sensitive case_sensitive (fun() ->
(V.mk_visitor hook) (Program ast)
)

let sgrep ?(case_sensitive=false) ~hook pattern file =
let ast =
try
Parse_php.parse_program file
with Parse_php.Parse_error _err ->
(* we usually do sgrep on a set of files or directories,
* so we don't want on error in one file to stop the
* whole process.
*)
Common.pr2 (spf "warning: parsing problem in %s" file);
[]
in
sgrep_ast ~case_sensitive ~hook pattern ast

5 changes: 5 additions & 0 deletions lang_php/matcher/sgrep_php.mli
Expand Up @@ -9,3 +9,8 @@ val sgrep:
?case_sensitive: bool ->
hook:(Metavars_php.metavars_binding -> Ast_php.info list -> unit) ->
Ast_php.any -> Common.filename -> unit

val sgrep_ast:
?case_sensitive: bool ->
hook:(Metavars_php.metavars_binding -> Ast_php.info list -> unit) ->
Ast_php.any -> Ast_php.program -> unit
125 changes: 62 additions & 63 deletions main_sgrep.ml
Expand Up @@ -145,6 +145,41 @@ let ast_fuzzy_of_string str =
(* Language specific *)
(*****************************************************************************)

type ast_t =
| Fuzzy of Ast_fuzzy.tree list
| Php of Ast_php.program

let create_ast file =
match !lang with
| "php" ->
Php
(try
(Parse_php.parse_program file)
with Parse_php.Parse_error _err ->
Common.pr2 (spf "warning: parsing problem in %s" file);
[])
| _ ->
Fuzzy
(try
(match !lang with
| ("c" | "c++") ->
Common.save_excursion Flag_parsing_cpp.verbose_lexing false (fun () ->
Parse_cpp.parse_fuzzy file +> fst
)
| "java" ->
Parse_java.parse_fuzzy file +> fst
| "js" ->
Parse_js.parse_fuzzy file +> fst
| "ml" ->
Parse_ml.parse_fuzzy file +> fst
| "phpfuzzy" ->
Parse_php.parse_fuzzy file +> fst
| _ ->
failwith ("unsupported language: " ^ !lang))
with exn ->
pr2 (spf "PB with %s, exn = %s" file (Common.exn_to_s exn));
[])

let parse_pattern str =
match !lang with
| "php" -> Left (Sgrep_php.parse str)
Expand All @@ -155,93 +190,56 @@ let parse_pattern str =
Right (ast_fuzzy_of_string str)
| _ -> failwith ("unsupported language: " ^ !lang)

let sgrep pattern file =
match !lang, pattern with
| "php", Left pattern ->
Sgrep_php.sgrep
~case_sensitive:!case_sensitive
~hook:(fun env matched_tokens ->
print_match !mvars env Lib_parsing_php.ii_of_any matched_tokens
)
pattern
file
| ("c" | "c++"), Right pattern ->
let ast =
try
Common.save_excursion Flag_parsing_cpp.verbose_lexing false (fun () ->
Parse_cpp.parse_fuzzy file +> fst
)
with exn ->
pr2 (spf "PB with %s, exn = %s" file (Common.exn_to_s exn));
[]
in
let read_patterns name =
let ic = open_in name in
let try_read () =
try Some (input_line ic) with End_of_file -> None in
let rec loop acc = match try_read () with
| Some s -> loop ((parse_pattern s) :: acc)
| None -> close_in ic; List.rev acc in
loop []

let sgrep_ast pattern any_ast =
match !lang, pattern, any_ast with
| ("c" | "c++"), Right pattern, Fuzzy ast ->
Sgrep_fuzzy.sgrep
~hook:(fun env matched_tokens ->
print_match !mvars env Ast_fuzzy.toks_of_trees matched_tokens
)
pattern ast
| "ml", Right pattern ->
let ast =
try
Parse_ml.parse_fuzzy file +> fst
with exn ->
pr2 (spf "PB with %s, exn = %s" file (Common.exn_to_s exn));
[]
in
| "java", Right pattern, Fuzzy ast ->
Sgrep_fuzzy.sgrep
~hook:(fun env matched_tokens ->
print_match !mvars env Ast_fuzzy.toks_of_trees matched_tokens
)
pattern ast
| "phpfuzzy", Right pattern ->
let ast =
try
Parse_php.parse_fuzzy file +> fst
with exn ->
pr2 (spf "PB with %s, exn = %s" file (Common.exn_to_s exn));
[]
in
| "js", Right pattern, Fuzzy ast ->
Sgrep_fuzzy.sgrep
~hook:(fun env matched_tokens ->
print_match !mvars env Ast_fuzzy.toks_of_trees matched_tokens
)
pattern ast
| "java", Right pattern ->
let ast =
try
Parse_java.parse_fuzzy file +> fst
with exn ->
pr2 (spf "PB with %s, exn = %s" file (Common.exn_to_s exn));
[]
in
| "ml", Right pattern, Fuzzy ast ->
Sgrep_fuzzy.sgrep
~hook:(fun env matched_tokens ->
print_match !mvars env Ast_fuzzy.toks_of_trees matched_tokens
)
pattern ast
| "js", Right pattern ->
let ast =
try
Parse_js.parse_fuzzy file +> fst
with exn ->
pr2 (spf "PB with %s, exn = %s" file (Common.exn_to_s exn));
[]
in
| "php", Left pattern, Php ast ->
Sgrep_php.sgrep_ast
~case_sensitive:!case_sensitive
~hook:(fun env matched_tokens ->
print_match !mvars env Lib_parsing_php.ii_of_any matched_tokens
)
pattern ast
| "phpfuzzy", Right pattern, Fuzzy ast ->
Sgrep_fuzzy.sgrep
~hook:(fun env matched_tokens ->
print_match !mvars env Ast_fuzzy.toks_of_trees matched_tokens
)
pattern ast
| _ -> failwith ("unsupported language: " ^ !lang)

let read_patterns name =
let ic = open_in name in
let try_read () =
try Some (input_line ic) with End_of_file -> None in
let rec loop acc = match try_read () with
| Some s -> loop ((parse_pattern s) :: acc)
| None -> close_in ic; List.rev acc in
loop []
| _ ->
failwith ("unsupported language: " ^ !lang)

(*****************************************************************************)
(* Main action *)
Expand Down Expand Up @@ -269,7 +267,8 @@ let main_action xs =

files +> List.iter (fun file ->
if !verbose then pr2 (spf "processing: %s" file);
let sgrep pattern = sgrep pattern file in
let ast = create_ast file in
let sgrep pattern = sgrep_ast pattern ast in
List.iter sgrep patterns
);

Expand Down

0 comments on commit 39b0933

Please sign in to comment.