Skip to content
Browse files

callgraph_php2, database_juju, etc

  • Loading branch information...
1 parent 664f52d commit 03c6f2340cdf9febf4ee46c77edce98553cb7509 @aryx aryx committed Feb 18, 2012
View
25 lang_php/analyze/foundation/Makefile
@@ -7,35 +7,36 @@ TARGET=lib
# used to be in analyze/basic/
SRC= flag_analyze_php.ml \
entity_php.ml \
- namespace_php.ml \
+ namespace_php.ml scoping_php.ml \
comment_php.ml annotation_php.ml \
unsugar_php.ml \
- scoping_php.ml \
env_php.ml \
- include_require_php.ml define_php.ml func_php.ml class_php.ml \
+ func_php.ml class_php.ml define_php.ml include_require_php.ml \
lib_analyze_php.ml \
# used to be in ast_simple/
SRC+= ast_php_simple.ml ast_php_simple_build.ml meta_ast_php_simple.ml \
- ast_php_simple_toposort.ml \
- env_interpreter_php.ml \
- abstract_interpreter_php_helpers.ml \
- tainting_fake_php.ml abstract_interpreter_php.ml \
- unit_static_analysis_php.ml
+ ast_php_simple_toposort.ml
+
#note: tools/phpmanual_xml.ml is related to builtins_php.ml
SRC+= defs_uses_php.ml \
builtins_php.ml \
controlflow_php.ml controlflow_build_php.ml \
- callgraph_php.ml \
- database_php.ml \
+ callgraph_php.ml callgraph_php2.ml \
graph_php.ml \
+# used to be in static_analysis/
+SRC+= env_interpreter_php.ml \
+ abstract_interpreter_php_helpers.ml \
+ tainting_fake_php.ml abstract_interpreter_php.ml \
+ callgraph_php_build.ml
+SRC+= database_php.ml database_juju_php.ml
# used to be in analyze/
SRC+= tags_php.ml normalize_php.ml \
database_light_php.ml \
- unit_foundation_php.ml \
database_php_build_helpers.ml database_php_build.ml \
database_php_build2.ml \
database_prolog_php.ml \
- unit_prolog_php.ml unit_analyze_db_php.ml
+ unit_foundation_php.ml unit_analyze_db_php.ml \
+ unit_static_analysis_php.ml unit_prolog_php.ml
#old: pil, subsumed by ast_simple and abstract interpreter
# pil.ml pretty_print_pil.ml controlflow_pil.ml \
View
24 lang_php/analyze/foundation/abstract_interpreter_php.ml
@@ -20,6 +20,7 @@ open Env_interpreter_php
module A = Ast_php_simple
module Env = Env_interpreter_php
module H = Abstract_interpreter_php_helpers
+module CG = Callgraph_php2
module SMap = Map.Make (String)
@@ -67,7 +68,7 @@ let _checkpoint_heap = ref
(* for callgraph generation *)
let extract_paths = ref true
-let (graph: Env_interpreter_php.callgraph ref) = ref Map_poly.empty
+let (graph: Callgraph_php2.callgraph ref) = ref Map_poly.empty
(* throw exn instead of passing over unhandled constructs *)
let strict = ref true
@@ -90,7 +91,7 @@ exception LostControl
let save_path _env target =
if !extract_paths
- then graph := add_graph (List.hd !path) target !graph
+ then graph := CG.add_graph (List.hd !path) target !graph
let rec get_dynamic_function env heap v =
let heap, v = Ptr.get heap v in
@@ -104,7 +105,10 @@ let rec get_dynamic_function env heap v =
and get_function_list env heap = function
| [] -> raise LostControl
- | Vstring s :: _ -> heap, env.db.funs s
+ | Vstring s :: _ ->
+ (try heap, env.db.funs s
+ with Not_found -> raise (UnknownFunction s)
+ )
| _ :: rl -> get_function_list env heap rl
let rec get_string = function
@@ -156,10 +160,10 @@ module Taint = Tainting_fake_php.Taint
let rec program env heap program =
if !extract_paths
then begin
- path := [Env.FakeRoot];
+ path := [CG.FakeRoot];
List.iter (fake_root env heap) program;
end;
- path := [Env.File !(env.file)];
+ path := [CG.File !(env.file)];
let heap = stmtl env heap (exclude_toplevel_defs program) in
heap
@@ -385,7 +389,7 @@ and expr_ env heap x =
(* pad: other? *)
with (LostControl | UnknownFunction _) as exn ->
if !strict then raise exn;
- save_path env (Env.node_of_string s);
+ save_path env (CG.node_of_string s);
let heap, vl = Utils.lfold (expr env) heap el in
let res = Taint.when_call_not_found heap vl in
heap, res
@@ -677,7 +681,7 @@ and call env heap v el =
and call_fun f env heap el =
if !tracing
then Common.pr
- (Common.spf "%s->%s" (Env.string_of_node (List.hd !path))
+ (Common.spf "%s->%s" (CG.string_of_node (List.hd !path))
(unw f.f_name));
let is_clean =
let _, vl = Utils.lfold (expr env) heap el in
@@ -687,7 +691,7 @@ and call_fun f env heap el =
let n = try SMap.find (unw f.f_name) env.stack with Not_found -> 0 in
let env = { env with stack = SMap.add (unw f.f_name) (n+1) env.stack } in
(* pad: ugly, call_fun should also accept method_def *)
- save_path env (Env.node_of_string (unw f.f_name));
+ save_path env (CG.node_of_string (unw f.f_name));
(* stop when recurse in same function twice or when depth stack > 6 *)
if n >= 2 || List.length !path >= 6 && is_clean
(* || Sys.time() -. !time >= 1.0|| SMap.mem f.f_name !(env.safe) *)
@@ -700,7 +704,7 @@ and call_fun f env heap el =
let heap = parameters env heap f.f_params el in
let vars = fun_nspace f !(env.vars) in
let env = { env with vars = ref vars } in
- path := (Env.node_of_string (unw f.f_name)) :: !path;
+ path := (CG.node_of_string (unw f.f_name)) :: !path;
let heap = stmtl env heap f.f_body in
let heap, _, r = Var.get env heap "*return*" in
let heap, r = Ptr.get heap r in
@@ -901,7 +905,7 @@ and method_def env cname parent self this (heap, acc) m =
* There is a (ugly) corresponding call to node_of_string in
* call_fun().
*)
- f_name = w (Env.string_of_node (Env.Method (unw cname, unw m.m_name)));
+ f_name = w (CG.string_of_node (CG.Method (unw cname, unw m.m_name)));
f_params = m.m_params;
f_return_type = m.m_return_type;
f_body = m.m_body;
View
2 lang_php/analyze/foundation/abstract_interpreter_php.mli
@@ -13,7 +13,7 @@ val program:
val extract_paths: bool ref
-val graph: Env_interpreter_php.callgraph ref
+val graph: Callgraph_php2.callgraph ref
(* used by unit testing *)
val _checkpoint_heap:
View
62 lang_php/analyze/foundation/callgraph_php2.ml
@@ -0,0 +1,62 @@
+(* Julien Verlaguet, Yoann Padioleau
+ *
+ * Copyright (C) 2011, 2012 Facebook
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * version 2.1 as published by the Free Software Foundation, with the
+ * special exception on linking described in file license.txt.
+ *
+ * This library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the file
+ * license.txt for more details.
+ *)
+open Common
+
+(*****************************************************************************)
+(* Prelude *)
+(*****************************************************************************)
+
+(*****************************************************************************)
+(* Types *)
+(*****************************************************************************)
+
+type node = (* a vertex of the call graph: either a caller or a callee *)
+ | Function of string (* function name; also the fallback case of node_of_string *)
+ | Method of string * string (* printed as s1 ^ "::" ^ s2 by string_of_node; the interpreter builds it from (class name, method name) *)
+ | File of Common.filename (* printed with a "__TOP__" prefix by string_of_node *)
+ (* synthetic node, used to simplify the code that provokes the calls to toplevel functions *)
+ | FakeRoot
+
+type callgraph = (node, node Set_poly.t) Map_poly.t (* maps a source node to the set of target nodes recorded for it by add_graph *)
+
+(*****************************************************************************)
+(* Main entry point *)
+(*****************************************************************************)
+
+let (add_graph: node -> node -> callgraph -> callgraph) = (* add_graph src target graph returns graph extended with the edge src -> target *)
+ fun src target graph ->
+ let vs = try Map_poly.find src graph with Not_found -> Set_poly.empty in (* no binding yet for src: start from the empty target set *)
+ let vs = Set_poly.add target vs in
+ Map_poly.add src vs graph (* binds src to the enlarged target set *)
+
+(*****************************************************************************)
+(* string -> node, node -> string *)
+(*****************************************************************************)
+
+let string_of_node = function (* textual encoding of a node; node_of_string below parses this encoding *)
+ | File s -> "__TOP__" ^ s
+ | Function s -> s (* NOTE(review): a function name containing "::" or starting with "__TOP__" would presumably be parsed back as Method or File -- confirm against Common.(=~) *)
+ | Method (s1, s2) -> s1 ^ "::" ^ s2
+ | FakeRoot -> "__FAKE_ROOT__"
+
+let node_of_string s = (* parses the encoding produced by string_of_node; cases are tried in order and the first match wins *)
+ match s with
+ | _ when Common.(=~) s "__TOP__\\(.*\\)" -> (* "__TOP__" followed by the file name; NOTE(review): whether the match is anchored at the start of s depends on Common.(=~) -- confirm *)
+ File (Common.matched1 s)
+ | _ when Common.(=~) s "\\(.*\\)::\\(.*\\)" -> (* two parts separated by "::" *)
+ let (a, b) = Common.matched2 s in
+ Method (a, b)
+ | "__FAKE_ROOT__" -> FakeRoot
+ | _ -> Function s (* anything else is taken as a plain function name *)
View
14 lang_php/analyze/foundation/callgraph_php2.mli
@@ -0,0 +1,14 @@
+
+type node = (* a vertex of the call graph *)
+ | Function of string
+ | Method of string * string
+ | File of Common.filename
+ (* synthetic node, used to simplify the code that provokes the calls to toplevel functions *)
+ | FakeRoot
+
+type callgraph = (node, node Set_poly.t) Map_poly.t (* source node -> set of target nodes *)
+
+val add_graph: node -> node -> callgraph -> callgraph (* add_graph src target graph: graph plus the edge src -> target *)
+
+val string_of_node: node -> string (* "__TOP__" ^ file, function name, s1 ^ "::" ^ s2, or "__FAKE_ROOT__" *)
+val node_of_string: string -> node (* parses the strings produced by string_of_node; unrecognized input becomes a Function *)
View
0 lang_php/analyze/foundation/callgraph_php_build.ml
No changes.
View
0 lang_php/analyze/foundation/callgraph_php_build.mli
No changes.
View
86 lang_php/analyze/foundation/database_juju_php.ml
@@ -0,0 +1,86 @@
+(* Julien Verlaguet, Yoann Padioleau
+ *
+ * Copyright (C) 2011, 2012 Facebook
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * version 2.1 as published by the Free Software Foundation, with the
+ * special exception on linking described in file license.txt.
+ *
+ * This library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the file
+ * license.txt for more details.
+ *)
+open Common
+
+open Ast_php_simple
+module A = Ast_php_simple
+module Env = Env_interpreter_php
+module SMap = Map.Make (String)
+
+(*****************************************************************************)
+(* Prelude *)
+(*****************************************************************************)
+
+(*****************************************************************************)
+(* Types *)
+(*****************************************************************************)
+
+type database = { (* mutable tables filled by juju_db_of_files; keys are unwrapped definition names, values are stored through Common.serial *)
+ funs_juju : Ast_php_simple.func_def Common.cached SMap.t ref;
+ classes_juju : Ast_php_simple.class_def Common.cached SMap.t ref;
+ constants_juju: Ast_php_simple.constant_def Common.cached SMap.t ref;
+}
+
+(*****************************************************************************)
+(* Code database *)
+(*****************************************************************************)
+let juju_db_of_files ?(show_progress=false) xs = (* parses every file of xs and indexes its toplevel function, class and constant definitions by name *)
+ let db = {
+ funs_juju = ref SMap.empty;
+ classes_juju = ref SMap.empty;
+ constants_juju = ref SMap.empty;
+ }
+ in
+ xs +> Common_extra.progress ~show:show_progress (fun k ->
+ List.iter (fun file ->
+ k(); (* called once per file, before parsing; presumably advances the progress display -- confirm in Common_extra *)
+ try
+ let cst = Parse_php.parse_program file in
+ let ast = Ast_php_simple_build.program cst in
+ List.iter (fun x ->
+ (* todo: print warning when duplicate class/func ? For now SMap.add lets a later definition replace an earlier one with the same name. *)
+ match x with
+ | ClassDef c ->
+ db.classes_juju :=
+ SMap.add (A.unwrap c.c_name) (Common.serial c) !(db.classes_juju)
+ | FuncDef fd ->
+ db.funs_juju :=
+ SMap.add (A.unwrap fd.f_name) (Common.serial fd) !(db.funs_juju)
+ | ConstantDef c ->
+ db.constants_juju :=
+ SMap.add (A.unwrap c.cst_name) (Common.serial c) !(db.constants_juju)
+
+ | (Global _|StaticVars _
+ |Try (_, _, _)|Throw _
+ |Continue _|Break _|Return _
+ |Foreach (_, _, _, _)|For (_, _, _, _)|Do (_, _)|While (_, _)
+ |Switch (_, _)|If (_, _, _)
+ |Block _|Expr _
+ ) -> () (* every other toplevel statement is ignored; definitions nested inside it are not visited *)
+ ) ast
+ with e -> (* best effort: any exception for this file is printed with pr2 and the remaining files are still processed *)
+ Common.pr2 (spf "ERROR in %s, exn = %s" file (Common.exn_to_s e))
+ ));
+ db
+
+(* Exposes the tables of db as the lookup functions of an Env.code_database. todo: what if multiple matches?? Each name is bound to a single definition here. *)
+let code_database_of_juju_db db = { Env.
+ funs = (fun s -> let f = SMap.find s !(db.funs_juju) in (* the ref is read at lookup time; SMap.find raises Not_found when s is unknown *)
+ Common.unserial f);
+ classes = (fun s -> let c = SMap.find s !(db.classes_juju) in (* same contract as funs *)
+ Common.unserial c);
+ constants = (fun s -> let c = SMap.find s !(db.constants_juju) in (* same contract as funs *)
+ Common.unserial c);
+ }
View
9 lang_php/analyze/foundation/database_juju_php.mli
@@ -0,0 +1,9 @@
+
+type database (* abstract: built by juju_db_of_files, consumed by code_database_of_juju_db *)
+
+val juju_db_of_files:
+ ?show_progress:bool ->
+ Common.filename list -> database (* parses the given files; a file that raises an exception is reported and skipped; show_progress defaults to false *)
+
+val code_database_of_juju_db:
+ database -> Env_interpreter_php.code_database (* the resulting lookup functions raise Not_found on an unknown name *)
View
50 lang_php/analyze/foundation/database_prolog_php.ml
@@ -24,6 +24,7 @@ module E = Database_code
module Env = Env_interpreter_php
module Interp = Abstract_interpreter_php
+module CG = Callgraph_php2
open Env_interpreter_php
(*****************************************************************************)
@@ -103,10 +104,10 @@ let name_id id db =
failwith (spf "could not find name for id %s" (Db.str_of_id id db))
let name_of_node = function
- | Env.File s -> spf "'__TOPSTMT__%s'" s
- | Env.Function s -> spf "'%s'" s
- | Env.Method (s1, s2) -> spf "('%s', '%s')" s1 s2
- | Env.FakeRoot -> "'__FAKE_ROOT__'"
+ | CG.File s -> spf "'__TOPSTMT__%s'" s
+ | CG.Function s -> spf "'%s'" s
+ | CG.Method (s1, s2) -> spf "('%s', '%s')" s1 s2
+ | CG.FakeRoot -> "'__FAKE_ROOT__'"
(* quite similar to database_code.string_of_id_kind *)
let string_of_id_kind = function
@@ -421,65 +422,36 @@ let gen_prolog_db ?show_progress a b =
* - detect higher order functions so that function call
* through generic higher order functions is present in callgraph
*)
-let append_callgraph_to_prolog_db2 ?(show_progress=true) db file =
+let append_callgraph_to_prolog_db2 ?(show_progress=true) g file =
let h_oldcallgraph = Hashtbl.create 101 in
file +> Common.cat +> List.iter (fun s ->
if s =~ "^docall(.*"
then Hashtbl.add h_oldcallgraph s true
);
- let all_files =
- db.Db.file_info#tolist +> List.map fst in
- let db =
- Env.code_database_of_juju_db (Env.juju_db_of_files all_files) in
-
- Common.save_excursion Abstract_interpreter_php.extract_paths true (fun()->
- Common.save_excursion Abstract_interpreter_php.strict false (fun()->
- Abstract_interpreter_php.graph := Map_poly.empty;
-
- all_files +> Common_extra.progress ~show:show_progress (fun k ->
- List.iter (fun file ->
- k ();
- let ast =
- try
- Ast_php_simple_build.program (Parse_php.parse_program file)
- with Ast_php_simple_build.TodoConstruct s ->
- []
- in
- let env =
- Env_interpreter_php.empty_env db file in
- let heap =
- Env_interpreter_php.empty_heap in
- let _heap = Abstract_interpreter_php.program env heap ast in
- ()
- ))
- ));
-
(* look previous information, to avoid introduce duplication
* todo: and also to check/compare with the abstract interpreter.
* Should be a superset.
* - should find more functions when can resolve statically dynamic funcall
* -
*)
-
Common.with_open_outfile_append file (fun (pr, _chan) ->
let pr s = pr (s ^ "\n") in
- let g = !(Abstract_interpreter_php.graph) in
pr "";
g +> Map_poly.iter (fun src xs ->
xs +> Set_poly.iter (fun target ->
let kind =
match target with
(* can't call a file ... *)
- | Env.File _ -> raise Impossible
+ | CG.File _ -> raise Impossible
(* can't call a fake root*)
- | Env.FakeRoot -> raise Impossible
- | Env.Function _ -> "function"
- | Env.Method _ -> "method"
+ | CG.FakeRoot -> raise Impossible
+ | CG.Function _ -> "function"
+ | CG.Method _ -> "method"
in
(* do not count those fake edges *)
- if src <> Env.FakeRoot
+ if src <> CG.FakeRoot
then begin
let s =(spf "docall(%s, %s, %s)."
(name_of_node src) (name_of_node target) kind) in
View
2 lang_php/analyze/foundation/database_prolog_php.mli
@@ -5,4 +5,4 @@ val gen_prolog_db:
val append_callgraph_to_prolog_db:
?show_progress:bool ->
- Database_php.database -> Common.filename -> unit
+ Callgraph_php2.callgraph -> Common.filename -> unit
View
91 lang_php/analyze/foundation/env_interpreter_php.ml
@@ -16,6 +16,7 @@ open Common
open Ast_php_simple
module A = Ast_php_simple
+module CG = Callgraph_php2
(* Integer keys for ISet. The ordering must not be written as (-): the
 * subtraction wraps around for operands of opposite sign (for example
 * max_int and -1) and then reports the wrong order to Set.Make. The
 * standard comparison, specialised to int, has no such overflow. *)
module Int = struct type t = int let compare (a : int) (b : int) = compare a b end
module ISet = Set.Make (Int)
@@ -121,21 +122,6 @@ and env = {
stack : int SMap.t;
}
-type code_database_juju = {
- funs_juju : Ast_php_simple.func_def Common.cached SMap.t ref;
- classes_juju : Ast_php_simple.class_def Common.cached SMap.t ref;
- constants_juju: Ast_php_simple.constant_def Common.cached SMap.t ref;
-}
-
-type node =
- | Function of string
- | Method of string * string
- | File of Common.filename
- (* used to simplify code to provoke the call to toplevel functions *)
- | FakeRoot
-
-type callgraph = (node, node Set_poly.t) Map_poly.t
-
(*****************************************************************************)
(* Helpers *)
(*****************************************************************************)
@@ -158,81 +144,6 @@ let empty_env db file =
db = db;
}
-let string_of_node = function
- | File s -> "__TOP__" ^ s
- | Function s -> s
- | Method (s1, s2) -> s1 ^ "::" ^ s2
- | FakeRoot -> "__FAKE_ROOT__"
-
-let node_of_string s =
- match s with
- | _ when Common.(=~) s "__TOP__\\(.*\\)" ->
- File (Common.matched1 s)
- | _ when Common.(=~) s "\\(.*\\)::\\(.*\\)" ->
- let (a, b) = Common.matched2 s in
- Method (a, b)
- | "__FAKE_ROOT__" -> FakeRoot
- | _ -> Function s
-
-
-let (add_graph: node -> node -> callgraph -> callgraph) =
- fun src target graph ->
- let vs = try Map_poly.find src graph with Not_found -> Set_poly.empty in
- let vs = Set_poly.add target vs in
- Map_poly.add src vs graph
-
-(*****************************************************************************)
-(* Code database *)
-(*****************************************************************************)
-let juju_db_of_files ?(show_progress=false) xs =
- let db = {
- funs_juju = ref SMap.empty;
- classes_juju = ref SMap.empty;
- constants_juju = ref SMap.empty;
- }
- in
- xs +> Common_extra.progress ~show:show_progress (fun k ->
- List.iter (fun file ->
- k();
- try
- let cst = Parse_php.parse_program file in
- let ast = Ast_php_simple_build.program cst in
- List.iter (fun x ->
- (* todo: print warning when duplicate class/func ? *)
- match x with
- | ClassDef c ->
- db.classes_juju :=
- SMap.add (A.unwrap c.c_name) (Common.serial c) !(db.classes_juju)
- | FuncDef fd ->
- db.funs_juju :=
- SMap.add (A.unwrap fd.f_name) (Common.serial fd) !(db.funs_juju)
- | ConstantDef c ->
- db.constants_juju :=
- SMap.add (A.unwrap c.cst_name) (Common.serial c) !(db.constants_juju)
-
- | (Global _|StaticVars _
- |Try (_, _, _)|Throw _
- |Continue _|Break _|Return _
- |Foreach (_, _, _, _)|For (_, _, _, _)|Do (_, _)|While (_, _)
- |Switch (_, _)|If (_, _, _)
- |Block _|Expr _
- ) -> ()
- ) ast
- with e ->
- Common.pr2 (spf "ERROR in %s, exn = %s" file (Common.exn_to_s e))
- ));
- db
-
-(* todo: what if multiple matches?? *)
-let code_database_of_juju_db db = {
- funs = (fun s -> let f = SMap.find s !(db.funs_juju) in
- Common.unserial f);
- classes = (fun s -> let c = SMap.find s !(db.classes_juju) in
- Common.unserial c);
- constants = (fun s -> let c = SMap.find s !(db.constants_juju) in
- Common.unserial c);
- }
-
(*****************************************************************************)
(* String-of like *)
(*****************************************************************************)
View
3 lang_php/analyze/foundation/unit_prolog_php.ml
@@ -36,8 +36,11 @@ let prolog_query ~file query =
let db = Database_php_build.db_of_files_or_dirs [source_file] in
Database_prolog_php.gen_prolog_db
~show_progress:false db facts_pl_file;
+ (*
Database_prolog_php.append_callgraph_to_prolog_db
~show_progress:false db facts_pl_file;
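+ TODO: append_callgraph_to_prolog_db now expects a Callgraph_php2.callgraph, not db; build the callgraph first, then re-enable this call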
+ *)
(* debug: Common.cat facts_pl_file +> List.iter pr2; *)
let cmd =
View
17 lang_php/analyze/foundation/unit_static_analysis_php.ml
@@ -5,6 +5,7 @@ open OUnit
open Env_interpreter_php
module Env = Env_interpreter_php
module Interp = Abstract_interpreter_php
+module Db = Database_juju_php
(*****************************************************************************)
(* Prelude *)
@@ -18,16 +19,16 @@ let prepare content =
let tmp_file =
Parse_php.tmp_php_file_from_string content in
let db =
- Env.code_database_of_juju_db (Env.juju_db_of_files [tmp_file]) in
+ Db.code_database_of_juju_db (Db.juju_db_of_files [tmp_file]) in
let env =
- Env_interpreter_php.empty_env db tmp_file in
+ Env.empty_env db tmp_file in
let ast =
Ast_php_simple_build.program (Parse_php.parse_program tmp_file) in
env, ast
let heap_of_program_at_checkpoint content =
let (env, ast) = prepare content in
- let heap = Env_interpreter_php.empty_heap in
+ let heap = Env.empty_heap in
Common.save_excursion Abstract_interpreter_php.extract_paths false (fun()->
Common.save_excursion Abstract_interpreter_php.strict true (fun()->
let _heap = Abstract_interpreter_php.program env heap ast in
@@ -38,7 +39,7 @@ let heap_of_program_at_checkpoint content =
let callgraph_generation content =
let (env, ast) = prepare content in
- let heap = Env_interpreter_php.empty_heap in
+ let heap = Env.empty_heap in
Common.save_excursion Abstract_interpreter_php.extract_paths true (fun()->
Common.save_excursion Abstract_interpreter_php.strict true (fun()->
Abstract_interpreter_php.graph := Map_poly.empty;
@@ -92,11 +93,11 @@ let assert_graph file xs =
let _nb_nodes = List.length xs in
xs +> List.iter (fun (s, expected) ->
try
- let n = Env.node_of_string s in
+ let n = CG.node_of_string s in
let actual_child =
Map_poly.find n g
+> Set_poly.elements
- +> List.map Env.string_of_node
+ +> List.map CG.string_of_node
in
assert_equal
~msg:"it should have the expected callees"
@@ -292,10 +293,10 @@ function bar() { foo(); }
" in
(* note: I don't use assert_graph for teaching purpose here *)
let g = callgraph_generation file in
- let xs = Map_poly.find (Env.Function "bar") g +> Set_poly.elements in
+ let xs = Map_poly.find (CG.Function "bar") g +> Set_poly.elements in
assert_equal
~msg:"it should handle simple direct calls:"
- [Env.Function "foo"]
+ [CG.Function "foo"]
xs;
let file = "

0 comments on commit 03c6f23

Please sign in to comment.
Something went wrong with that request. Please try again.