Permalink
Browse files

[codequery] start to extract location information for the bytecode

Summary:
The bytecode actually has very basic location information so for now
I abuse graph_code_java.ml which has more information and I try to
map bytecode<->java. For now I just wrote tests for simple class names.
It will not work for nested classes, generic classes, etc, I think,
but it's a start.

Trying to connect the graph_code for bytecode and Java is also a good
way to understand how things are transformed from one to the other.

Test Plan:
included unit tests for at/3
also
$ cd ~/pfff/tests/bytecode/codegraph
$ ~/pfff/codequery -lang bytecode -build .
$ head facts.pl
:- discontiguous kind/2, at/3.
kind('Clone', class).
at('Clone', '/data/users/pad/github/pfff/tests/bytecode/codegraph/Clone.java', 1).
kind(('Clone', '<init>'), method).
kind(('Clone', 'foo'), method).
kind('TestException', class).
at('TestException', '/data/users/pad/github/pfff/tests/bytecode/codegraph/TestException.java', 1).
kind(('TestException', '<init>'), method).

Reviewers: pieter

Reviewed By: pieter

CC: subodh, platform-diffs@lists

Differential Revision: https://phabricator.fb.com/D654268
  • Loading branch information...
1 parent 1e91621 commit 13bb232e8a711e238db9c5c9cbb7ffe69400aacd @aryx aryx committed Dec 11, 2012
View
@@ -236,10 +236,10 @@ LIBS= commons/commons.cma \
lang_cpp/analyze/lib.cma \
lang_c/parsing/lib.cma \
lang_c/analyze/lib.cma \
- lang_bytecode/parsing/lib.cma \
- lang_bytecode/analyze/lib.cma \
lang_java/parsing/lib.cma \
lang_java/analyze/lib.cma \
+ lang_bytecode/parsing/lib.cma \
+ lang_bytecode/analyze/lib.cma \
lang_python/parsing/lib.cma \
lang_python/analyze/lib.cma \
lang_csharp/parsing/lib.cma \
@@ -643,6 +643,9 @@ db:
./pfff_db -verbose -lang ml -o DB_LIGHT.marshall .
graph:
./codegraph -lang cmt -build .
+prolog:
+ ./codequery -lang cmt -build .
+ rm -f facts.pl
layers:
./pfff_db_heavy -gen_age_layer /home/pad/local/pfff-for-layers \
layer_age.marshall
@@ -25,6 +25,7 @@ INCLUDEDIRS= $(TOP)/commons \
$(TOP)/h_program-lang $(TOP)/h_version-control \
$(TOP)/globals \
$(TOP)/external/javalib/src \
+ $(TOP)/lang_java/analyze \
../parsing \
##############################################################################
@@ -28,11 +28,25 @@ open JClassLow
* for more information.
*
* As opposed to lang_java/analyze/graph_code_java.ml, no need for:
- * - package lookup (all names are resolved already)
- * - nested classes are compiled in another class with a $ suffix
- * - generics?
+ * - package lookup, all names are resolved in the bytecode
+ * (still need a class lookup for fields/methods though ...)
+ * - handling nested classes, they are compiled in another class
+ * with a $ suffix
+ * - handling generics?
+ * - type checking to resolve certain method calls, the bytecode is fully
+ * typed (a bit like TAL), one can get the type of each 'invoke' opcode
*
- * Still need a class lookup for fields/methods though ...
+ * I now pass a graph_code_java as a parameter to get the source code
+ * location for the entities. The bytecode has attributes
+ * such as AttributeSourceFile (and AttributeLineNumberTable) but:
+ * - the filename there does not have any directory information, so
+ * one would need to look for all java files with this name, parse
+ * them, and extract the package name in it to disambiguate
+ * - some opcodes have entries in a LineNumberTable, but what about
+ * empty methods? and what about the LineNumberTable for the class?
+ * So for now I just abuse the graph_code for java and try to map a
+ * node in graph_code_bytecode to a node in graph_code_java (which, as a
+ * side effect, helps understand how things are translated).
*
* todo: StaticMethod, StaticField, the bytecode has this information
* less: put back nested classes inside the other
@@ -93,7 +107,6 @@ let package_and_name_of_cname class_name =
package_and_name_of_str name
-
(* quite similar to create_intermediate_directories_if_not_present *)
let create_intermediate_packages_if_not_present g root xs =
@@ -191,7 +204,7 @@ let lookup g n s =
(*****************************************************************************)
(* Defs *)
(*****************************************************************************)
-let extract_defs ~g ast =
+let extract_defs ~g ~file ~graph_code_java ast =
let jclass = ast in
let (package, name) = package_and_name_of_cname jclass.j_name in
@@ -200,6 +213,14 @@ let extract_defs ~g ast =
let node = (name, E.Class E.RegularClass) in
g +> G.add_node node;
g +> G.add_edge (current, node) G.Has;
+ graph_code_java +> Common.do_option (fun g2 ->
+ try
+ let nodeinfo = G.nodeinfo node g2 in
+ g +> G.add_nodeinfo node nodeinfo
+ with Not_found ->
+ pr2 (spf "could not find the corresponding nodeinfo in the java graph: %s"
+ (G.string_of_node node))
+ );
let current = node in
@@ -370,7 +391,7 @@ and code env x =
(* Main entry point *)
(*****************************************************************************)
-let build ?(verbose=true) dir_or_file skip_list =
+let build ?(verbose=true) ?(graph_code_java=None) dir_or_file skip_list =
let root = Common.realpath dir_or_file in
let all_files =
Lib_parsing_bytecode.find_source_files_of_dir_or_files [root] in
@@ -392,7 +413,7 @@ let build ?(verbose=true) dir_or_file skip_list =
* following creation of classes under com will then finish
* under EXTERNAL too
*)
- extract_defs ~g ast;
+ extract_defs ~g ~file ~graph_code_java ast;
()
));
@@ -1,5 +1,6 @@
val build:
?verbose:bool ->
+ ?graph_code_java:Graph_code.graph option ->
Common.path -> Skip_code.skip list ->
Graph_code.graph
@@ -36,7 +36,13 @@ let prolog_query ~files query =
*)
(files +> List.map fst +> Common.join " "));
let skip_list = [] in
- let g = Graph_code_bytecode.build ~verbose:verbose tmp_dir skip_list in
+ let graph_code_java =
+ Some (Graph_code_java.build ~verbose:verbose ~only_defs:true
+ tmp_dir skip_list)
+ in
+ let g =
+ Graph_code_bytecode.build ~verbose:verbose ~graph_code_java
+ tmp_dir skip_list in
let facts = Graph_code_prolog.build tmp_dir g in
let facts_pl_file = Filename.concat tmp_dir "facts.pl" in
Common.with_open_outfile facts_pl_file (fun (pr_no_nl, _chan) ->
@@ -85,6 +91,16 @@ class Bar {
["method"] (prolog_query ~files "kind(('Bar','g'), X), writeln(X)");
);
+ "at" >:: (fun () ->
+ let files = [
+"Foo.java", " // line 1
+ // line 2
+class Foo { // line 3
+}
+";] in
+ assert_equal
+ ["3"] (prolog_query ~files "at('Foo', _, X), writeln(X)")
+ );
])
(*****************************************************************************)
View
@@ -227,7 +227,11 @@ let build_graph_code lang root =
| "web" -> raise Todo
| "c" -> Graph_code_c.build ~verbose:!verbose root skip_list
| "java" -> Graph_code_java.build ~verbose:!verbose root skip_list
- | "bytecode" -> Graph_code_bytecode.build ~verbose:!verbose root skip_list
+ | "bytecode" ->
+ let graph_code_java = Some (Graph_code_java.build ~verbose:!verbose
+ ~only_defs:true root skip_list) in
+ Graph_code_bytecode.build ~verbose:!verbose ~graph_code_java
+ root skip_list
| "cmt" -> Graph_code_cmt.build ~verbose:!verbose root skip_list
| _ -> failwith ("language not supported: " ^ lang)
in
View
@@ -106,7 +106,12 @@ let build_prolog_db lang root =
| "cmt" ->
Graph_code_cmt.build ~verbose:!verbose root skip_list
| "bytecode" ->
- Graph_code_bytecode.build ~verbose:!verbose root skip_list
+ let graph_code_java =
+ Some (Graph_code_java.build ~verbose:!verbose ~only_defs:true
+ root skip_list)
+ in
+ Graph_code_bytecode.build ~verbose:!verbose ~graph_code_java
+ root skip_list
| _ -> raise Impossible
in
let facts = Graph_code_prolog.build root g in
@@ -1,6 +1,13 @@
class Clone {
+
+
+
public Object foo() throws java.lang.CloneNotSupportedException {
+
+
int x = 1;
return this.clone();
+
+
}
}

0 comments on commit 13bb232

Please sign in to comment.