Skip to content
This repository
Browse code

[codequery] start to extract location information for the bytecode

Summary:
The bytecode actually has only very basic location information, so for now
I abuse graph_code_java.ml, which has more information, and I try to
map bytecode<->java. For now I just wrote tests for simple class names.
It will not work for nested classes, generic classes, etc., I think,
but it's a start.

Trying to connect the graph_code for bytecode and Java is also a good
way to understand how things are transformed from one to the other.

Test Plan:
included unit tests for at/3
also
$ cd ~/pfff/tests/bytecode/codegraph
$ ~/pfff/codequery -lang bytecode -build .
$ head facts.pl
:- discontiguous kind/2, at/3.
kind('Clone', class).
at('Clone', '/data/users/pad/github/pfff/tests/bytecode/codegraph/Clone.java', 1).
kind(('Clone', '<init>'), method).
kind(('Clone', 'foo'), method).
kind('TestException', class).
at('TestException', '/data/users/pad/github/pfff/tests/bytecode/codegraph/TestException.java', 1).
kind(('TestException', '<init>'), method).

Reviewers: pieter

Reviewed By: pieter

CC: subodh, platform-diffs@lists

Differential Revision: https://phabricator.fb.com/D654268
  • Loading branch information...
commit 13bb232e8a711e238db9c5c9cbb7ffe69400aacd 1 parent 1e91621
Yoann Padioleau authored December 10, 2012
7  Makefile
@@ -236,10 +236,10 @@ LIBS= commons/commons.cma \
236 236
      lang_cpp/analyze/lib.cma \
237 237
     lang_c/parsing/lib.cma \
238 238
      lang_c/analyze/lib.cma \
239  
-    lang_bytecode/parsing/lib.cma \
240  
-     lang_bytecode/analyze/lib.cma \
241 239
     lang_java/parsing/lib.cma \
242 240
      lang_java/analyze/lib.cma \
  241
+    lang_bytecode/parsing/lib.cma \
  242
+     lang_bytecode/analyze/lib.cma \
243 243
     lang_python/parsing/lib.cma \
244 244
      lang_python/analyze/lib.cma \
245 245
     lang_csharp/parsing/lib.cma \
@@ -643,6 +643,9 @@ db:
643 643
 	./pfff_db -verbose  -lang ml -o DB_LIGHT.marshall .
644 644
 graph:
645 645
 	./codegraph -lang cmt -build .
  646
+prolog:
  647
+	./codequery -lang cmt -build .
  648
+	rm -f facts.pl
646 649
 layers:
647 650
 	./pfff_db_heavy -gen_age_layer /home/pad/local/pfff-for-layers \
648 651
           layer_age.marshall
1  lang_bytecode/analyze/Makefile
@@ -25,6 +25,7 @@ INCLUDEDIRS= $(TOP)/commons \
25 25
    $(TOP)/h_program-lang $(TOP)/h_version-control  \
26 26
    $(TOP)/globals \
27 27
    $(TOP)/external/javalib/src \
  28
+   $(TOP)/lang_java/analyze \
28 29
    ../parsing \
29 30
 
30 31
 ##############################################################################
37  lang_bytecode/analyze/graph_code_bytecode.ml
@@ -28,11 +28,25 @@ open JClassLow
28 28
  * for more information.
29 29
  * 
30 30
  * As opposed to lang_java/analyze/graph_code_java.ml, no need for:
31  
- *  - package lookup (all names are resolved already)
32  
- *  - nested classes are compiled in another class with a $ suffix
33  
- *  - generics?
  31
+ *  - package lookup, all names are resolved in the bytecode
  32
+ *    (still need a class lookup for fields/methods though ...)
  33
+ *  - handling nested classes, they are compiled in another class
  34
+ *    with a $ suffix
  35
+ *  - handling generics?
  36
+ *  - type checking to resolve certain method calls, the bytecode is fully
  37
+ *    typed (a bit like TAL), one can get the type of each 'invoke' opcode
34 38
  * 
35  
- * Still need a class lookup for fields/methods though ...
  39
+ * I now pass a graph_code_java as a parameter to get the source code
  40
+ * location for the entities. The bytecode has attributes
  41
+ * such as AttributeSourceFile (and AttributeLineNumberTable) but:
  42
+ *  - the filename there does not have any directory information, so
  43
+ *    one would need to look for all java files with this name, parse
  44
+ *    them, and extract the package name in it to disambiguate
  45
+ *  - some opcodes have entries in a LineNumberTable, but what about
  46
+ *    empty methods? and what about the LineNumberTable for the class?
  47
+ * So for now I just abuse the graph_code for java and try to map a
  48
+ * node in graph_code_bytecode to a node in graph_code_java (which, as
  49
+ * a side effect, helps understand how things are translated).
36 50
  * 
37 51
  * todo: StaticMethod, StaticField, the bytecode has this information
38 52
  * less: put back nested classes inside the other
@@ -93,7 +107,6 @@ let package_and_name_of_cname class_name =
93 107
   package_and_name_of_str name
94 108
 
95 109
 
96  
-
97 110
 (* quite similar to create_intermediate_directories_if_not_present *)
98 111
 let create_intermediate_packages_if_not_present g root xs =
99 112
 
@@ -191,7 +204,7 @@ let lookup g n s =
191 204
 (*****************************************************************************)
192 205
 (* Defs *)
193 206
 (*****************************************************************************)
194  
-let extract_defs ~g ast =
  207
+let extract_defs ~g ~file ~graph_code_java ast =
195 208
   let jclass = ast in
196 209
 
197 210
   let (package, name) = package_and_name_of_cname jclass.j_name in
@@ -200,6 +213,14 @@ let extract_defs ~g ast =
200 213
   let node = (name, E.Class E.RegularClass) in
201 214
   g +> G.add_node node;
202 215
   g +> G.add_edge (current, node) G.Has;
  216
+  graph_code_java +> Common.do_option (fun g2 ->
  217
+    try 
  218
+      let nodeinfo = G.nodeinfo node g2 in
  219
+      g +> G.add_nodeinfo node nodeinfo
  220
+    with Not_found ->
  221
+      pr2 (spf "could not find the corresponding nodeinfo in the java graph: %s"
  222
+             (G.string_of_node node))
  223
+  );
203 224
 
204 225
   let current = node in
205 226
 
@@ -370,7 +391,7 @@ and code env x =
370 391
 (* Main entry point *)
371 392
 (*****************************************************************************)
372 393
 
373  
-let build ?(verbose=true) dir_or_file skip_list =
  394
+let build ?(verbose=true) ?(graph_code_java=None) dir_or_file skip_list =
374 395
   let root = Common.realpath dir_or_file in
375 396
   let all_files = 
376 397
     Lib_parsing_bytecode.find_source_files_of_dir_or_files [root] in
@@ -392,7 +413,7 @@ let build ?(verbose=true) dir_or_file skip_list =
392 413
       * following creation of classes under com will then finish 
393 414
        * under EXTERNAL too
394 415
        *)
395  
-      extract_defs ~g ast;
  416
+      extract_defs ~g ~file ~graph_code_java ast;
396 417
       ()
397 418
     ));
398 419
 
1  lang_bytecode/analyze/graph_code_bytecode.mli
... ...
@@ -1,5 +1,6 @@
1 1
 
2 2
 val build:
3 3
   ?verbose:bool -> 
  4
+  ?graph_code_java:Graph_code.graph option ->
4 5
   Common.path -> Skip_code.skip list ->
5 6
   Graph_code.graph
18  lang_bytecode/analyze/unit_analyze_bytecode.ml
@@ -36,7 +36,13 @@ let prolog_query ~files query =
36 36
                         *)
37 37
                        (files +> List.map fst +> Common.join " "));
38 38
     let skip_list = [] in
39  
-    let g = Graph_code_bytecode.build ~verbose:verbose tmp_dir skip_list in
  39
+    let graph_code_java = 
  40
+      Some (Graph_code_java.build ~verbose:verbose ~only_defs:true 
  41
+              tmp_dir skip_list) 
  42
+    in
  43
+    let g = 
  44
+      Graph_code_bytecode.build ~verbose:verbose ~graph_code_java 
  45
+        tmp_dir skip_list in
40 46
     let facts = Graph_code_prolog.build tmp_dir g in
41 47
     let facts_pl_file = Filename.concat tmp_dir "facts.pl" in
42 48
     Common.with_open_outfile facts_pl_file (fun (pr_no_nl, _chan) ->
@@ -85,6 +91,16 @@ class Bar {
85 91
        ["method"]  (prolog_query ~files "kind(('Bar','g'), X), writeln(X)");
86 92
    );
87 93
 
  94
+   "at" >:: (fun () ->
  95
+     let files = [
  96
+"Foo.java", " // line 1
  97
+              // line 2
  98
+class Foo {   // line 3
  99
+}
  100
+";] in
  101
+     assert_equal
  102
+       ["3"] (prolog_query ~files "at('Foo', _, X), writeln(X)")
  103
+   );
88 104
  ])
89 105
 
90 106
 (*****************************************************************************)
6  main_codegraph.ml
@@ -227,7 +227,11 @@ let build_graph_code lang root =
227 227
     | "web" -> raise Todo
228 228
     | "c" -> Graph_code_c.build ~verbose:!verbose root skip_list
229 229
     | "java" -> Graph_code_java.build ~verbose:!verbose root skip_list
230  
-    | "bytecode" -> Graph_code_bytecode.build ~verbose:!verbose root skip_list
  230
+    | "bytecode" -> 
  231
+      let graph_code_java = Some (Graph_code_java.build ~verbose:!verbose
  232
+        ~only_defs:true root skip_list) in
  233
+      Graph_code_bytecode.build ~verbose:!verbose ~graph_code_java
  234
+        root skip_list
231 235
     | "cmt"  -> Graph_code_cmt.build ~verbose:!verbose root skip_list
232 236
     | _ -> failwith ("language not supported: " ^ lang)
233 237
   in
7  main_codequery.ml
@@ -106,7 +106,12 @@ let build_prolog_db lang root =
106 106
         | "cmt" -> 
107 107
           Graph_code_cmt.build ~verbose:!verbose root skip_list 
108 108
         | "bytecode" -> 
109  
-          Graph_code_bytecode.build ~verbose:!verbose root skip_list 
  109
+          let graph_code_java = 
  110
+            Some (Graph_code_java.build ~verbose:!verbose ~only_defs:true
  111
+                    root skip_list) 
  112
+          in
  113
+          Graph_code_bytecode.build ~verbose:!verbose ~graph_code_java 
  114
+            root skip_list 
110 115
         | _ -> raise Impossible
111 116
       in
112 117
       let facts = Graph_code_prolog.build root g in
7  tests/bytecode/codegraph/Clone.java
... ...
@@ -1,6 +1,13 @@
1 1
 class Clone {
  2
+
  3
+
  4
+
2 5
   public Object foo() throws java.lang.CloneNotSupportedException {
  6
+
  7
+
3 8
     int x = 1;
4 9
     return this.clone();
  10
+
  11
+
5 12
   }
6 13
 }

0 notes on commit 13bb232

Please sign in to comment.
Something went wrong with that request. Please try again.