Skip to content
Browse files

adding lang_java/ to help visualize java code

  • Loading branch information...
1 parent d4122fb commit 9919d5b9ee8a95b15869ae80487e404e3936a66e pad committed Sep 26, 2010
View
5 Makefile
@@ -134,6 +134,7 @@ BASICLIBS=commons/commons.cma \
lang_js/parsing/lib.cma \
lang_cpp/parsing/lib.cma \
lang_php/matcher/lib.cma \
+ lang_java/parsing/lib.cma \
BASICSYSLIBS=nums.cma bigarray.cma str.cma unix.cma
@@ -172,6 +173,8 @@ LIBS= commons/commons.cma \
lang_js/analyze/lib.cma \
lang_cpp/parsing/lib.cma \
lang_cpp/analyze/lib.cma \
+ lang_java/parsing/lib.cma \
+ lang_java/analyze/lib.cma \
MAKESUBDIRS=commons \
$(BDBDIR) $(REGEXPDIR) $(MPIDIR) \
@@ -194,6 +197,8 @@ MAKESUBDIRS=commons \
lang_js/analyze \
lang_cpp/parsing \
lang_cpp/analyze \
+ lang_java/parsing \
+ lang_java/analyze \
lang_php/analyze \
lang_php/analyze/basic \
lang_php/analyze/foundation \
View
9 changes.txt
@@ -1,5 +1,14 @@
-*- org -*-
+* 0.14
+
+** introducing parsing_java/
+
+** introducing parsing_nw/
+so one can also visualize TeX/LaTeX/Noweb source (which includes
+the documentation of pfff!)
+
+
* 0.13
public release
View
1 credits.txt
@@ -10,6 +10,7 @@ Thanks to
- Sylvain Conchon, Jean-Christophe Filliâtre and Julien Signoles for
ocamlgraph/ (LGPL)
- Lex Stein for ocamlbdb/ (GPL)
+ - Eric Cooper for the Joust Java parser (GPL)
- Xavier Leroy for ocamlmpi/ (LGPL)
- Markus Mottl for ocamlpcre/ (LGPL)
- Iain Proctor for ocamlthrift/ (ASF)
View
69 lang_java/analyze/Makefile
@@ -0,0 +1,69 @@
+TOP=../..
+##############################################################################
+# Variables
+##############################################################################
+TARGET=lib
+
+SRC= \
+ highlight_java.ml \
+ test_analyze_java.ml
+
+-include $(TOP)/Makefile.config
+
+SYSLIBS= str.cma unix.cma $(PCRECMA)
+LIBS=$(TOP)/commons/commons.cma \
+ $(TOP)/h_program-lang/lib.cma \
+ $(TOP)/h_version-control/lib.cma \
+ ../parsing/lib.cma \
+
+MAKESUBDIRS=
+
+INCLUDEDIRS= $(TOP)/commons \
+ $(TOP)/commons/ocollection $(TOP)/commons/ocamlextra \
+ $(TOP)/commons/lib-json \
+ $(TOP)/external/ocamlpcre/lib \
+ $(TOP)/h_program-lang $(TOP)/h_version-control \
+ $(TOP)/globals \
+ ../parsing \
+
+##############################################################################
+# Generic variables
+##############################################################################
+-include $(TOP)/Makefile.common
+
+
+##############################################################################
+# Top rules
+##############################################################################
+all:: rec $(TARGET).cma
+all.opt:: rec.opt $(TARGET).cmxa
+
+rec:
+ set -e; for i in $(MAKESUBDIRS); do $(MAKE) -C $$i all || exit 1; done
+
+rec.opt:
+ set -e; for i in $(MAKESUBDIRS); do $(MAKE) -C $$i all.opt || exit 1; done
+
+clean::
+ set -e; for i in $(MAKESUBDIRS); do $(MAKE) -C $$i clean; done
+depend::
+ set -e; for i in $(MAKESUBDIRS); do $(MAKE) -C $$i depend; done
+
+
+$(TARGET).cma: $(OBJS)
+ $(OCAMLC) -a -o $(TARGET).cma $(OBJS)
+
+$(TARGET).cmxa: $(OPTOBJS) $(LIBS:.cma=.cmxa)
+ $(OCAMLOPT) -a -o $(TARGET).cmxa $(OPTOBJS)
+
+$(TARGET).top: $(OBJS) $(LIBS)
+ $(OCAMLMKTOP) -o $(TARGET).top $(SYSLIBS) $(LIBS) $(OBJS)
+
+clean::
+ rm -f $(TARGET).top
+
+
+##############################################################################
+# Literate Programming rules
+##############################################################################
+
View
94 lang_java/analyze/highlight_java.ml
@@ -0,0 +1,94 @@
+(* Yoann Padioleau
+ *
+ * Copyright (C) 2010 Facebook
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * version 2.1 as published by the Free Software Foundation, with the
+ * special exception on linking described in file license.txt.
+ *
+ * This library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the file
+ * license.txt for more details.
+ *)
+
+open Common
+
+open Ast_java
+
+module Ast = Ast_java
+(*module V = Visitor_java *)
+
+open Highlight_code
+
+module T = Parser_java
+module TH = Token_helpers_java
+
+(*****************************************************************************)
+(* Prelude *)
+(*****************************************************************************)
+
+(*****************************************************************************)
+(* Helpers *)
+(*****************************************************************************)
+
+(*****************************************************************************)
+(* Code highlighter *)
+(*****************************************************************************)
+
+(* The idea of the code below is to visit the program either through its
+ * AST or its list of tokens. The tokens are easier for tagging keywords,
+ * number and basic entities. The Ast is better for tagging idents
+ * to figure out what kind of ident it is.
+ *)
+
+(* Highlighter entry point for one Java toplevel and its token list.
+ *
+ * ~tag_hook is meant to be called, through the local tag wrapper, with an
+ * info and the category chosen for it. prefs and toplevel are accepted
+ * but not consulted yet: every phase below is still a skeleton, so the
+ * function walks the tokens and returns unit without tagging anything.
+ *)
+let visit_toplevel
+    ~tag_hook
+    prefs
+    (*db_opt *)
+    (toplevel, toks)
+  =
+  let already_tagged = Hashtbl.create 101 in
+  (* forwards to tag_hook and records the info as already tagged;
+   * no phase below calls it yet *)
+  let tag = (fun ii categ ->
+    tag_hook ii categ;
+    Hashtbl.replace already_tagged ii true
+  )
+  in
+
+
+  (* -------------------------------------------------------------------- *)
+  (* toks phase 1 *)
+  (* -------------------------------------------------------------------- *)
+
+  (* placeholder traversal: consumes the list without inspecting tokens *)
+  let rec aux_toks xs =
+    match xs with
+    | [] -> ()
+    | _::xs ->
+        aux_toks xs
+  in
+  let toks' = toks +> Common.exclude (function
+    (* needed ? *)
+    (* | T.TCommentSpace _ -> true *)
+    | _ -> false
+  )
+  in
+  aux_toks toks';
+
+  (* -------------------------------------------------------------------- *)
+  (* ast phase 1 *)
+
+  (* -------------------------------------------------------------------- *)
+  (* toks phase 2 *)
+
+  toks +> List.iter (fun tok ->
+    match tok with
+
+    | T.EOF _
+        -> ()
+    (* Every other token is left untagged for now. Without this case the
+     * match is non-exhaustive and raises Match_failure on the first
+     * token that is not EOF, i.e. on any real source file. *)
+    | _ -> ()
+  );
+
+  (* -------------------------------------------------------------------- *)
+  (* ast phase 2 *)
+
+  ()
View
8 lang_java/analyze/highlight_java.mli
@@ -0,0 +1,8 @@
+(* Highlighting entry point for one Java toplevel and its tokens.
+ * tag_hook is the callback taking an info and the category chosen for it.
+ * NOTE(review): the implementation added in this same commit walks the
+ * tokens but does not call tag_hook yet -- confirm before relying on it.
+ *)
+val visit_toplevel :
+ tag_hook:
+ (Ast_java.info -> Highlight_code.category -> unit) ->
+ Highlight_code.highlighter_preferences ->
+ (*(Database_php.id * Common.filename * Database_php.database) option -> *)
+ Ast_java.toplevel * Parser_java.token list ->
+ unit
View
0 lang_java/analyze/test_analyze_java.ml
No changes.
View
66 lang_java/parsing/Makefile
@@ -0,0 +1,66 @@
+TOP=../..
+##############################################################################
+# Variables
+##############################################################################
+TARGET=lib
+
+SRC= flag_parsing_java.ml \
+ ast_java.ml \
+ visitor_java.ml lib_parsing_java.ml \
+ lexer_helper.ml \
+ parser_java.ml lexer_java.ml \
+ token_helpers_java.ml \
+ parse_java.ml \
+ test_parsing_java.ml
+
+SYSLIBS= str.cma unix.cma
+
+LIBS= $(TOP)/commons/commons.cma \
+ $(TOP)/h_program-lang/lib.cma \
+
+INCLUDEDIRS= $(TOP)/commons $(TOP)/commons/ocamlextra \
+ $(TOP)/h_program-lang \
+
+
+##############################################################################
+# Generic variables
+##############################################################################
+
+-include $(TOP)/Makefile.common
+
+##############################################################################
+# Top rules
+##############################################################################
+all:: $(TARGET).cma
+all.opt:: $(TARGET).cmxa
+
+$(TARGET).cma: $(OBJS)
+ $(OCAMLC) -a -o $(TARGET).cma $(OBJS)
+
+$(TARGET).cmxa: $(OPTOBJS) $(LIBS:.cma=.cmxa)
+ $(OCAMLOPT) -a -o $(TARGET).cmxa $(OPTOBJS)
+
+$(TARGET).top: $(OBJS) $(LIBS)
+ $(OCAMLMKTOP) -o $(TARGET).top $(SYSLIBS) $(LIBS) $(OBJS)
+
+clean::
+ rm -f $(TARGET).top
+
+
+lexer_java.ml: lexer_java.mll
+ $(OCAMLLEX) $<
+clean::
+ rm -f lexer_java.ml
+beforedepend:: lexer_java.ml
+
+
+parser_java.ml parser_java.mli: parser_java.mly
+ $(OCAMLYACC) $<
+clean::
+ rm -f parser_java.ml parser_java.mli parser_java.output
+beforedepend:: parser_java.ml parser_java.mli
+
+
+##############################################################################
+# Generic rules
+##############################################################################
View
304 lang_java/parsing/ast_java.ml
@@ -0,0 +1,304 @@
+(* Joust: a Java lexer, parser, and pretty-printer written in OCaml
+ * Copyright (C) 2001 Eric C. Cooper <ecc@cmu.edu>
+ * Released under the GNU General Public License
+ *)
+
+(*****************************************************************************)
+(* The AST java related types *)
+(*****************************************************************************)
+
+(* forunparser: *)
+
+type info = {
+ pinfo : Common.parse_info;
+ comments_tag: comments_around ref; (* set in comment_annotater.ml *)
+ (* todo? token_info : sometimes useful to know what token it was *)
+}
+and il = info list
+
+(* wrap2 is like wrap, except that I use it often for separator such
+ * as ','. In that case the info is associated to the argument that
+ * follows, so in 'a,b' I will have in the list [(a,[]); (b,[','])]. *)
+and 'a wrap = 'a * il
+and 'a wrap2 = 'a * il
+
+
+
+
+
+
+(* ------------------------------------------------------------------------- *)
+(* Ident, namespace *)
+(* ------------------------------------------------------------------------- *)
+and ident = string wrap (* could do a wrap3 where wrap3 = just 1 info *)
+
+and name = ident (*wrap2 '.' *) list
+
+and names = name list
+
+(* ------------------------------------------------------------------------- *)
+(* Type *)
+(* ------------------------------------------------------------------------- *)
+
+and typ = typbis wrap
+ and typbis =
+ | TypeName of name (* include the 'void', 'int', and other primitive type *)
+ | ArrayType of typ
+
+
+(* ------------------------------------------------------------------------- *)
+(* Expression *)
+(* ------------------------------------------------------------------------- *)
+
+and expr = exprbis wrap
+ and exprbis =
+ | Name of name (* include 'this' and 'super' special names *)
+ | Literal of string
+ | ClassLiteral of typ
+
+ | NewClass of typ * exprs * decls wrap (* { } *) option
+ | NewQualifiedClass of expr * ident * exprs * decls wrap (* { } *) option
+ | NewArray of typ * exprs * int * init option
+
+ | Dot of expr * ident
+ | Call of expr * exprs
+ | ArrayAccess of expr * expr
+
+ | Postfix of expr * op
+ | Prefix of op * expr
+ | Cast of typ * expr
+ | Infix of expr * op * expr
+
+ | InstanceOf of expr * typ
+
+ | Conditional of expr * expr * expr
+ (* ugly java: as in C, assignment is an expression, not a statement :( *)
+ | Assignment of expr * op * expr
+
+and exprs = expr list
+
+and op = string
+
+
+(* ------------------------------------------------------------------------- *)
+(* Statement *)
+(* ------------------------------------------------------------------------- *)
+
+and stmt = stmtbis wrap
+ and stmtbis =
+ | Empty
+
+ | Block of stmts
+
+ (* expr but in grammar restricted to side-effect expr, so good *)
+ | Expr of expr
+
+ | If of expr * stmt * stmt option
+ | Switch of expr * (cases * stmts) list
+
+
+ | While of expr * stmt
+ | Do of stmt * expr
+ | For of stmts * expr option * stmts * stmt
+
+ | Break of ident option
+ | Continue of ident option
+ | Return of expr option
+ | Label of ident * stmt
+
+ | Sync of expr * stmt
+
+ | Try of stmt * catches * stmt option
+ | Throw of expr
+
+ (* decl as statement *)
+ | LocalVar of field
+
+ | LocalClass of class_decl
+
+ (* javaext: http://java.sun.com/j2se/1.4.2/docs/guide/lang/assert.html *)
+ | Assert of expr * expr option (* assert e or assert e : e2 *)
+
+and stmts = stmt list
+
+and case = casebis wrap
+ and casebis =
+ | Case of expr
+ | Default
+
+and catch = var * stmt
+
+and cases = case list
+and catches = catch list
+
+
+(* ------------------------------------------------------------------------- *)
+(* Variable declaration *)
+(* ------------------------------------------------------------------------- *)
+
+and modifier = modifierbis wrap (* could do wrap3 instead *)
+ and modifierbis =
+ | Public | Protected | Private
+ | Abstract
+ | Static
+ | Final
+ | StrictFP
+ | Transient | Volatile
+ | Synchronized
+ | Native
+
+and modifiers = modifier list
+
+and vars = var list
+
+and var =
+ { v_mods : modifiers;
+ v_type : typ;
+ v_name : ident }
+
+
+and init = initbis wrap
+ and initbis =
+ | ExprInit of expr
+ | ArrayInit of init list
+
+
+(* ------------------------------------------------------------------------- *)
+(* Method, field *)
+(* ------------------------------------------------------------------------- *)
+
+and method_decl =
+ { m_var : var;
+ m_formals : vars;
+ m_throws : names;
+ m_body : stmt }
+
+and field =
+ { f_var : var;
+ f_init : init option }
+
+
+(* ------------------------------------------------------------------------- *)
+(* Class *)
+(* ------------------------------------------------------------------------- *)
+
+and class_decl =
+ { cl_mods : modifiers;
+ cl_name : ident;
+ cl_super : name option;
+ cl_impls : names;
+ cl_body : decls }
+
+and interface =
+ { if_mods : modifiers;
+ if_name : ident;
+ if_exts : names;
+ if_body : decls }
+
+
+
+(* ------------------------------------------------------------------------- *)
+(* The toplevels elements *)
+(* ------------------------------------------------------------------------- *)
+
+and decl =
+ | Class of class_decl
+ | Interface of interface
+
+ | Field of field
+ | Method of method_decl
+ | Constructor of method_decl (* the m_var.v_type should be empty *)
+
+ | InstanceInit of stmt
+ | StaticInit of stmt
+
+
+and decls = decl list
+
+
+and compilation_unit =
+ { package : name wrap option;
+ imports : names;
+ decls : decls;
+ (* pad: comments : Source.comments *)
+ }
+
+and program = (compilation_unit, info list) Common.either
+
+and toplevel = compilation_unit
+
+(* ------------------------------------------------------------------------- *)
+
+
+
+(*****************************************************************************)
+(* C comments *)
+(*****************************************************************************)
+
+(* I often use m for comments as I can not use c (already use for c stuff)
+ * and com is too long.
+ *)
+
+(* this type will be associated to each token *)
+and comments_around = {
+ mbefore: comment_and_relative_pos list;
+ mafter: comment_and_relative_pos list;
+}
+ and comment_and_relative_pos = {
+
+ minfo: Common.parse_info;
+ (* the int represents the number of lines of difference between the
+ * current token and the comment. When on the same line, this number is 0.
+ * When on the previous line, -1. In some way the after/before in the
+ * previous record is redundant because the sign of the integer can also
+ * tell the difference, but I keep it that way.
+ *)
+ mpos: int;
+ (* todo?
+ * is_alone_in_line: bool; (*for labels, to avoid false positive*)
+ *)
+ }
+
+and comment = Common.parse_info
+and com = comment list ref
+
+
+
+(* comments_around value with no comment on either side *)
+let emptyComments = { mafter = []; mbefore = [] }
+
+(*****************************************************************************)
+(* Wrappers *)
+(*****************************************************************************)
+
+(* drop the info list of a wrapped value *)
+let unwrap (x, _) = x
+
+(* the Common.parse_info stored in an info *)
+let parse_info_of_info ii = ii.pinfo
+
+(* field accessors going through the parse_info of an info *)
+let pos_of_info ii = (parse_info_of_info ii).Common.charpos
+let str_of_info ii = (parse_info_of_info ii).Common.str
+let file_of_info ii = (parse_info_of_info ii).Common.file
+let line_of_info ii = (parse_info_of_info ii).Common.line
+let col_of_info ii = (parse_info_of_info ii).Common.column
+
+
+(* copy of ii whose token string is replaced by s; other fields are kept *)
+let rewrap_str s ii =
+  { ii with pinfo = { ii.pinfo with Common.str = s } }
+
+(* empty info lists *)
+let todoii = []
+let noii = []
+
+
+
+(* order two infos by their charpos *)
+let compare_pos ii1 ii2 =
+  compare (pos_of_info ii1) (pos_of_info ii2)
+
+(*****************************************************************************)
+(* Some constructors *)
+(*****************************************************************************)
View
25 lang_java/parsing/copyright.txt
@@ -0,0 +1,25 @@
+Joust: a Java lexer, parser, and pretty-printer written in OCaml
+Copyright (C) 2001 Eric C. Cooper <ecc@cmu.edu>
+Copyright (C) 2008 Yoann Padioleau <yoann.padioleau@gmail.com>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+On Debian GNU/Linux systems, the complete text of the GNU General
+Public License can be found in /usr/share/common-licenses/GPL.
+
+
+
+Some files and modifications are copyrighted by
+Yoann Padioleau <yoann.padioleau@gmail.com>
View
342 lang_java/parsing/diff_ast_mli
@@ -0,0 +1,342 @@
+--- ast_java.mli 2008-08-04 19:22:27.000000000 -0500
++++ ast_java.ml 2008-08-04 19:11:52.000000000 -0500
+@@ -2,19 +2,21 @@
+ Copyright (C) 2001 Eric C. Cooper <ecc@cmu.edu>
+ Released under the GNU General Public License *)
+
+-type ident
++type ident = { id : string; pos : int }
+
+-val ident : string -> int -> ident
++let ident s n = { id = s; pos = n }
+
+-val synth_id : string -> ident
++let id_string ident = ident.id
+
+-val id_string : ident -> string
++let id_pos ident = ident.pos
+
+-val star_ident : ident
++let synth_id s = { id = s; pos = -1 }
+
+-val this_ident : ident
++let star_ident = synth_id "*"
+
+-val super_ident : ident
++let this_ident = synth_id "this"
++
++let super_ident = synth_id "super"
+
+ type name = ident list
+
+@@ -24,11 +26,11 @@ type typ =
+ | TypeName of name
+ | ArrayType of typ
+
+-val no_type : typ
++let named_type str = TypeName [synth_id str]
+
+-val void_type : typ
++let no_type = TypeName []
+
+-val named_type : string -> typ
++let void_type = named_type "void"
+
+ type modifier =
+ | Public
+@@ -55,8 +57,7 @@ type compilation_unit =
+ { package : name option;
+ imports : names;
+ decls : decls;
+- (* pad: comments : Source.comments *)
+- }
++ comments : Source.comments }
+
+ and decls = decl list
+
+@@ -158,32 +159,258 @@ type mdeclarator = var_decl_id * vars
+
+ type var_decls = (var_decl_id * init option) list
+
+-val add_comments : compilation_unit -> compilation_unit
+-
+-val compilation_unit : name option -> names -> decls -> compilation_unit
+-
+-val class_decl : modifiers -> ident -> name option -> names -> decls -> class_decl
+-
+-val method_decl : method_decl -> stmt -> method_decl
+-
+-val interface_decl : modifiers -> ident -> names -> decls -> interface
+-
+-val method_header : modifiers -> typ -> mdeclarator -> names -> method_decl
+-
+-val field_decls : modifiers -> typ -> var_decls -> decls
+-
+-val var_decls : modifiers -> typ -> var_decls -> stmts
+-
+-val formal_decl : modifiers -> typ -> var_decl_id -> var
+-
+-val constructor : modifiers -> (ident * vars) -> names -> stmt -> decl
+-
+-val constructor_invocation : name -> exprs -> stmt
+-
+-val expr_super_invocation : expr -> exprs -> stmt
+-
+-val type_name : expr -> typ
++let add_comments comp =
++ { comp with comments = Source.comments () }
+
+-val id_pos : ident -> int
++let compilation_unit pkg ims dcls =
++ { package = pkg; imports = ims; decls = dcls; comments = [] }
+
+-val stmt_pos : stmt -> int
++let class_decl mods name super ifs body =
++ { cl_mods = mods; cl_name = name; cl_super = super;
++ cl_impls = ifs; cl_body = body }
++
++let method_decl hdr body =
++ { hdr with m_body = body }
++
++let interface_decl mods name extends body =
++ { if_mods = mods; if_name = name; if_exts = extends; if_body = body }
++
++(* Move array dimensions from variable name to type. *)
++
++let rec canon_var mods t v =
++ match v with
++ | IdentDecl str -> { v_mods = mods; v_type = t; v_name = str }
++ | ArrayDecl v' -> canon_var mods (ArrayType t) v'
++
++let method_header mods mtype (v, formals) throws =
++ { m_var = canon_var mods mtype v; m_formals = formals;
++ m_throws = throws; m_body = Empty }
++
++(* Return a list of field declarations in canonical form. *)
++
++let decls f mods vtype vars =
++ let dcl (v, init) =
++ f { f_var = canon_var mods vtype v; f_init = init }
++ in
++ List.map dcl vars
++
++let field_decls = decls (fun x -> Field x)
++
++let var_decls = decls (fun x -> LocalVar x)
++
++let formal_decl mods t v = canon_var mods t v
++
++let constructor mods (id, formals) throws body =
++ let var = { v_mods = mods; v_type = no_type; v_name = id } in
++ Constructor { m_var = var; m_formals = formals; m_throws = throws;
++ m_body = body }
++
++let constructor_invocation name args =
++ Expr (Call (Name name, args))
++
++let expr_super_invocation expr args =
++ Expr (Call (Dot (expr, super_ident), args))
++
++(* Convert an expression, which must be a name, into a named type. *)
++
++let type_name exp =
++ match exp with
++ | Name name -> TypeName name
++ | _ -> raise Parsing.Parse_error
++
++(* Find the position of a syntactic structure, or -1 if undefined. *)
++
++let opt_id_pos opt =
++ match opt with
++ | Some id -> id_pos id
++ | None -> -1
++
++let var_pos var = id_pos var.v_name
++
++let rec type_pos t =
++ match t with
++ | TypeName name -> id_pos (List.hd name)
++ | ArrayType t' -> type_pos t'
++
++let rec stmt_pos stmt =
++ match stmt with
++ | Block [] -> -1
++ | Block stmts -> stmts_pos stmts
++ | LocalVar fld -> var_pos fld.f_var
++ | LocalClass c -> id_pos c.cl_name
++ | Empty -> -1
++ | Label (lbl, _) -> id_pos lbl
++ | Expr e -> expr_pos e
++ | If (e, s1, opt) ->
++ let n = expr_pos e in
++ if n <> -1 then n
++ else
++ let n = stmt_pos s1 in
++ if n <> -1 then n
++ else
++ (match opt with
++ | Some s2 -> stmt_pos s2
++ | None -> -1)
++ | Switch (e, sw) ->
++ let n = expr_pos e in
++ if n <> -1 then n
++ else switch_pos sw
++ | While (e, st) ->
++ let n = expr_pos e in
++ if n <> -1 then n
++ else stmt_pos st
++ | Do (st, e) ->
++ let n = stmt_pos st in
++ if n <> -1 then n
++ else expr_pos e
++ | For (init, test, update, st) ->
++ let n = stmts_pos init in
++ if n <> -1 then n
++ else
++ let n = (match test with Some e -> expr_pos e | None -> -1) in
++ if n <> -1 then n
++ else stmts_pos (update @ [st])
++ | Break opt -> opt_id_pos opt
++ | Continue opt -> opt_id_pos opt
++ | Return opt ->
++ (match opt with Some e -> expr_pos e | None -> -1)
++ | Throw e -> expr_pos e
++ | Sync (e, st) ->
++ let n = expr_pos e in
++ if n <> -1 then n
++ else stmt_pos st
++ | Try (st, catches, Some f) ->
++ let n = stmt_pos st in
++ if n <> -1 then n
++ else
++ let n = catches_pos catches in
++ if n <> -1 then n
++ else stmt_pos f
++ | Try (st, catches, None) ->
++ let n = stmt_pos st in
++ if n <> -1 then n
++ else catches_pos catches
++
++and stmts_pos list =
++ match list with
++ | s :: rest ->
++ let n = stmt_pos s in
++ if n <> -1 then n
++ else stmts_pos rest
++ | [] -> -1
++
++and expr_stmt_pos e s =
++ let n = expr_pos e in
++ if n <> -1 then n
++ else stmt_pos s
++
++and switch_pos list =
++ match list with
++ | (cases, stmts) :: rest ->
++ let n = cases_pos cases in
++ if n <> -1 then n
++ else
++ let n = stmts_pos stmts in
++ if n <> -1 then n
++ else switch_pos rest
++ | [] -> -1
++
++and cases_pos list =
++ match list with
++ | Case e :: rest ->
++ let n = expr_pos e in
++ if n <> -1 then n
++ else cases_pos rest
++ | Default :: rest -> cases_pos rest
++ | [] -> -1
++
++and expr_pos e =
++ match e with
++ | Literal _ -> -1
++ | ClassLiteral t -> type_pos t
++ | NewClass (t, _, _) -> type_pos t
++ | NewQualifiedClass (e, id, _, _) ->
++ let n = expr_pos e in
++ if n <> -1 then n
++ else id_pos id
++ | NewArray (t, dims, _, opt) ->
++ let n = type_pos t in
++ if n <> -1 then n
++ else
++ let n = exprs_pos dims in
++ if n <> -1 then n
++ else
++ (match opt with
++ | Some init -> init_pos init
++ | None -> -1)
++ | Dot (e, id) ->
++ let n = expr_pos e in
++ if n <> -1 then n
++ else id_pos id
++ | Call (e, args) ->
++ let n = expr_pos e in
++ if n <> -1 then n
++ else exprs_pos args
++ | ArrayAccess (e1, e2) ->
++ let n = expr_pos e1 in
++ if n <> -1 then n
++ else expr_pos e2
++ | Postfix (e, _) -> expr_pos e
++ | Prefix (_, e) -> expr_pos e
++ | Cast (t, e) ->
++ let n = type_pos t in
++ if n <> -1 then n
++ else expr_pos e
++ | Infix (e1, op, e2) ->
++ let n = expr_pos e1 in
++ if n <> -1 then n
++ else expr_pos e2
++ | InstanceOf (e, t) ->
++ let n = expr_pos e in
++ if n <> -1 then n
++ else type_pos t
++ | Conditional (e1, e2, e3) ->
++ let n = expr_pos e1 in
++ if n <> -1 then n
++ else
++ let n = expr_pos e2 in
++ if n <> -1 then n
++ else expr_pos e3
++ | Assignment (e1, _, e2) ->
++ let n = expr_pos e1 in
++ if n <> -1 then n
++ else expr_pos e2
++ | Name name ->
++ id_pos (List.hd name)
++
++and exprs_pos list =
++ match list with
++ | e :: rest ->
++ let n = expr_pos e in
++ if n <> -1 then n
++ else exprs_pos rest
++ | [] -> -1
++
++and init_pos init =
++ match init with
++ | ExprInit e -> expr_pos e
++ | ArrayInit inits -> inits_pos inits
++
++and inits_pos list =
++ match list with
++ | init :: rest ->
++ let n = init_pos init in
++ if n <> -1 then n
++ else inits_pos rest
++ | [] -> -1
++
++and catches_pos list =
++ match list with
++ | (var, stmt) :: rest ->
++ let n = var_pos var in
++ if n <> -1 then n
++ else
++ let n = stmt_pos stmt in
++ if n <> -1 then n
++ else catches_pos rest
++ | [] -> -1
View
4 lang_java/parsing/flag_parsing_java.ml
@@ -0,0 +1,4 @@
+(* Verbosity switches for the Java front end; both default to true.
+ * NOTE(review): their readers are not visible here -- presumably the
+ * parser and the lexer consult them before printing diagnostics.
+ *)
+let verbose_parsing = ref true
+
+let verbose_lexing = ref true
View
79 lang_java/parsing/lexer_helper.ml
@@ -0,0 +1,79 @@
+(* Joust: a Java lexer, parser, and pretty-printer written in OCaml
+ Copyright (C) 2001 Eric C. Cooper <ecc@cmu.edu>
+ Released under the GNU General Public License *)
+
+(* name of the file being lexed and current line number in it *)
+let file = ref ""
+
+let line = ref 0
+
+(* remember the file name and restart line counting at 1 *)
+let set_file_name f =
+  line := 1;
+  file := f
+
+(* bump the line counter; the lexbuf argument is not looked at *)
+let next_line _buf =
+  incr line
+
+let location () = Printf.sprintf "file %s, line %d" !file !line
+
+let lexeme_pos = Lexing.lexeme_start
+
+type comment = { mutable buffer : string; mutable pos : int }
+
+type comments = comment list
+
+(* comments seen so far, most recent first *)
+let comment_list = ref []
+
+(* comments seen so far, oldest first *)
+let comments () = List.rev !comment_list
+
+(* push a fresh empty comment (pos -1) and return it *)
+let new_comment () =
+  let fresh = { buffer = ""; pos = -1 } in
+  comment_list := fresh :: !comment_list;
+  fresh
+
+(* most recently created comment; fails if there is none *)
+let current_comment () = List.hd !comment_list
+
+(* start a comment at the current lexeme, whose text opens the buffer *)
+let begin_comment buf =
+  let c = new_comment () in
+  c.pos <- lexeme_pos buf;
+  c.buffer <- Lexing.lexeme buf
+
+(* append the first char of the lexeme, turning '\r' into '\n' *)
+let continue_comment buf =
+  let c = current_comment () in
+  let ch =
+    match Lexing.lexeme_char buf 0 with
+    | '\r' -> '\n'
+    | other -> other
+  in
+  c.buffer <- c.buffer ^ String.make 1 ch
+
+(* append the whole lexeme to the current comment *)
+let end_comment buf =
+  let c = current_comment () in
+  c.buffer <- c.buffer ^ Lexing.lexeme buf
+
+(* strip trailing '\n' and '\r' characters; s itself is returned when
+ * there is nothing to strip *)
+let trim s =
+  let is_eol c = c = '\n' || c = '\r' in
+  let rec keep i =
+    if i > 0 && is_eol s.[i-1] then keep (i-1) else i
+  in
+  let len = String.length s in
+  let n = keep len in
+  if n < len then String.sub s 0 n else s
+
+(* record a one-line comment: the lexeme without its line terminator *)
+let eol_comment buf =
+  let c = new_comment () in
+  c.pos <- lexeme_pos buf;
+  c.buffer <- trim (Lexing.lexeme buf)
+
+(* run f on a lexbuf reading from the current file; the channel is closed,
+ * the parser cleared and the comment list emptied whether f returns or
+ * raises *)
+let with_lexbuf f =
+  let ic = open_in !file in
+  let cleanup () =
+    close_in ic;
+    Parsing.clear_parser ();
+    comment_list := []
+  in
+  try
+    let res = f (Lexing.from_channel ic) in
+    cleanup ();
+    res
+  with exn ->
+    cleanup ();
+    raise exn
View
25 lang_java/parsing/lexer_helper.mli
@@ -0,0 +1,25 @@
+(* Joust: a Java lexer, parser, and pretty-printer written in OCaml
+ Copyright (C) 2001 Eric C. Cooper <ecc@cmu.edu>
+ Released under the GNU General Public License *)
+(* Record the name of the file to lex and reset the line counter to 1. *)
+val set_file_name : string -> unit
+(* Open the file recorded by set_file_name and apply the function to a
+ * lexbuf reading from it. Whether the function returns or raises, the
+ * channel is closed, the parser is cleared and the collected comments
+ * are dropped; an exception is re-raised after that cleanup.
+ *)
+val with_lexbuf : (Lexing.lexbuf -> 'a) -> 'a
+(* Increment the current line number; the lexbuf itself is not used. *)
+val next_line : Lexing.lexbuf -> unit
+(* Current file name and line number formatted as one message string. *)
+val location : unit -> string
+(* Block comments are collected in three steps: begin_comment creates a
+ * comment positioned at the start of the current lexeme with that lexeme
+ * as text, continue_comment appends the first character of the lexeme
+ * (a carriage return is stored as a newline), and end_comment appends
+ * the whole lexeme. The last two act on the most recently created
+ * comment and fail if none exists.
+ *)
+val begin_comment : Lexing.lexbuf -> unit
+val continue_comment : Lexing.lexbuf -> unit
+val end_comment : Lexing.lexbuf -> unit
+(* Record a one-line comment: the current lexeme with its trailing
+ * newline and carriage return characters removed.
+ *)
+val eol_comment : Lexing.lexbuf -> unit
+(* Start offset of the current lexeme (Lexing.lexeme_start). *)
+val lexeme_pos : Lexing.lexbuf -> int
+(* A collected comment: buffer is its text, pos the start offset of its
+ * first lexeme (-1 until it has been set).
+ *)
+type comment = { mutable buffer : string; mutable pos : int }
+
+type comments = comment list
+(* Comments collected so far, in the order they were created. *)
+val comments : unit -> comments
View
337 lang_java/parsing/lexer_java.mll
@@ -0,0 +1,337 @@
+(* Joust: a Java lexer, parser, and pretty-printer written in OCaml
+ Copyright (C) 2001 Eric C. Cooper <ecc@cmu.edu>
+ Released under the GNU General Public License *)
+
+(* ocamllex lexer for Java
+
+ Attempts to conform to:
+
+ The Java Language Specification
+ Second Edition
+
+ James Gosling, Bill Joy, Guy Steele, Gilad Bracha *)
+
+{
+open Common
+
+open Lexer_helper
+open Parser_java
+
+(*****************************************************************************)
+(* Text of the lexeme most recently matched on the given lexbuf. *)
+let tok = Lexing.lexeme
+
+(* Build the info attached to the token just matched: its character offset
+   and text come from [lexbuf]; line, column and file are placeholders that
+   a post-lexing phase fills in.  Each call gets a fresh comments_tag. *)
+let tokinfo lexbuf =
+  let parse_info = {
+    Common.charpos = Lexing.lexeme_start lexbuf;
+    Common.str = Lexing.lexeme lexbuf;
+    Common.line = -1;
+    Common.column = -1;
+    Common.file = "";
+  } in
+  {
+    Ast_java.pinfo = parse_info;
+    comments_tag = ref Ast_java.emptyComments;
+  }
+
+exception Lexical of string
+(*exception Unterminated_comment*)
+
+
+(* Return [ii] with [s] appended to the string it currently carries. *)
+let tok_add_s s ii =
+  let extended = Ast_java.str_of_info ii ^ s in
+  Ast_java.rewrap_str extended ii
+
+
+(* Java reserved words. *)
+
+(* Keyword-table entry for a word that lexes as a LITERAL token. *)
+let literal v =
+  let build info = LITERAL (v, info) in
+  (v, build)
+(* Keyword-table entry for a word that lexes as a PRIMITIVE_TYPE token. *)
+let primitive_type t =
+  let build info = PRIMITIVE_TYPE (t, info) in
+  (t, build)
+
+let keyword_table = Common.hash_of_list [ (* Reserved word -> token builder.
+ Each entry pairs a word with a function that wraps the token info into
+ the matching parser token; the Identifier clause of the token rule looks
+ words up here and falls back to IDENTIFIER when a word is absent. *)
+ "abstract", (fun ii -> ABSTRACT ii);
+ "boolean", (fun ii -> BOOLEAN ii);
+ "break", (fun ii -> BREAK ii);
+ "byte", (fun ii -> BYTE ii);
+ "case", (fun ii -> CASE ii);
+ "catch", (fun ii -> CATCH ii);
+ "char", (fun ii -> CHAR ii);
+ "class", (fun ii -> CLASS ii);
+ "const", (fun ii -> CONST ii);
+ "continue", (fun ii -> CONTINUE ii);
+ "default", (fun ii -> DEFAULT ii);
+ "do", (fun ii -> DO ii);
+ "double", (fun ii -> DOUBLE ii);
+ "else", (fun ii -> ELSE ii);
+ "extends", (fun ii -> EXTENDS ii);
+ "final", (fun ii -> FINAL ii);
+ "finally", (fun ii -> FINALLY ii);
+ "float", (fun ii -> FLOAT ii);
+ "for", (fun ii -> FOR ii);
+ "goto", (fun ii -> GOTO ii);
+ "if", (fun ii -> IF ii);
+ "implements", (fun ii -> IMPLEMENTS ii);
+ "import", (fun ii -> IMPORT ii);
+ "instanceof", (fun ii -> INSTANCEOF ii);
+ "int", (fun ii -> INT ii);
+ "interface", (fun ii -> INTERFACE ii);
+ "long", (fun ii -> LONG ii);
+ "native", (fun ii -> NATIVE ii);
+ "new", (fun ii -> NEW ii);
+ "package", (fun ii -> PACKAGE ii);
+ "private", (fun ii -> PRIVATE ii);
+ "protected", (fun ii -> PROTECTED ii);
+ "public", (fun ii -> PUBLIC ii);
+ "return", (fun ii -> RETURN ii);
+ "short", (fun ii -> SHORT ii);
+ "static", (fun ii -> STATIC ii);
+ "strictfp", (fun ii -> STRICTFP ii);
+ "super", (fun ii -> SUPER ii);
+ "switch", (fun ii -> SWITCH ii);
+ "synchronized", (fun ii -> SYNCHRONIZED ii);
+ "this", (fun ii -> THIS ii);
+ "throw", (fun ii -> THROW ii);
+ "throws", (fun ii -> THROWS ii);
+ "transient", (fun ii -> TRANSIENT ii);
+ "try", (fun ii -> TRY ii);
+ "void", (fun ii -> VOID ii);
+ "volatile", (fun ii -> VOLATILE ii);
+ "while", (fun ii -> WHILE ii);
+ (* the three words below are delivered to the parser as LITERAL tokens *)
+ literal "true";
+ literal "false";
+ literal "null";
+ (* NOTE(review): the eight names below are also bound earlier in this list
+ (BOOLEAN, BYTE, CHAR, DOUBLE, FLOAT, INT, LONG, SHORT).  Which binding a
+ lookup returns depends on how Common.hash_of_list inserts duplicate keys
+ (not visible here); if it calls Hashtbl.add in list order, these
+ PRIMITIVE_TYPE entries shadow the earlier ones -- confirm. *)
+ primitive_type "byte";
+ primitive_type "short";
+ primitive_type "char";
+ primitive_type "int";
+ primitive_type "long";
+ primitive_type "float";
+ primitive_type "double";
+ primitive_type "boolean";
+
+ (* javaext: 1.4 *)
+ "assert", (fun ii -> ASSERT ii);
+
+]
+
+
+
+}
+
+(*****************************************************************************)
+
+(* CHAPTER 3: Lexical Structure *)
+
+(* 3.4 Line Terminators *)
+
+let LF = '\n' (* newline *)
+let CR = '\r' (* return *)
+
+let LineTerminator = LF | CR | CR LF
+let InputCharacter = [^ '\r' '\n']
+
+(* 3.5 Input Elements and Tokens *)
+
+let SUB = '\026' (* control-Z *) (* decimal *)
+
+(* 3.6 White Space *)
+
+let SP = ' ' (* space *)
+let HT = '\t' (* horizontal tab *)
+let FF = '\012' (* form feed *) (* decimal *)
+
+let WhiteSpace = SP | HT | FF (* | LineTerminator -- handled separately *)
+
+(* 3.7 Comments *)
+
+(* let TraditionalComment = "/*" ([^ '*'] | '*' [^ '/'])* "*/" *)
+let EndOfLineComment = "//" InputCharacter* LineTerminator (* NOTE(review): only the commented-out old rule mentions this name; the active token rule matches the comment without its line terminator *)
+(* let Comment = TraditionalComment | EndOfLineComment *)
+
+(* 3.8 Identifiers *)
+
+let Letter = ['A'-'Z' 'a'-'z' '_' '$']
+let Digit = ['0'-'9']
+let Identifier = Letter (Letter | Digit)*
+
+(* 3.10.1 Integer Literals *)
+
+let IntegerTypeSuffix = ['l' 'L']
+
+let DecimalIntegerLiteral = ('0' | ['1'-'9'] Digit*) IntegerTypeSuffix?
+
+let HexDigit = ['0'-'9' 'a'-'f' 'A'-'F']
+let HexIntegerLiteral = '0' ['x' 'X'] HexDigit+ IntegerTypeSuffix?
+
+let OctalDigit = ['0'-'7']
+let OctalIntegerLiteral = '0' OctalDigit+ IntegerTypeSuffix?
+
+let IntegerLiteral =
+ DecimalIntegerLiteral
+| HexIntegerLiteral
+| OctalIntegerLiteral
+
+(* 3.10.2 Floating-Point Literals *)
+
+let ExponentPart = ['e' 'E'] ['+' '-']? Digit+
+
+let FloatTypeSuffix = ['f' 'F' 'd' 'D']
+
+let FloatingPointLiteral =
+ (Digit+ '.' Digit* | '.' Digit+) ExponentPart? FloatTypeSuffix?
+| Digit+ (ExponentPart FloatTypeSuffix? | ExponentPart? FloatTypeSuffix)
+
+(* 3.10.3 Boolean Literals *)
+
+let BooleanLiteral = "true" | "false"
+
+(* 3.10.6 Escape Sequences for Character and String Literals *)
+
+let OctalEscape = '\\' ['0'-'3']? OctalDigit? OctalDigit
+
+(* Not in spec -- added because we don't handle Unicode elsewhere. *)
+
+let UnicodeEscape = "\\u" HexDigit HexDigit HexDigit HexDigit
+
+let EscapeSequence =
+ '\\' ['b' 't' 'n' 'f' 'r' '"' '\'' '\\']
+| OctalEscape
+| UnicodeEscape
+
+(* 3.10.4 Character Literals *)
+
+let SingleCharacter = [^ '\'' '\\' '\n' '\r']
+let CharacterLiteral = '\'' (SingleCharacter | EscapeSequence) '\''
+
+(* 3.10.5 String Literals *)
+
+let StringCharacter = [^ '"' '\\' '\n' '\r']
+let StringLiteral = '"' (StringCharacter | EscapeSequence)* '"'
+
+(* 3.10.7 The Null Literal *)
+
+let NullLiteral = "null"
+
+(* 3.10 Literals *)
+
+let Literal =
+ IntegerLiteral
+| FloatingPointLiteral
+| BooleanLiteral
+| CharacterLiteral
+| StringLiteral
+| NullLiteral
+
+(* Assignment operators, except '=', from section 3.12 *)
+
+let AssignmentOperator =
+ ('+' | '-' | '*' | '/' | '&' | '|' | '^' | '%' | "<<" | ">>" | ">>>") '='
+
+(*****************************************************************************)
+rule token = parse (* Main lexer entry point: each call returns one parser
+ token.  Whitespace and comments are returned as tokens (TCommentSpace,
+ TComment) instead of being skipped, and every token carries the info
+ built by [tokinfo]. *)
+(* old:
+| WhiteSpace
+ { token lexbuf }
+| LineTerminator
+ { next_line lexbuf; token lexbuf }
+| "/*"
+ { begin_comment lexbuf; comment lexbuf; token lexbuf }
+| "//" InputCharacter* LineTerminator (* inline of EndOfLineComment*)
+ { eol_comment lexbuf; next_line lexbuf; token lexbuf }
+*)
+| [' ' '\t' '\n' '\r' '\011' '\012' ]+ (* '\011' and '\012' are decimal escapes: vertical tab and form feed *)
+ { TCommentSpace (tokinfo lexbuf) }
+| "/*"
+ {
+ let info = tokinfo lexbuf in (* info must be taken before [comment] consumes more input *)
+ let com = comment lexbuf in
+ TComment(info +> tok_add_s com)
+ }
+(* don't keep the trailing \n; it will be in another token *)
+| "//" InputCharacter*
+ { TComment(tokinfo lexbuf) }
+(* Reserved words, primitive type names and true/false/null match Identifier
+ as well as any later clause of equal length; ocamllex prefers the earlier
+ clause on a tie, so keyword_table decides which token they become. *)
+
+
+| Identifier
+ {
+ let info = tokinfo lexbuf in
+ let s = tok lexbuf in
+
+ match Common.optionise (fun () -> Hashtbl.find keyword_table s) with
+ | Some f -> f info
+ | None -> IDENTIFIER (s, info)
+
+ }
+| Literal {
+ let info = tokinfo lexbuf in
+ let s = tok lexbuf in
+ LITERAL (s, info)
+ }
+
+(* 3.11 Separators *)
+| '(' { LP(tokinfo lexbuf) } | ')' { RP(tokinfo lexbuf) }
+| '{' { LC(tokinfo lexbuf) } | '}' { RC(tokinfo lexbuf) }
+| '[' { LB(tokinfo lexbuf) } | ']' { RB(tokinfo lexbuf) }
+| ';' { SM(tokinfo lexbuf) }
+| ',' { CM(tokinfo lexbuf) }
+| '.' { DOT(tokinfo lexbuf) }
+
+(* 3.12 Operators *)
+| "=" { EQ(tokinfo lexbuf) }
+| ">" { GT(tokinfo lexbuf) } | "<" { LT(tokinfo lexbuf) }
+| "!" { NOT(tokinfo lexbuf) }
+| "~" { COMPL(tokinfo lexbuf) }
+| "?" { COND(tokinfo lexbuf) }
+| ":" { COLON(tokinfo lexbuf) }
+| "==" { EQ_EQ(tokinfo lexbuf) }
+| "<=" { LE(tokinfo lexbuf) } | ">=" { GE(tokinfo lexbuf) }
+| "!=" { NOT_EQ(tokinfo lexbuf) }
+| "&&" { AND_AND(tokinfo lexbuf) } | "||" { OR_OR(tokinfo lexbuf) }
+| "++" { INCR(tokinfo lexbuf) } | "--" { DECR(tokinfo lexbuf) }
+| "+" { PLUS(tokinfo lexbuf) } | "-" { MINUS(tokinfo lexbuf) }
+| "*" { TIMES(tokinfo lexbuf) } | "/" { DIV(tokinfo lexbuf) }
+| "&" { AND(tokinfo lexbuf) } | "|" { OR(tokinfo lexbuf) }
+| "^" { XOR(tokinfo lexbuf) }
+| "%" { MOD(tokinfo lexbuf) }
+| "<<" { LS(tokinfo lexbuf) } | ">>" { SRS(tokinfo lexbuf) }
+| ">>>" { URS(tokinfo lexbuf) }
+(* every compound assignment operator shares one token and keeps its text *)
+| AssignmentOperator {
+ let info = tokinfo lexbuf in
+ let s = tok lexbuf in
+ OPERATOR_EQ (s, info)
+ }
+(* end of input, optionally preceded by control-Z; the info is rewrapped with an empty string *)
+| SUB? eof { EOF (tokinfo lexbuf +> Ast_java.rewrap_str "") }
+(* fallback: any other single character becomes TUnknown instead of raising *)
+| _
+ {
+ if !Flag_parsing_java.verbose_lexing
+ then pr2_once ("LEXER:unrecognised symbol, in token rule:"^tok lexbuf);
+ TUnknown (tokinfo lexbuf)
+ }
+
+(*****************************************************************************)
+(* less: allow only char-'*' ? *)
+(* Scans the rest of a traditional comment once [token] has consumed the
+   opening delimiter, and returns the text read up to and including the
+   closing delimiter.  At end of file a message is printed and the text
+   gathered so far is returned without a closing delimiter. *)
+and comment = parse
+  | "*/" { tok lexbuf }
+  (* noteopti: take a whole run of non-star characters in one step *)
+  | [^ '*']+
+      { let chunk = tok lexbuf in
+        let rest = comment lexbuf in
+        chunk ^ rest }
+  | [ '*']
+      { let star = tok lexbuf in
+        let rest = comment lexbuf in
+        star ^ rest }
+  | eof
+      { pr2 ("LEXER: Unterminated_comment");
+        "" }
+  (* The two character-class clauses above already accept every single
+     character, so this clause is not expected to fire. *)
+  | _
+      { let unknown = tok lexbuf in
+        pr2 ("LEXER: unrecognised symbol in comment:"^unknown);
+        let rest = comment lexbuf in
+        unknown ^ rest }
+(* old:
+and comment = parse
+ "*/" { end_comment lexbuf }
+| LineTerminator { continue_comment lexbuf; next_line lexbuf; comment lexbuf }
+| eof { raise (Lexical "Unterminated_comment") }
+| _ { continue_comment lexbuf; comment lexbuf }
+*)
View
72 lang_java/parsing/lib_parsing_java.ml
@@ -0,0 +1,72 @@
+(* Copyright (C) 2008 Yoann Padioleau
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License (GPL)
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * file license.txt for more details.
+ *)
+
+open Common
+
+module Ast_c = Ast_java
+module Visitor_c = Visitor_java
+
+(*****************************************************************************)
+(* Extract infos *)
+(*****************************************************************************)
+
+(* Run [recursor] over [x] with a visitor whose [kinfo_s] hook records every
+   info it is handed and returns it unchanged.  The recorded infos are
+   returned in the order left by [Common.push2]; the value produced by the
+   traversal itself is discarded. *)
+let extract_info_visitor recursor x =
+  let collected = ref [] in
+  let visitor =
+    { Visitor_c.default_visitor_s with
+      Visitor_c.kinfo_s = (fun (_k, _) info ->
+        Common.push2 info collected;
+        info
+      )
+    }
+  in
+  ignore (recursor visitor x);
+  !collected
+
+let ii_of_stmt = extract_info_visitor Visitor_c.stmt (* Each [ii_of_*] here
+ specialises [extract_info_visitor] to one visitor entry point and returns
+ the infos which that traversal hands to the [kinfo_s] hook. *)
+let ii_of_ini = extract_info_visitor Visitor_c.init
+
+let ii_of_decls = extract_info_visitor Visitor_c.decls
+let ii_of_modifiers = extract_info_visitor Visitor_c.modifiers
+
+(*****************************************************************************)
+(* Return [(max, min)]: the elements of [xs] with the greatest and the
+   smallest position according to [Ast_c.compare_pos].  An element replaces
+   the current extreme only when the comparison yields exactly -1.
+   Fails with [Failure] on the empty list. *)
+let max_min_ii_by_pos xs =
+  match xs with
+  | [] -> failwith "empty list, max_min_ii_by_pos"
+  | [single] -> (single, single)
+  | first :: rest ->
+      let strictly_before a b = Ast_c.compare_pos a b = (-1) in
+      let step (hi, lo) cur =
+        let hi' = if strictly_before hi cur then cur else hi in
+        let lo' = if strictly_before cur lo then cur else lo in
+        (hi', lo')
+      in
+      List.fold_left step (first, first) rest
+
+(*
+let info_to_fixpos ii =
+ match Ast_c.pinfo_of_info ii with
+ Ast_c.OriginTok pi -> Ast_cocci.Real pi.Common.charpos
+ | Ast_c.ExpandedTok (_,(pi,offset)) ->
+ Ast_cocci.Virt (pi.Common.charpos,offset)
+ | Ast_c.FakeTok (_,(pi,offset)) ->
+ Ast_cocci.Virt (pi.Common.charpos,offset)
+ | Ast_c.AbstractLineTok pi -> failwith "unexpected abstract"
+*)
+
+(* Positions of the greatest and smallest infos of [xs], in that order;
+   fails like [max_min_ii_by_pos] on the empty list. *)
+let max_min_by_pos xs =
+  let (hi, lo) = max_min_ii_by_pos xs in
+  (Ast_c.pos_of_info hi, Ast_c.pos_of_info lo)
+
+
+
View
340 lang_java/parsing/license.txt
@@ -0,0 +1,340 @@
+GPL
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+ 675 Mass Ave, Cambridge, MA 02139, USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) 19yy <name of author>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) 19yy name of author
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+ <signature of Ty Coon>, 1 April 1989
+ Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Library General
+Public License instead of this License.
View
26 lang_java/parsing/modif-orig.txt
@@ -0,0 +1,26 @@
+pad:
+ - some file renaming to be more consistent with my C (and C++) parser
+ - some merge of files into one file (for lexer and test_parsing_java)
+ - some aesthetic changes in ast_java and the grammar: group AST elements
+ such as statement, expression, etc. into big categories and add cosmetic separators.
+ - more grouping, move stuff into xxx_opt, list_opt, avoid introducing
+ intermediate rules that are referenced only once (inline them instead)
+ - add comments and set of tokens, as done in my other parsers
+ - add parse_info for all tokens
+ - put the grammar rules in lowercase, to better see what is a token,
+ what is a rule, and what is an OCaml constructor,
+ and to make the grammar more like my other grammars.
+ - add the wrap for top big categories
+ - aesthetics: align actions in grammar
+ - move parsing helper functions from ast_java to grammar
+ - add the wrap for ident
+ - visitor, comment_annotater
+ - move type and constructors from ast_java to parser.mly as it's used only
+ in intermediate parsing
+
+
+add the wrap2
+
+
+
+
View
194 lang_java/parsing/parse_java.ml
@@ -0,0 +1,194 @@
+(* Copyright (C) 2008 Yoann Padioleau *)
+
+open Common
+
+module TH = Token_helpers_java
+
+(*****************************************************************************)
+(* Helpers *)
+(*****************************************************************************)
+
+(* Pair the lexeme currently matched in [lexbuf] with the character offset
+ * at which that lexeme starts. *)
+let lexbuf_to_strpos lexbuf =
+  let start = Lexing.lexeme_start lexbuf in
+  let lexeme = Lexing.lexeme lexbuf in
+  (lexeme, start)
+
+(* Pair the string of token [tok] with its position, both as reported by
+ * Token_helpers_java. *)
+let token_to_strpos tok =
+  let pos = TH.pos_of_tok tok in
+  let str = TH.str_of_tok tok in
+  (str, pos)
+
+(* Build a human-readable error message for the token [tok].
+ *
+ * When Flag_parsing_java.verbose_parsing is set, the message comes from
+ * Common.error_message, given the token's file and its (string, position)
+ * pair. Otherwise only a short hint naming the file is returned.
+ *)
+let error_msg_tok tok =
+  let file = TH.file_of_tok tok in
+  if !Flag_parsing_java.verbose_parsing
+  then Common.error_message file (token_to_strpos tok)
+  else
+    (* a separator is needed here: without it the file name runs straight
+     * into the hint ("error in Foo.javaset verbose_parsing ...") *)
+    "error in " ^ file ^ "; set verbose_parsing for more info"
+
+
+(*****************************************************************************)
+(* Stat *)
+(*****************************************************************************)
+(* Per-file parsing statistics.
+ * In this file, a successful parse sets [correct] to the length of
+ * [Common.cat filename] and [bad] to 0; a failed parse does the reverse.
+ * Presumably Common.cat returns the lines of the file, which would make
+ * both fields line counts -- TODO confirm.
+ *)
+type parsing_stat = {
+ mutable correct: int;
+ mutable bad: int;
+}
+
+(* Print a two-line summary of a list of per-file statistics:
+ *  - the number of files, how many have [bad = 0], and that ratio as an
+ *    integer percentage;
+ *  - the sums of the [correct] and [bad] fields, and the share of
+ *    [correct] as a float percentage.
+ *
+ * Both percentages are reported as 0 when their denominator is zero
+ * (empty list, or only zero counts) instead of raising Division_by_zero
+ * or printing nan.
+ *)
+let print_parsing_stat_list statxs =
+  let total = List.length statxs in
+  let perfect =
+    statxs +> List.filter (fun stat -> stat.bad = 0) +> List.length
+  in
+  (* integer division by zero raises Division_by_zero on an empty list *)
+  let percent_perfect = if total = 0 then 0 else (100 * perfect) / total in
+
+  pr "\n\n\n---------------------------------------------------------------";
+  pr (
+    (sprintf "NB total files = %d; " total) ^
+    (sprintf "perfect = %d; " perfect) ^
+    (sprintf "=========> %d" percent_perfect) ^ "%"
+  );
+
+  let good = statxs +> List.fold_left (fun acc stat -> acc + stat.correct) 0 in
+  let bad = statxs +> List.fold_left (fun acc stat -> acc + stat.bad) 0 in
+
+  let gf, badf = float_of_int good, float_of_int bad in
+  (* 0. /. 0. is nan: report 0% when there is nothing to count *)
+  let percent_good =
+    if good + bad = 0 then 0.0 else 100.0 *. (gf /. (gf +. badf))
+  in
+  pr (
+    (sprintf "nb good = %d, nb bad = %d " good bad) ^
+    (sprintf "=========> %f" percent_good) ^ "%"
+  )
+
+
+(*****************************************************************************)
+(* Lexing only *)
+(*****************************************************************************)
+
+
+(* Lex [file] entirely and return its tokens in source order; the last
+ * element is the first token for which TH.is_eof holds.
+ *
+ * Each token's info is rewritten through Common.complete_parse_info, using
+ * the table built by Common.full_charpos_to_pos, to fill in the line and
+ * column information.
+ *
+ * Raises Failure when the lexer raises Lexer_java.Lexical; the message
+ * includes Common.error_message for the current lexeme.
+ *)
+let tokens2 file =
+  let table = Common.full_charpos_to_pos file in
+
+  Common.with_open_infile file (fun chan ->
+    let lexbuf = Lexing.from_channel chan in
+    try
+      (* Accumulate in reverse and flip once at the end: the loop is then
+       * tail-recursive and cannot overflow the stack on large files. *)
+      let rec tokens_aux acc =
+        let tok = Lexer_java.token lexbuf in
+
+        (* fill in the line and col information *)
+        let tok = tok +> TH.visitor_info_of_tok (fun ii ->
+          let pi = ii.Ast_java.pinfo in
+          { ii with Ast_java.pinfo =
+              (* could assert pinfo.filename = file ? *)
+              (Common.complete_parse_info file table pi)
+          })
+        in
+
+        if TH.is_eof tok
+        then List.rev (tok::acc)
+        else tokens_aux (tok::acc)
+      in
+      tokens_aux []
+    with
+    | Lexer_java.Lexical s ->
+        failwith ("lexical error " ^ s ^ "\n =" ^
+                  (Common.error_message file (lexbuf_to_strpos lexbuf)))
+  )
+
+(* Profiled entry point for lexing: runs tokens2 on [a] under the
+ * "Java parsing.tokens" profiling label. *)
+let tokens a =
+  let lex_file () = tokens2 a in
+  Common.profile_code "Java parsing.tokens" lex_file
+
+
+(*****************************************************************************)
+(* Main entry point *)
+(*****************************************************************************)
+
+(* The tokens of a file; parse_java returns the unfiltered token list here
+ * (comments included). *)
+type info_item = (Parser_java.token list)
+(* Result of parsing one file: Left carries the AST, Right carries a list of
+ * Ast_java.info built from the file's tokens when parsing failed; either
+ * way the result is paired with the file's tokens. *)
+type program2 =
+ (Ast_java.compilation_unit, Ast_java.info list) Common.either * info_item
+
+
+
+(* Print [msg] on stderr, prefixed by the location reported by
+ * Lexer_helper.location. *)
+let error msg =
+  let where = Lexer_helper.location () in
+  Printf.eprintf "%s: %s\n" where msg
+
+(* Older parsing entry point: hands Lexer_java.token directly to the
+ * generated parser through Lexer_helper.with_lexbuf.
+ *
+ * Returns ((Left ast, []), stat) on success and ((Right (), []), stat)
+ * when any exception is raised; the exception is printed with [error].
+ * The token list is always empty (still a TODO).
+ *)
+let parse_java_old filename =
+  let count_entries () = List.length (Common.cat filename) in
+  let no_tokens = [] in (* TODO *)
+  Lexer_helper.set_file_name filename;
+  try
+    Lexer_helper.with_lexbuf (fun lexbuf ->
+      let ast = Parser_java.goal Lexer_java.token lexbuf in
+      let stat = { correct = count_entries (); bad = 0 } in
+      Printf.eprintf "%s: OK\n" (Lexer_helper.location ());
+      (Left ast, no_tokens), stat
+    )
+  with exn ->
+    error (Printexc.to_string exn);
+    let stat = { correct = 0; bad = count_entries () } in
+    (Right (), no_tokens), stat
+
+
+
+
+(* Parse the Java file [filename].
+ *
+ * Returns ((result, toks_orig), stat) where toks_orig is the full token
+ * list of the file (comments included) and result is:
+ *  - Left ast when Parser_java.goal succeeds;
+ *  - Right infos, built from toks_orig with TH.info_of_tok, when the
+ *    parser raises Lexer_java.Lexical or Parsing.Parse_error.
+ * Any other exception raised during parsing is re-raised. The call to
+ * [tokens] happens before the try, so its exceptions are not caught here.
+ *)
+let parse_java filename =
+ let toks_orig = tokens filename in
+
+ (* the parser only sees non-comment tokens *)
+ let toks = toks_orig +> Common.exclude TH.is_comment in
+
+ (* Why use this lexing scheme ? Why not classically give lexer func
+ * to parser ? Because I now keep comments in lexer. Could
+ * just do a simple wrapper that when comment ask again for a token,
+ * but maybe simpler to use cur_tok technique.
+ *)
+ let all_tokens = ref toks in
+ (* List.hd assumes toks is non-empty; tokens2 always ends its result with
+ * an eof token, presumably never classified as a comment -- TODO confirm *)
+ let cur_tok = ref (List.hd !all_tokens) in
+
+ (* Lexer handed to the generated parser: it ignores the lexbuf and pops
+ * the next token from all_tokens, remembering it in cur_tok so that the
+ * error handler below can report where parsing stopped. Once cur_tok is
+ * eof it keeps returning eof instead of popping further. *)
+ let lexer_function =
+ (fun _ ->
+ if TH.is_eof !cur_tok
+ then (pr2 "LEXER: ALREADY AT END"; !cur_tok)
+ else
+ let v = Common.pop2 all_tokens in
+ cur_tok := v;
+ !cur_tok
+ )
+ in
+
+ (* placeholder lexbuf: lexer_function never reads from it, so its refill
+ * function is not expected to be called *)
+ let lexbuf_fake = Lexing.from_function (fun buf n -> raise Impossible) in
+
+ Lexer_helper.set_file_name filename;
+
+ try (
+ let ast = Parser_java.goal lexer_function lexbuf_fake in
+
+ let stat =
+ { correct = (Common.cat filename +> List.length); bad = 0 }
+ in
+ Printf.eprintf "%s: OK\n" (Lexer_helper.location ());
+ (Left ast, toks_orig), stat
+ )
+ with e -> begin
+ (* report known parsing failures; anything else is re-raised untouched *)
+ (match e with
+ (* Lexical is probably no longer raised at this point, I think *)
+ | Lexer_java.Lexical s ->
+ pr2 ("lexical error " ^s^ "\n =" ^ error_msg_tok !cur_tok)
+ | Parsing.Parse_error ->
+ pr2 ("parse error \n = " ^ error_msg_tok !cur_tok)
+(*
+ | Semantic_java.Semantic (s, i) ->
+ pr2 ("semantic error " ^s^ "\n ="^ error_msg_tok tr.current)
+*)
+ | e -> raise e
+ );
+ error (Printexc.to_string e);
+ let stat = { correct = 0; bad = (Common.cat filename +> List.length); } in
+
+ (* on failure, hand back the info of every original token *)
+ let info_of_bads = Common.map_eff_rev TH.info_of_tok toks_orig in
+
+ (Right info_of_bads, toks_orig), stat
+ end
+
View
16 lang_java/parsing/parse_java.mli
@@ -0,0 +1,16 @@
+
+(* Statistics for one parsed file; see print_parsing_stat_list for how a
+ * list of them is summarized. *)
+type parsing_stat = {
+ mutable correct: int;
+ mutable bad: int;
+}
+
+(* The tokens of a file. *)
+type info_item = (Parser_java.token list)
+(* Either the AST of the file or a list of token infos (the case used when
+ * parsing fails), paired with the tokens of the file. *)
+type program2 =
+ (Ast_java.compilation_unit, Ast_java.info list) Common.either * info_item
+
+
+(* Parse a Java file and return its parse result together with the
+ * statistics for that file. *)
+val parse_java :
+ Common.filename (*javafile*) -> (program2 * parsing_stat)
+
+
+(* Print a summary (file counts and percentages) of a list of statistics. *)
+val print_parsing_stat_list: parsing_stat list -> unit
View
1,263 lang_java/parsing/parser_java.mly
@@ -0,0 +1,1263 @@
+/*
+(* Joust: a Java lexer, parser, and pretty-printer written in OCaml
+ * Copyright (C) 2001 Eric C. Cooper <ecc@cmu.edu>
+ * Released under the GNU General Public License
+ *
+ * LALR(1) (ocamlyacc) grammar for Java
+ *
+ * Attempts to conform to:
+ * The Java Language Specification, Second Edition
+ * - James Gosling, Bill Joy, Guy Steele, Gilad Bracha
+ *
+ * Some modifications by Yoann Padioleau.
+ *)
+*/
+%{
+open Common
+open Ast_java
+
+(*****************************************************************************)
+(* Wrappers *)
+(*****************************************************************************)
+
+(* Local alias for the first projection of a pair. *)
+let myfst pair = fst pair
+
+(*****************************************************************************)
+(* Parse helpers functions *)
+(*****************************************************************************)
+
+(* Turn a (string, info) pair into an identifier: the string paired with a
+ * one-element info list. *)
+let synth_id (name, info) = (name, [info])
+
+(* Identifiers standing for the `this' and `super' keywords, built from the
+ * info of the keyword token. *)
+let this_ident info = synth_id ("this", info)
+let super_ident info = synth_id ("super", info)
+
+
+(* Type made of a single-component name, wrapped with noii. *)
+let named_type str_and_info = (TypeName [synth_id str_and_info], noii)
+
+(* The `void' pseudo-type, built from the info of the VOID token. *)
+let void_type info = named_type ("void", info)
+
+
+
+
+(* Parser-only representation of a declarator as written in the source:
+ * either a plain identifier (IdentDecl) or another declarator wrapped in
+ * one array layer (ArrayDecl). canon_var moves those array layers onto the
+ * declared type.
+ *)
+type var_decl_id = var_decl_idbis wrap
+ and var_decl_idbis =
+ | IdentDecl of ident
+ | ArrayDecl of var_decl_id
+(* and mdeclarator = var_decl_id * vars *)
+(* and var_decls = (var_decl_id * init option) list *)
+
+
+
+(* Canonicalize a declarator: each ArrayDecl layer around the identifier is
+ * peeled off and re-applied to the type as an ArrayType, until the bare
+ * identifier is reached. *)
+let rec canon_var mods t v =
+  match unwrap v with
+  | ArrayDecl inner -> canon_var mods (ArrayType t, todoii) inner
+  | IdentDecl name -> { v_name = name; v_type = t; v_mods = mods }
+
+(* Build a method record from its header; the body is set to Empty. *)
+let method_header mods mtype (v, formals) throws =
+  let var = canon_var mods mtype v in
+  { m_var = var;
+    m_formals = formals;
+    m_throws = throws;
+    m_body = (Empty, todoii);
+  }
+
+(* Turn a list of (declarator, initializer) pairs sharing the same
+ * modifiers and type into canonical field records, each passed through
+ * [f]. *)
+let decls f mods vtype vars =
+  List.map
+    (fun (v, init) -> f { f_var = canon_var mods vtype v; f_init = init })
+    vars
+
+(* Same, wrapped as Field. *)
+let field_decls = decls (fun field -> Field field)
+
+(* Same, wrapped as LocalVar. *)
+let var_decls = decls (fun var -> LocalVar var)
+
+
+
+(* Expression statement calling [name] with [args]. *)
+let constructor_invocation name args =
+  let callee = (Name name, todoii) in
+  let call = Call (callee, args) in
+  (Expr (call, todoii), todoii)
+
+
+%}
+
+/*(*************************************************************************)*/
+
+/*
+(*
+ * pad: some tokens are not even used in this file because they are filtered
+ * in some intermediate phases. But they still must be declared because
+ * ocamllex may generate them, or some intermediate phase may also
+ * generate them (like some functions in parsing_hacks.ml)
+ *)
+*/
+
+/*(* unrecognized token *)*/
+%token <Ast_java.info> TUnknown
+
+%token <Ast_java.info> TComment TCommentSpace
+
+
+%token <(string * Ast_java.info)> IDENTIFIER
+%token <(string * Ast_java.info)> LITERAL
+%token <(string * Ast_java.info)> PRIMITIVE_TYPE
+
+/*
+ * 3.11 Separators
+ */
+%token <Ast_java.info> LP /* ( */
+%token <Ast_java.info> RP /* ) */
+%token <Ast_java.info> LC /* { */
+%token <Ast_java.info> RC /* } */
+%token <Ast_java.info> LB /* [ */
+%token <Ast_java.info> RB /* ] */
+%token <Ast_java.info> SM /* ; */
+%token <Ast_java.info> CM /* , */
+%token <Ast_java.info> DOT /* . */
+
+/*
+ * 3.12 Operators
+ */
+%token <Ast_java.info> EQ /* = */
+%token <Ast_java.info> GT /* > */
+%token <Ast_java.info> LT /* < */
+%token <Ast_java.info> NOT /* ! */
+%token <Ast_java.info> COMPL /* ~ */
+%token <Ast_java.info> COND /* ? */
+%token <Ast_java.info> COLON /* : */
+%token <Ast_java.info> EQ_EQ /* == */
+%token <Ast_java.info> LE /* <= */
+%token <Ast_java.info> GE /* >= */
+%token <Ast_java.info> NOT_EQ /* != */
+%token <Ast_java.info> AND_AND /* && */
+%token <Ast_java.info> OR_OR /* || */
+%token <Ast_java.info> INCR /* ++ */
+%token <Ast_java.info> DECR /* -- */
+%token <Ast_java.info> PLUS /* + */
+%token <Ast_java.info> MINUS /* - */
+%token <Ast_java.info> TIMES /* * */
+%token <Ast_java.info> DIV /* / */
+%token <Ast_java.info> AND /* & */
+%token <Ast_java.info> OR /* | */
+%token <Ast_java.info> XOR /* ^ */
+%token <Ast_java.info> MOD /* % */
+%token <Ast_java.info> LS /* << */
+%token <Ast_java.info> SRS /* >> */
+%token <Ast_java.info> URS /* >>> */
+
+%token <(string * Ast_java.info)> OPERATOR_EQ /* += -= *= /= &= |= ^= %= <<= >>= >>>= */
+
+/*
+ * 3.9 Keywords
+ */
+%token <Ast_java.info>
+ ABSTRACT BOOLEAN BREAK BYTE CASE CATCH CHAR CLASS CONST CONTINUE
+ DEFAULT DO DOUBLE ELSE EXTENDS FINAL FINALLY FLOAT FOR GOTO
+ IF IMPLEMENTS IMPORT INSTANCEOF INT INTERFACE LONG
+ NATIVE NEW PACKAGE PRIVATE PROTECTED PUBLIC RETURN
+ SHORT STATIC STRICTFP SUPER SWITCH SYNCHRONIZED
+ THIS THROW THROWS TRANSIENT TRY VOID VOLATILE WHILE
+ /*(* javaext: *)*/
+ ASSERT
+
+%token <Ast_java.info> EOF
+
+/*(*-----------------------------------------*)*/
+/*
+(*
+ * The start production must begin with a lowercase letter,
+ * because ocamlyacc defines the parsing function with that name.
+ *)
+*/
+%start goal
+%type <Ast_java.compilation_unit> goal
+
+%type <Ast_java.typ> type_java
+%type <Ast_java.stmt> statement
+%type <Ast_java.expr> expression
+
+%%
+
+/*(*************************************************************************)*/
+/*
+(* TOC:
+ * goal
+ * name
+ * type
+ * expr
+ * statement
+ * declaration
+ * class
+ * toplevel
+ *)
+*/
+/*(*************************************************************************)*/
+
+/*(* Start symbol: a whole compilation unit followed by EOF. *)*/
+goal: compilation_unit EOF { $1 }
+
+
+/* 7.3 */
+/*(* Builds the compilation_unit record from the package declaration, the
+   * imports and the type declarations, in that order. *)*/
+compilation_unit: package_declaration_opt import_declarations_opt type_declarations_opt
+ {
+ let compilation_unit pkg ims dcls =
+ { package = pkg; imports = ims; decls = dcls; }
+ in
+ compilation_unit $1 $2 $3
+ }
+
+
+/*(*************************************************************************)*/
+/*(* ident, namespace *)*/
+/*(*************************************************************************)*/
+/* 3.8 */
+/*(* An identifier is its string paired with a one-element info list. *)*/
+identifier: IDENTIFIER { fst $1, [snd $1] }
+
+/* 6.5 */
+/*(* A dotted name is accumulated in REVERSE order (last component first);
+   * rules that use a name apply List.rev to get source order. *)*/
+name:
+ | identifier { [$1] }
+ | name DOT identifier { $3 :: $1 }
+
+
+
+/*(* Names in source order. *)*/
+class_or_interface_type: name { List.rev $1 }
+class_type: name { List.rev $1 }
+interface_type: name { List.rev $1 }
+
+
+
+/*(*************************************************************************)*/
+/*(* type *)*/
+/*(*************************************************************************)*/
+
+/* 4.1 */
+/*(* A type is either a primitive type or a reference type. *)*/
+type_java:
+ | primitive_type { $1 }
+ | reference_type { $1 }
+
+
+/* 4.2 */
+/*(* Primitive types are represented as a one-component TypeName. *)*/
+primitive_type: PRIMITIVE_TYPE { named_type $1 }
+
+
+/* 4.3 */
+reference_type:
+ | class_or_interface_type { TypeName $1, noii }
+ | array_type { $1 }
+
+/*(* Each LB RB pair adds one ArrayType layer and keeps both bracket tokens.
+   * The name alternative reverses the name itself, since name is built in
+   * reverse order. *)*/
+array_type:
+ | primitive_type LB RB { ArrayType $1, [$2;$3] }
+ | name LB RB { ArrayType (TypeName (List.rev $1),noii), [$2;$3] }
+ | array_type LB RB { ArrayType $1, [$2;$3] }
+
+
+/*(*************************************************************************)*/
+/*(* expr *)*/
+/*(*************************************************************************)*/
+
+
+/* 15.8 */
+primary:
+ | primary_no_new_array { $1 }
+ | array_creation_expression { $1 }
+
+
+
+/*(* Notes on the actions below:
+   *  - `this' and `name.this' are both represented as a Name; for the
+   *    qualified form the this identifier is consed onto the reversed name
+   *    before List.rev restores source order;
+   *  - a parenthesized expression yields the inner expression only, the LP
+   *    and RP tokens are currently dropped (TODO). *)*/
+primary_no_new_array:
+ | literal { Literal (fst $1), [snd $1] }
+ | class_literal { $1 }
+ | THIS { Name [this_ident $1], todoii }
+ | name DOT THIS { Name (List.rev (this_ident $3 :: $1)), todoii }
+ | LP expression RP { $2(*TODO*) }
+ | class_instance_creation_expression { $1 }
+ | field_access { $1 }
+ | method_invocation { $1 }
+ | array_access { $1 }
+
+
+/* 3.10 */
+/*(* Passes the (string, info) pair of the LITERAL token through. *)*/
+literal: LITERAL { $1 }
+
+/* 15.8.2 */
+/*(* `T.class' for a primitive type, a name, an array type or void; the DOT
+   * and CLASS tokens are kept in the info list. *)*/
+class_literal:
+ | primitive_type DOT CLASS { ClassLiteral $1, [$2;$3] }
+ | name DOT CLASS { ClassLiteral (TypeName (List.rev $1),noii), [$2;$3] }
+ | array_type DOT CLASS { ClassLiteral $1, [$2;$3] }
+ | VOID DOT CLASS { ClassLiteral (void_type $1), [$2;$3] }
+
+
+/* 15.9 */
+/*(* `new C(args) [body]', plus the qualified forms `primary.new C(...)' and
+   * `name.new C(...)'. In the qualified forms the qualifier becomes the
+   * first component of NewQualifiedClass; a name qualifier is reversed into
+   * source order first. *)*/
+class_instance_creation_expression:
+ | NEW class_or_interface_type LP argument_list_opt RP class_body_opt
+ { NewClass ((TypeName $2,noii), $4, $6), [$1;$3;$5] }
+ | primary DOT NEW identifier LP argument_list_opt RP class_body_opt
+ { NewQualifiedClass ($1, $4, $6, $8), [$2;$3;$5;$7] }
+ /*(* not in 2nd edition java language specification. *)*/
+ | name DOT NEW identifier LP argument_list_opt RP class_body_opt
+ { NewQualifiedClass ((Name (List.rev $1),todoii), $4, $6, $8), [$2;$3;$5;$7] }
+
+
+/* 15.10 */
+/*(* NewArray carries: the element type, the list of sized dimensions, the
+   * value of dims / dims_opt, and an optional initializer.
+   * The first two alternatives have sized dimensions and no initializer;
+   * the last two have only unsized dimensions plus an initializer.
+   * dim_exprs is passed through List.rev, so it is presumably accumulated in
+   * reverse order (its rule is outside this excerpt). *)*/
+array_creation_expression:
+ | NEW primitive_type dim_exprs dims_opt
+ { NewArray ($2, List.rev $3, $4, None), [$1] }
+ | NEW name dim_exprs dims_opt
+ { NewArray ((TypeName (List.rev $2),todoii), List.rev $3, $4, None), [$1] }
+ | NEW primitive_type dims array_initializer
+ { NewArray ($2, [], $3, Some $4), [$1] }
+ | NEW name dims array_initializer
+ { NewArray ((TypeName (List.rev $2),todoii), [], $3, Some $4), [$1] }
+
+
+/*(* One sized dimension `[expr]'; the bracket tokens are dropped (TODO). *)*/
+dim_expr: LB expression RB { $2 (*TODO*) }
+
+/*(* Counts the number of `[]' pairs; the bracket tokens themselves are not
+   * kept (TODO). *)*/
+dims:
+ | LB RB { 1 (*TODO*) }
+ | dims LB RB { $1 + 1 (*TODO*) }
+
+
+
+/* 15.11 */
+field_access:
+ | primary DOT identifier
+ { Dot ($1, $3), [$2] }
+ | SUPER DOT identifier
+ { Name [super_ident $1; $3],