Skip to content
This repository has been archived by the owner on Jun 4, 2019. It is now read-only.

Commit

Permalink
erlang: basic lexer
Browse files Browse the repository at this point in the history
  • Loading branch information
pad committed Jan 22, 2011
1 parent b324f85 commit 30b92d9
Show file tree
Hide file tree
Showing 13 changed files with 706 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Expand Up @@ -260,3 +260,5 @@ external/ocamlbdb/libcamlbdb.a
/lang_php/meta/ffi
/lang_php/meta/gen_code
/DB_LIGHT.marshall
/lang_erlang/parsing/parse_erlang.ml
/lang_erlang/parsing/parse_erlang.mli
3 changes: 3 additions & 0 deletions Makefile
Expand Up @@ -150,6 +150,7 @@ BASICLIBS=commons/commons.cma \
lang_java/parsing/lib.cma \
lang_python/parsing/lib.cma \
lang_csharp/parsing/lib.cma \
lang_erlang/parsing/lib.cma \

BASICSYSLIBS=nums.cma bigarray.cma str.cma unix.cma

Expand Down Expand Up @@ -199,6 +200,7 @@ LIBS= commons/commons.cma \
lang_python/analyze/lib.cma \
lang_csharp/parsing/lib.cma \
lang_csharp/analyze/lib.cma \
lang_erlang/parsing/lib.cma \

MAKESUBDIRS=commons \
$(BDBDIR) $(REGEXPDIR) $(MPIDIR) \
Expand Down Expand Up @@ -232,6 +234,7 @@ MAKESUBDIRS=commons \
lang_python/analyze \
lang_csharp/parsing \
lang_csharp/analyze \
lang_erlang/parsing \
lang_php/analyze \
lang_php/analyze/basic \
lang_php/analyze/foundation \
Expand Down
60 changes: 60 additions & 0 deletions lang_erlang/parsing/ast_erlang.ml
@@ -0,0 +1,60 @@
(* Yoann Padioleau
*
* Copyright (C) 2010 Facebook
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* version 2.1 as published by the Free Software Foundation, with the
* special exception on linking described in file license.txt.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the file
* license.txt for more details.
*)
open Common


(*****************************************************************************)
(* Prelude *)
(*****************************************************************************)

(*
*)

(*****************************************************************************)
(* The AST related types *)
(*****************************************************************************)

(* ------------------------------------------------------------------------- *)
(* Token/info *)
(* ------------------------------------------------------------------------- *)
(* Token/position information; a plain alias of Parse_info.info. *)
type info = Parse_info.info

(* Synonym of info. *)
type tok = info

(* Pairs a value with the token/position information it was built from. *)
type 'a wrap = 'a * info

(* ------------------------------------------------------------------------- *)
(* Names *)
(* ------------------------------------------------------------------------- *)

(* ------------------------------------------------------------------------- *)
(* Expressions *)
(* ------------------------------------------------------------------------- *)

(* ------------------------------------------------------------------------- *)
(* Class *)
(* ------------------------------------------------------------------------- *)

(* ------------------------------------------------------------------------- *)
(* Toplevel phrases *)
(* ------------------------------------------------------------------------- *)

(* Placeholder AST types: both are unit for now, so they carry no
 * information about the parsed file. *)
type toplevel = unit
and program = unit

(*****************************************************************************)
(* Wrappers *)
(*****************************************************************************)
5 changes: 5 additions & 0 deletions lang_erlang/parsing/flag_parsing_erlang.ml
@@ -0,0 +1,5 @@

(* Mutable flags for the Erlang front end; all of them start disabled. *)

(* When set, the lexer reports unrecognised symbols it meets in its main
 * rule. *)
let verbose_lexing : bool ref = ref false

(* NOTE(review): not read by the code visible alongside this file;
 * presumably consumed by the parser driver. *)
let verbose_parsing : bool ref = ref false

(* NOTE(review): not read by the code visible alongside this file. *)
let debug_lexer : bool ref = ref false
222 changes: 222 additions & 0 deletions lang_erlang/parsing/lexer_erlang.mll
@@ -0,0 +1,222 @@
{
(* Yoann Padioleau
*
* Copyright (C) 2010 Facebook
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* version 2.1 as published by the Free Software Foundation, with the
* special exception on linking described in file license.txt.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the file
* license.txt for more details.
*)

open Common

module Ast = Ast_erlang
module Flag = Flag_parsing_erlang

open Parser_erlang


(*****************************************************************************)
(* Prelude *)
(*****************************************************************************)

(*
* http://www.erlang.org/download/erl_spec47.ps.gz appendix E
* and erlang-otp/lib/compiler/src/core_scan.erl
*)

(*****************************************************************************)
(* Helpers *)
(*****************************************************************************)
(* Lexical error carrying a message string.
 * NOTE(review): none of the rules visible in this lexer raises it; it is
 * presumably kept for callers or for later use. *)
exception Lexical of string

(* Text of the lexeme most recently matched on [lexbuf]. *)
let tok (lexbuf : Lexing.lexbuf) : string =
  Lexing.lexeme lexbuf
(* Builds the Parse_info value for the current lexeme from its text and
 * its start offset in the input. *)
let tokinfo lexbuf =
  let lexeme_text = Lexing.lexeme lexbuf in
  let start_offset = Lexing.lexeme_start lexbuf in
  Parse_info.tokinfo_str_pos lexeme_text start_offset

(* ---------------------------------------------------------------------- *)
(* Maps each reserved word to a function that builds its token from the
 * token info. The main lexing rule looks every atom up here and falls
 * back to a plain identifier token when the word is absent.
 * NOTE(review): try, andalso and orelse are not listed; confirm whether
 * the parser is meant to see them as plain identifiers. *)
let keyword_table =
  let keywords = [
    (* real keywords *)
    "if",      (fun info -> Tif info);
    "cond",    (fun info -> Tcond info);
    "when",    (fun info -> Twhen info);
    "case",    (fun info -> Tcase info);
    "begin",   (fun info -> Tbegin info);
    "end",     (fun info -> Tend info);
    "let",     (fun info -> Tlet info);
    "of",      (fun info -> Tof info);
    "fun",     (fun info -> Tfun info);
    "after",   (fun info -> Tafter info);
    "query",   (fun info -> Tquery info);
    "catch",   (fun info -> Tcatch info);
    "receive", (fun info -> Treceive info);

    (* word operators *)
    "div",  (fun info -> Tdiv info);
    "rem",  (fun info -> Trem info);
    "or",   (fun info -> Tor info);
    "xor",  (fun info -> Txor info);
    "bor",  (fun info -> Tbor info);
    "bxor", (fun info -> Tbxor info);
    "bsl",  (fun info -> Tbsl info);
    "bsr",  (fun info -> Tbsr info);
    "and",  (fun info -> Tand info);
    "band", (fun info -> Tband info);
    "not",  (fun info -> Tnot info);
    "bnot", (fun info -> Tbnot info);
  ]
  in
  Common.hash_of_list keywords

}
(*****************************************************************************)

(* Basic character classes. *)
let letter = ['A'-'Z' 'a'-'z']
let digit = ['0'-'9']

let lowercase = ['a'-'z']
let uppercase = ['A'-'Z']

let newline = '\n'
let space = [' ' '\t']

let nonzerodigit = ['1'-'9']
let octdigit = ['0'-'7']
let hexdigit = digit | ['a'-'f'] | ['A'-'F']

(* Digits allowed after the # of a based integer. The lexer does not check
 * that each digit is valid for the given base. *)
let basedigit = digit | letter

(* Erlang integers are either plain decimal digits, where leading zeros are
 * allowed, or Base#Digits as in 16#FF. The previous definition was copied
 * from Python: it split 08 into two tokens, accepted 0x1F as one integer,
 * and broke 16#FF into an integer, a sharp and a variable. *)
let integer = digit+ ('#' basedigit+)?

(* Escape sequences accepted inside strings: control escapes written with
 * a caret, single-character escapes, and one to three octal digits.
 * NOTE(review): hexadecimal escapes are not covered; the string rule
 * still keeps their characters but reports the backslash as unrecognised.
 *)
let escapeseq =
  ( '\\' '^' _ |
    '\\' ['\'' '"' '\\' 'b' 'd' 'e' 'f' 'n' 'r' 's' 't' 'v'] |
    '\\' octdigit |
    '\\' octdigit octdigit |
    '\\' octdigit octdigit octdigit
  )

(* One or more characters allowed after the first character of a name. *)
let namechars = (letter | digit | '@' | '_')+

(* Atoms start with a lowercase letter. *)
let atom = lowercase namechars*

(* Variables start with an uppercase letter, or with an underscore followed
 * by at least one name character; a lone underscore is not a variable. *)
let variable = (uppercase namechars* | '_' namechars+)
(*****************************************************************************)
(* Main lexing rule: each call consumes one lexeme and returns one token.
 * Comments, newlines and runs of spaces are returned as tokens of their
 * own rather than skipped. Every token carries the info built by tokinfo
 * for the matched lexeme; identifiers, variables, integers and strings
 * also carry their text. The token constructors are presumably the ones
 * declared by Parser_erlang, which is opened in the header. *)
rule token = parse
(* ----------------------------------------------------------------------- *)
(* spacing/comments *)
(* ----------------------------------------------------------------------- *)
(* a comment runs from the percent sign up to, not including, the newline *)
| "%" [^ '\n']* { TComment (tokinfo lexbuf) }
| newline { TCommentNewline (tokinfo lexbuf) }
| space+ { TCommentSpace (tokinfo lexbuf) }
(* ----------------------------------------------------------------------- *)
(* symbols *)
(* ----------------------------------------------------------------------- *)
| "(" { TOParen(tokinfo lexbuf) } | ")" { TCParen(tokinfo lexbuf) }
| "{" { TOBrace(tokinfo lexbuf) } | "}" { TCBrace(tokinfo lexbuf) }
| "[" { TOBracket(tokinfo lexbuf) } | "]" { TCBracket(tokinfo lexbuf) }
| "." { TDot(tokinfo lexbuf) }
| ":" { TColon(tokinfo lexbuf) }
| ";" { TSemiColon(tokinfo lexbuf) }
| "," { TComma(tokinfo lexbuf) }
| "?" { TQuestion(tokinfo lexbuf) }
| "|" { TPipe(tokinfo lexbuf) }
(* NOTE(review): the double pipe yields the same constructor as the single
 * pipe above, so the two differ only in their token info; confirm that
 * this is intended and not a missing dedicated token. *)
| "||" { TPipe(tokinfo lexbuf) }
| "->" { TArrow(tokinfo lexbuf) }
| "#" { TSharp(tokinfo lexbuf) }
| "+" { TPlus(tokinfo lexbuf) } | "-" { TMinus(tokinfo lexbuf) }
| "*" { TStar(tokinfo lexbuf) } | "/" { TDiv(tokinfo lexbuf) }
| "=" { TEq (tokinfo lexbuf) } | "==" { TEqEq(tokinfo lexbuf) }
| "/=" { TSlashEq(tokinfo lexbuf) }
| "=:=" { TEqColonEq (tokinfo lexbuf) }
| "=/=" { TEqSlashEq(tokinfo lexbuf) }
| "<" { TLess(tokinfo lexbuf) } | ">" { TMore(tokinfo lexbuf) }
| "=<" { TLessEq(tokinfo lexbuf) } | ">=" { TMoreEq(tokinfo lexbuf) }
| "++" { TInc(tokinfo lexbuf) }
| "--" { TDec(tokinfo lexbuf) }
| "!" { TBang(tokinfo lexbuf) }
| "<-" { TAssign(tokinfo lexbuf) }
(* ----------------------------------------------------------------------- *)
(* Keywords and ident *)
(* ----------------------------------------------------------------------- *)
(* an atom found in keyword_table becomes the matching keyword token,
 * any other atom becomes a TIdent carrying its text *)
| atom {
let info = tokinfo lexbuf in
let s = tok lexbuf in
match Common.optionise (fun () -> Hashtbl.find keyword_table s) with
| Some f -> f info
| None -> TIdent (s, info)
}
| variable { TVariable (tok lexbuf, tokinfo lexbuf) }
(* a lone underscore: the variable pattern needs at least one more
 * character after a leading underscore, so it cannot match here *)
| '_' { TUnderscore (tokinfo lexbuf) }
(* ----------------------------------------------------------------------- *)
(* Constant *)
(* ----------------------------------------------------------------------- *)
| integer { TInt (tok lexbuf, tokinfo lexbuf) }
(* TODO: TChar ? TFloat ? *)
(* ----------------------------------------------------------------------- *)
(* Strings *)
(* ----------------------------------------------------------------------- *)
(* only the opening quote is matched here; string_double_quote consumes the
 * rest and returns the contents without the closing quote. The info taken
 * at the opening quote is then extended with the contents followed by a
 * closing quote. *)
| '"' {
let info = tokinfo lexbuf in
let s = string_double_quote lexbuf in
TString (s, info +> Parse_info.tok_add_s (s ^ "\""))
}

(* ----------------------------------------------------------------------- *)
(* Misc *)
(* ----------------------------------------------------------------------- *)

(* ----------------------------------------------------------------------- *)
(* eof *)
(* ----------------------------------------------------------------------- *)
| eof { EOF (tokinfo lexbuf) }

(* fallback: any other single character becomes TUnknown; it is reported
 * through pr2_once only when Flag.verbose_lexing is set *)
| _ {
if !Flag.verbose_lexing
then pr2_once ("LEXER:unrecognised symbol, in token rule:"^tok lexbuf);
TUnknown (tokinfo lexbuf)
}

(*****************************************************************************)

(* Lexes the remainder of a double-quoted string, the opening quote having
 * been consumed by the caller. Returns the raw contents, escape sequences
 * left as written, without the closing quote.
 *
 * Malformed input is handled on a best-effort basis: a message is printed
 * with pr2 and lexing goes on. At end of file the contents read so far are
 * returned unchanged; a stray single-quote character used to be appended
 * there and ended up inside the string contents. *)
and string_double_quote = parse
  (* closing quote: end of the string *)
  | '"' { "" }

  (* a run of ordinary characters; at least one is required so that this
   * case can never match the empty string *)
  | [^ '\\' '\"' '\n']+ {
      let s = tok lexbuf in
      s ^ string_double_quote lexbuf
    }
  | escapeseq {
      let s = tok lexbuf in
      s ^ string_double_quote lexbuf
    }

  | eof {
      pr2 "LEXER: end of file in string_double_quote";
      ""
    }
  (* anything else, i.e. a newline or a backslash that does not start a
   * known escape: warn, keep the character, and continue *)
  | _ {
      let s = tok lexbuf in
      pr2 ("LEXER: unrecognised symbol in string_double_quote:"^s);
      s ^ string_double_quote lexbuf
    }
53 changes: 53 additions & 0 deletions lang_erlang/parsing/lib_parsing_erlang.ml
@@ -0,0 +1,53 @@
(* Yoann Padioleau
*
* Copyright (C) 2010 Facebook
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* version 2.1 as published by the Free Software Foundation, with the
* special exception on linking described in file license.txt.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the file
* license.txt for more details.
*)

open Common

open Ast_erlang

module Ast = Ast_erlang
module Flag = Flag_parsing_erlang

(* module V = Visitor_erlang *)

(*****************************************************************************)
(* Wrappers *)
(*****************************************************************************)

(*****************************************************************************)
(* Filenames *)
(*****************************************************************************)

(* Lists the files found under the given directories or files, keeps those
 * whose file type is Erlang source, and returns them sorted. *)
let find_erlang_files_of_dir_or_files xs =
  let is_erlang_file filename =
    match File_type.file_type_of_file filename with
    | File_type.PL (File_type.Erlang) -> true
    | _ -> false
  in
  let candidates = Common.files_of_dir_or_files_no_vcs_nofilter xs in
  Common.sort (List.filter is_erlang_file candidates)

(*****************************************************************************)
(* Extract infos *)
(*****************************************************************************)

(*****************************************************************************)
(* Max min, range *)
(*****************************************************************************)

(*****************************************************************************)
(* AST helpers *)
(*****************************************************************************)

0 comments on commit 30b92d9

Please sign in to comment.