Permalink
Browse files

lang_python: -parse_python mostly working on webpy/ source

  • Loading branch information...
1 parent 244ff04 commit f22971ef1d45482e65e0a4f9811a91df99f36f5e pad committed Nov 13, 2010
View
@@ -2,7 +2,7 @@
* 0.14
-** pfff_visual
+** codemap
*** more semantic visual feedback
can see arguments passed by refs visually (TakeArgNByRef)
@@ -16,7 +16,7 @@ as well as functions containing dynamic calls (ContainDynamicCall)
can now visualize the result of a git grep on a project.
*** better visualization of directories
-use different color for dirs and files, and highlight first
+use different color for dirs and files labels, and highlight first
letter of label at depth = 1
** introduce spatch, a syntactical patch
@@ -26,15 +26,17 @@ a DSL to express easily refactoring on PHP.
better support for XHP patterns with flexible matching
on attributes
-** introducing parsing_nw/
+** introducing lang_nw/
so can visualize also Tex/Latex/Noweb source (which includes
the documentation of pfff!)
-** introducing parsing_lisp/
+** introducing lang_lisp/
-** introducing parsing_haskell/
+** introducing lang_haskell/
-** introducing parsing_java/
+** introducing lang_java/
+
+** introducing lang_python/
** php analysis
@@ -54,24 +56,26 @@ the documentation of pfff!)
This can help the visualizer to give more semantic visual feedback.
*** layer type so can save results of global analysis and process them
-later in pfff_visual or pfff_statistics
+later in codemap or pfff_statistics
** pfff:ml
more highlight
** documentation
-wrote a wiki pages (intro, sgrep, spatch, features, vision, etc)
+wrote wiki pages (intro, sgrep, spatch, features, vision, roadmap, etc)
-applied pfff_visual on many open source project and generated screenshots.
+applied codemap on many open source projects and generated screenshots.
** internals
refactor the code in visual/ to have smaller and cleaner files (thanks
-to literate programming and pfff_visual itself to show the problem
+to literate programming and codemap itself to show the problem
and assist in the refactoring)
+** renamed pfff_visual to codemap
+
* 0.13
first public release!
@@ -86,30 +86,171 @@ let keyword_table = Common.hash_of_list [
let letter = ['A'-'Z' 'a'-'z']
let digit = ['0'-'9']
+let ident = (letter | '_') (letter | digit | '_')*
+
+let newline = '\n'
+let space = [' ' '\t']
+
+let nonzerodigit = ['1'-'9']
+let octdigit = ['0'-'7']
+let hexdigit = digit | ['a'-'f'] | ['A'-'F']
+
+(* same character class as octdigit; kept as a separate name because it is
+ * used below in escapeseq *)
+let octal = ['0'-'7']
+
+(* in long string we can have any kind of \, like \[, so '\\' ['a'-'z'] is
+ * not enough
+ *)
+let escapeseq =
+  ( '\\' octal octal octal | '\\' _)
+
+(* integer literals: decimal, 0-prefixed octal, 0x/0X hexadecimal *)
+let decimalinteger = nonzerodigit digit* | '0'
+let octinteger = '0' octdigit+
+let hexinteger = '0' ('x' | 'X') hexdigit+
+
+let integer = (decimalinteger | octinteger | hexinteger)
+
+(* float literals: '1.', '.5', '1.5', '1e10', '1.5e-3', ... *)
+let intpart = digit+
+let fraction = '.' digit+
+let exponent = ('e' | 'E') ('+' | '-')? digit+
+
+let pointfloat = intpart? fraction | intpart '.'
+let exponentfloat = (intpart | pointfloat) exponent
+
+let floatnumber = (pointfloat | exponentfloat)
+
+(* imaginary literals: a float or int part followed by 'j' or 'J' *)
+let imagnumber = (floatnumber | intpart) ('j' | 'J')
+
(*****************************************************************************)
rule token = parse
(* ----------------------------------------------------------------------- *)
(* spacing/comments *)
(* ----------------------------------------------------------------------- *)
+ | "#" [^ '\n']* { TComment (tokinfo lexbuf) }
+
+  (* Python uses layout (offside-rule) syntax, as Haskell does, so newlines
+   * and indentation have to be handled in a special way by the caller of
+   * the lexer
+   *)
+ | newline { TCommentNewline (tokinfo lexbuf) }
+ | space+ { TCommentSpace (tokinfo lexbuf) }
(* ----------------------------------------------------------------------- *)
(* symbols *)
(* ----------------------------------------------------------------------- *)
+ | "=" { TEq (tokinfo lexbuf) }
+
+ | "(" { TOParen(tokinfo lexbuf) } | ")" { TCParen(tokinfo lexbuf) }
+ | "{" { TOBrace(tokinfo lexbuf) } | "}" { TCBrace(tokinfo lexbuf) }
+ | "[" { TOBracket(tokinfo lexbuf) } | "]" { TCBracket(tokinfo lexbuf) }
+ | "<<" { TOAngle(tokinfo lexbuf) } | ">>" { TCAngle(tokinfo lexbuf) }
+
+ | "+" { TPlus(tokinfo lexbuf) } | "-" { TMinus(tokinfo lexbuf) }
+ | "<" { TLess(tokinfo lexbuf) } | ">" { TMore(tokinfo lexbuf) }
+ | "<=" { TLessEq(tokinfo lexbuf) } | ">=" { TMoreEq(tokinfo lexbuf) }
+
+
+ | "==" { TEqEq(tokinfo lexbuf) }
+ | "<>" { TDiff(tokinfo lexbuf) }
+ | "!=" { TNotEq(tokinfo lexbuf) }
+
+ | "&" { TAnd(tokinfo lexbuf) }
+ | "|" { TOr(tokinfo lexbuf) }
+ | "^" { TXor(tokinfo lexbuf) }
+
+ | "`" { TBackQuote(tokinfo lexbuf) }
+ | "@" { TAt(tokinfo lexbuf) }
+
+ | "*" { TStar(tokinfo lexbuf) }
+ | "**" { TStarStar(tokinfo lexbuf) }
+ | "," { TComma(tokinfo lexbuf) }
+
+ | "." { TDot(tokinfo lexbuf) }
+ | "..." { TEllipsis(tokinfo lexbuf) }
+ | ":" { TColon(tokinfo lexbuf) }
+ | "~" { TTilde(tokinfo lexbuf) }
+
+ | "/" { TSlash(tokinfo lexbuf) }
+ | "//" { TSlashSlash(tokinfo lexbuf) }
+ | "%" { TPercent(tokinfo lexbuf) }
+
+
+ | "+=" | "-=" | "*=" | "/=" | "//=" | "%=" | "**="
+ | ">>=" | "<<=" | "&=" | "^=" | "|=" {
+ let s = tok lexbuf in
+ TAugOp (s, tokinfo lexbuf)
+ }
+
(* ----------------------------------------------------------------------- *)
(* Keywords and ident *)
(* ----------------------------------------------------------------------- *)
+ | ident {
+ let info = tokinfo lexbuf in
+ let s = tok lexbuf in
+ match Common.optionise (fun () -> Hashtbl.find keyword_table s) with
+ | Some f -> f info
+ | None -> TIdent (s, info)
+ }
(* ----------------------------------------------------------------------- *)
(* Constant *)
(* ----------------------------------------------------------------------- *)
+ | integer { TInt (tok lexbuf, tokinfo lexbuf) }
+ | integer ('l' | 'L') { TInt (tok lexbuf, tokinfo lexbuf) }
(* ----------------------------------------------------------------------- *)
(* Strings *)
(* ----------------------------------------------------------------------- *)
+ | ['u''U']? "'" {
+ let info = tokinfo lexbuf in
+ let s = string_quote lexbuf in
+ TString (s, info +> Parse_info.tok_add_s (s ^ "'"))
+ }
+ | ['u''U']? '"' {
+ let info = tokinfo lexbuf in
+ let s = string_double_quote lexbuf in
+ TString (s, info +> Parse_info.tok_add_s (s ^ "\""))
+ }
+
+ | ['u''U']? "'''" {
+ let info = tokinfo lexbuf in
+ let s = string_triple_quote lexbuf in
+ TString (s, info +> Parse_info.tok_add_s (s ^ "'''"))
+ }
+ | ['u''U']? '"' '"' '"' {
+ let info = tokinfo lexbuf in
+ let s = string_triple_double_quote lexbuf in
+ TString (s, info +> Parse_info.tok_add_s (s ^ "\"\"\""))
+ }
+
+ (* TODO: the rules for the raw string are not exactly the same;
+ * should not call the same string_xxx
+ *)
+ | ("r" | "ur" | "R" | "UR" | "Ur" | "uR") "'" {
+ let info = tokinfo lexbuf in
+ let s = string_quote lexbuf in
+ TString (s, info +> Parse_info.tok_add_s (s ^ "'"))
+ }
+ | ("r" | "ur" | "R" | "UR" | "Ur" | "uR") '"' {
+ let info = tokinfo lexbuf in
+ let s = string_double_quote lexbuf in
+ TString (s, info +> Parse_info.tok_add_s (s ^ "'"))
+ }
+
+ | ("r" | "ur" | "R" | "UR" | "Ur" | "uR") "'''" {
+ let info = tokinfo lexbuf in
+ let s = string_triple_quote lexbuf in
+ TString (s, info +> Parse_info.tok_add_s (s ^ "'"))
+ }
+ | ("r" | "ur" | "R" | "UR" | "Ur" | "uR") '"' '"' '"' {
+ let info = tokinfo lexbuf in
+ let s = string_triple_double_quote lexbuf in
+ TString (s, info +> Parse_info.tok_add_s (s ^ "'"))
+ }
+
+
(* ----------------------------------------------------------------------- *)
(* Misc *)
(* ----------------------------------------------------------------------- *)
@@ -127,3 +268,54 @@ rule token = parse
}
(*****************************************************************************)
+
+(* Reads the content of a single-quoted string up to (and consuming) the
+ * closing quote; returns the accumulated content, quotes excluded. *)
+and string_quote = parse
+  | "'" { "" }
+
+  | [^ '\'' '\n']* { let s = tok lexbuf in s ^ string_quote lexbuf }
+  (* keep the escape sequence in the returned content; it was previously
+   * dropped, which corrupted strings containing \' or \<newline> *)
+  | escapeseq { let s = tok lexbuf in s ^ string_quote lexbuf }
+
+  | eof { pr2 "LEXER: end of file in string_quote"; "'"}
+  | _ { let s = tok lexbuf in
+        pr2 ("LEXER: unrecognised symbol in string_quote:"^s);
+        s ^ string_quote lexbuf
+    }
+
+(* Reads the content of a double-quoted string up to (and consuming) the
+ * closing quote; returns the accumulated content, quotes excluded. *)
+and string_double_quote = parse
+  | '"' { "" }
+
+  | [^ '\"' '\n']* { let s = tok lexbuf in s ^ string_double_quote lexbuf }
+  (* keep the escape sequence in the returned content; it was previously
+   * dropped, which corrupted strings containing \" or \<newline> *)
+  | escapeseq { let s = tok lexbuf in s ^ string_double_quote lexbuf }
+
+  (* NOTE(review): returns "'" on eof like the single-quote rule; this
+   * looks like it should be "\"" here -- confirm how callers use this
+   * fallback value before changing it *)
+  | eof { pr2 "LEXER: end of file in string_double_quote"; "'"}
+  | _ { let s = tok lexbuf in
+        pr2 ("LEXER: unrecognised symbol in string_double_quote:"^s);
+        s ^ string_double_quote lexbuf
+    }
+
+(* Reads the content of a '''-quoted string up to (and consuming) the
+ * closing triple quote. Lone quotes and escape sequences are part of the
+ * content and must be preserved in the returned string. *)
+and string_triple_quote = parse
+  | "'''" { "" }
+
+  | [^ '\\' '\'' ]* { let s = tok lexbuf in s ^ string_triple_quote lexbuf }
+  (* both branches below previously dropped the matched text, losing
+   * escape sequences and single quotes inside triple-quoted strings *)
+  | escapeseq { let s = tok lexbuf in s ^ string_triple_quote lexbuf }
+  | "'" { let s = tok lexbuf in s ^ string_triple_quote lexbuf }
+
+  | eof { pr2 "LEXER: end of file in string_triple_quote"; "'"}
+  | _ { let s = tok lexbuf in
+        pr2 ("LEXER: unrecognised symbol in string_triple_quote:"^s);
+        s ^ string_triple_quote lexbuf
+    }
+
+(* Reads the content of a \"\"\"-quoted string up to (and consuming) the
+ * closing triple quote. Lone double quotes and escape sequences are part
+ * of the content and must be preserved in the returned string. *)
+and string_triple_double_quote = parse
+  | '"' '"' '"' { "" }
+
+  | [^ '\\' '"' ]* { let s = tok lexbuf in s ^ string_triple_double_quote lexbuf }
+  (* both branches below previously dropped the matched text, losing
+   * escape sequences and double quotes inside triple-quoted strings *)
+  | escapeseq { let s = tok lexbuf in s ^ string_triple_double_quote lexbuf }
+  | '"' { let s = tok lexbuf in s ^ string_triple_double_quote lexbuf }
+
+  | eof { pr2 "LEXER: end of file in string_triple_double_quote"; "'"}
+  | _ { let s = tok lexbuf in
+        pr2 ("LEXER: unrecognised symbol in string_triple_double_quote:"^s);
+        s ^ string_triple_double_quote lexbuf
+    }
@@ -35,7 +35,7 @@ let find_python_files_of_dir_or_files xs =
+> List.filter (fun filename ->
let ftype = File_type.file_type_of_file filename in
match ftype with
- | File_type.PL (File_type.ML ("ml" | "mli")) -> true
+ | File_type.PL (File_type.Python) -> true
| _ -> false
) |> Common.sort
@@ -113,7 +113,13 @@ let tokens a =
(*****************************************************************************)
let parse2 filename =
- raise Todo
+
+ let stat = Parse_info.default_stat filename in
+ let toks_orig = tokens filename in
+
+ (* TODO *)
+ [(), ("", toks_orig)], stat
+
let parse a =
Common.profile_code "Parse_python.parse" (fun () -> parse2 a)
@@ -42,6 +42,13 @@ open Ast_python
/*(*-----------------------------------------*)*/
/*(* tokens with "values" *)*/
+%token <string * Ast_python.info> TInt
+%token <string * Ast_python.info> TFloat
+%token <string * Ast_python.info> TComplex
+%token <string * Ast_python.info> TChar
+%token <string * Ast_python.info> TString
+
+%token <string * Ast_python.info> TIdent
/*(* keywords tokens *)*/
%token <Ast_python.info>
@@ -56,6 +63,30 @@ open Ast_python
Tdel Tfrom Tas Twith Tassert Tpass Texcept Timport Tprint Texec Tin Tis
/*(* syntax *)*/
+%token <Ast_python.info> TOParen TCParen
+%token <Ast_python.info> TOBracket TCBracket
+%token <Ast_python.info> TOBrace TCBrace
+%token <Ast_python.info> TOAngle TCAngle
+
+%token <Ast_python.info>
+ TComma
+ TColon
+ TBackQuote
+ TDot
+ TEllipsis
+ TStar TStarStar
+ TEq
+ TPlus TMinus
+ TTilde
+ TSlash TSlashSlash
+ TPercent
+ TAnd TOr TXor
+ TLess TMore TEqEq TMoreEq TLessEq TDiff TNotEq
+ TAt
+
+%token <string * Ast_python.info>
+ TAugOp
+
/*(* operators *)*/
@@ -11,8 +11,8 @@ open OUnit
(*****************************************************************************)
let test_tokens_python file =
- if not (file =~ ".*\\.ml[iyl]?")
- then pr2 "warning: seems not a ocaml file";
+ if not (file =~ ".*\\.py")
+ then pr2 "warning: seems not a python file";
Flag.verbose_lexing := true;
Flag.verbose_parsing := true;
Oops, something went wrong.

0 comments on commit f22971e

Please sign in to comment.