-
Notifications
You must be signed in to change notification settings - Fork 3.7k
/
Clojure.g4
105 lines (73 loc) · 2.16 KB
/
Clojure.g4
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
/* Converted to ANTLR 4 by Terence Parr. Unsure of provenance. I see
it was committed by matthias.koester for the clojure-eclipse project on
Oct 5, 2009:
https://code.google.com/p/clojure-eclipse/
Seems to me Laurent Petit had a version of this. I also see
Jingguo Yao submitting a link to a now-dead github project on
Jan 1, 2011.
https://github.com/laurentpetit/ccw/tree/master/clojure-antlr-grammar
Regardless, there are some issues perhaps related to "sugar";
I've tried to fix them.
This parses https://github.com/weavejester/compojure project.
I also note this is hardly a grammar; more like "match a bunch of
crap in parens" but I guess that is LISP for you ;)
*/
grammar Clojure;
// Entry rule: a source file is a sequence of zero or more top-level lists.
// The explicit EOF forces the parser to account for the whole input. Without
// it, parsing stops at the first token that cannot start a list and the
// remainder of the file is silently ignored instead of being reported as a
// syntax error.
file
    : list* EOF
    ;
// A single form: a literal, one of the three bracketed collections,
// a reader macro, or a var-quoted symbol (#'sym).
form
    : literal
    | list
    | vector
    | map
    | reader_macro
    | '#\'' SYMBOL    // TJP added (get Var object instead of the value of a symbol)
    ;
// Zero or more forms enclosed in parentheses.
list
    : '(' form* ')'
    ;
// Zero or more forms enclosed in square brackets.
vector
    : '[' form* ']'
    ;
// Zero or more pairs of forms enclosed in curly braces; forms are consumed
// two at a time, so an odd number of entries does not match.
map
    : '{' ( form form )* '}'
    ;
// One prefix token followed by the form it applies to.
// TJP added '&' (gather a variable number of arguments)
special_form
    : ( '\''
      | '`'
      | '~'
      | '~@'
      | '^'
      | '@'
      | '&'
      )
      form
    ;
// Anonymous-function shorthand: '#(' followed by zero or more forms and ')'.
lambda
    : '#(' form* ')'
    ;
// '#^' followed by a map and then the form that comes after the map.
meta_data
    : '#^' map form
    ;
// A quote token, then a '#' token, then a SYMBOL (i.e. the text '#sym).
// NOTE(review): Clojure spells var-quote as #'sym, which the last alternative
// of `form` matches; confirm whether this token order is intentional.
var_quote
    : '\'' '#' SYMBOL
    ;
// Regex literal: a '#' token immediately followed by a STRING token.
regex
    : '#' STRING
    ;
// Dispatches to the individual reader-macro rules; the final alternative
// matches a SYMBOL followed by '#'.
reader_macro
    : lambda
    | meta_data
    | special_form
    | regex
    | var_quote
    | SYMBOL '#'    // TJP added (auto-gensym)
    ;
// Any single atom token produced by the lexer.
literal
    : STRING
    | NUMBER
    | CHARACTER
    | NIL
    | BOOLEAN
    | KEYWORD
    | SYMBOL
    | PARAM_NAME
    ;
// Double-quoted string literal.
// Ordinary characters exclude both '"' and '\', and a backslash always
// consumes the character that follows it. If a bare backslash were allowed as
// an ordinary character, the longest-match rule would let a string ending in an
// escaped backslash (e.g. "\\") absorb its closing quote as an escaped quote
// and run on to the next '"' in the input.
STRING
    : '"' ( ~["\\] | '\\' . )* '"'
    ;
// Integer or decimal literal with an optional fraction and exponent.
// Both the leading sign and the exponent sign may be '+' or '-', so that
// inputs such as +1 and 1e+10 lex as a single NUMBER rather than being split
// into a NUMBER and a SYMBOL.
// NUMBER is declared before SYMBOL so that equal-length matches such as -1
// resolve to NUMBER.
NUMBER
    : ( '+' | '-' )? [0-9]+
      ( '.' [0-9]+ )?
      ( [eE] ( '+' | '-' )? [0-9]+ )?
    ;
// Character literal: a backslash followed by a named character, a four-digit
// unicode escape, an octal escape, or any single character.
// The multi-character alternatives are needed so that \newline, \u0041, \o101
// and the like lex as one CHARACTER token (longest match wins) instead of a
// one-character literal followed by a stray SYMBOL or NUMBER.
CHARACTER
    : '\\'
      ( 'newline'
      | 'return'
      | 'space'
      | 'tab'
      | 'formfeed'
      | 'backspace'
      | 'u' [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]
      | 'o' [0-7] [0-7]? [0-7]?
      | .
      )
    ;
// The literal text nil. Declared before SYMBOL so the exact text "nil"
// becomes NIL; longer identifiers that merely start with it are still SYMBOL.
NIL
    : 'nil'
    ;
// The two boolean literals.
BOOLEAN
    : 'true'
    | 'false'
    ;
// A colon immediately followed by the text of a SYMBOL.
KEYWORD
    : ':' SYMBOL
    ;
// A lone '.', a lone '/', or a NAME optionally followed by '/' and a
// second NAME.
SYMBOL
    : '.'
    | '/'
    | NAME ( '/' NAME )?
    ;
// '%' optionally followed by a positive integer with no leading zero
// (%, %1, %2, %10, ...).
PARAM_NAME
    : '%' ( [1-9] [0-9]* )?
    ;
// One head character, any number of rest characters, then zero or more
// groups each made of ':' and at least one rest character.
fragment NAME
    : SYMBOL_HEAD
      SYMBOL_REST*
      ( ':' SYMBOL_REST+ )*
    ;
// Characters allowed at the start of a NAME: ASCII letters and a fixed set
// of punctuation characters.
fragment SYMBOL_HEAD
    : [a-z]
    | [A-Z]
    | '*' | '+' | '!' | '-' | '_' | '?'
    | '>' | '<' | '=' | '$'
    ;
// Characters allowed after the first character of a NAME: everything in
// SYMBOL_HEAD plus '&', decimal digits and '.'.
fragment SYMBOL_REST
    : SYMBOL_HEAD
    | '&'      // apparently this is legal in an ID: "(defn- assoc-&-binding ..." TJP
    | [0-9]
    | '.'
    ;
// Whitespace, routed to the hidden channel. The comma is included in the set
// alongside space, newline, carriage return and tab.
// The comma is written unescaped: "\," is not a recognised escape sequence
// inside an ANTLR 4 character set and newer tool versions flag it.
WS
    : [ \n\r\t,] -> channel(HIDDEN)
    ;
// Line comment: ';' up to, but not including, the next carriage return or
// newline. Routed to the hidden channel.
COMMENT
    : ';' ~( '\r' | '\n' )* -> channel(HIDDEN)
    ;