-
Notifications
You must be signed in to change notification settings - Fork 32
/
common.clj
259 lines (228 loc) · 8.72 KB
/
common.clj
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
(ns clj-antlr.common
"Common functions for building and using parsers."
(:require [clojure.string :as string]
[clojure.tools.logging :as log])
(:import (java.io InputStream
Reader)
(java.util.concurrent ConcurrentHashMap)
(clj_antlr ParseError)
(org.antlr.v4.runtime ANTLRErrorListener
ANTLRInputStream
CaseChangingCharStream
CaseInsensitiveInputStream
CharStreams
CommonTokenStream
Parser
RecognitionException)
(org.antlr.v4.tool Grammar)
(org.antlr.v4.runtime.tree ParseTree
ParseTreeVisitor
Tree)))
(def ^ConcurrentHashMap fast-keyword-cache
"A map of strings to keywords."
(ConcurrentHashMap. 1024))
(defn fast-keyword
"Like (keyword str), but faster."
[s]
(or (.get fast-keyword-cache s)
(let [k (keyword s)]
(if (< 1024 (.size fast-keyword-cache))
k
(do
(.put fast-keyword-cache s k)
k)))))
(defn char-stream-from-input
"Constructs a CharStream out of a String, Reader, or InputStream."
[s]
(condp instance? s
InputStream (CharStreams/fromStream s)
Reader (CharStreams/fromReader s)
String (CharStreams/fromString s)))
(defmacro multi-hinted-let
"Deprecated, since antlr-input-stream is deprecated.
A let expression which expands into multiple type-hinted bodies with runtime
type dispatch provided by instanceof. Thanks to amalloy, as usual!"
[[name expr classes] & body]
(let [x (gensym)]
`(let [~x ~expr]
(condp instance? ~x
~@(for [class classes
clause [class `(let [~(with-meta name {:tag class})
~x] ~@body)]]
clause)
(throw (IllegalArgumentException. (str "No matching class for
" ~x " in " '~classes)))))))
(declare hinted)
(defn antlr-input-stream
"Deprecated in favor of char-stream.
Constructs an ANTLRInputStream out of a String, Reader, or InputStream."
[s]
(log/warn "antlr-input-stream is deprecated and will be removed in a future clj-antlr release. Migrate to char-stream functionality.")
(multi-hinted-let [hinted s [InputStream Reader String]]
(ANTLRInputStream. hinted)))
(defn case-changing-char-stream
"Wraps a CharStream in a CaseChangingCharStream. Called with one argument,
defaults to lowercasing all input, but the upper? argument allows for case to be chosen.
Adapted from https://github.com/parrt/antlr4/blob/case-insensitivity-doc/doc/resources/CaseChangingCharStream.java."
([charstream]
(case-changing-char-stream charstream false))
([charstream upper?]
(CaseChangingCharStream. charstream upper?)))
(defn case-insensitive-input-stream
"Deprecated in favor of case-changing-char-stream.
Constructs an ANTLRInputStream out of a string. Presents all characters to
the lexer as lowercase, but getText methods will return the original string.
Adapted from https://gist.github.com/sharwell/9424666. Consumes memory
proportional to the input string."
[input]
(log/warn "case-insensitive-input-stream is deprecated and will be removed in a future clj-antlr release. Migrate to char-stream functionality.")
(CaseInsensitiveInputStream. input))
(defn char-stream
"Constructs a charstream. With no options, calls char-stream-from-input. With
options:
:case-sensitive? true calls char-stream-from-input,
false calls case-changing-char-stream"
([s] (char-stream-from-input s))
([s opts]
(if (get opts :case-sensitive? true)
(char-stream-from-input s)
(case-changing-char-stream (char-stream-from-input s) false))))
(defn input-stream
"Deprecated in favor of char-stream.
Constructs an inputstream. With no options, calls antlr-input-stream. With
options:
:case-sensitive? true calls antlr-input-stream,
false calls case-insensitive-input-stream"
([s]
(log/warn "input-stream is deprecated and will be removed in a future clj-antlr release. Migrate to char-stream functionality.")
(antlr-input-stream s))
([s opts]
(log/warn "input-stream is deprecated and will be removed in a future clj-antlr release. Migrate to char-stream functionality.")
(if (get opts :case-sensitive? true)
(antlr-input-stream s)
(case-insensitive-input-stream s))))
(defn tokens
"A token stream taken from a lexer."
[lexer]
(CommonTokenStream. lexer))
(defn visit
"Visits a node with a visitor."
[^ParseTreeVisitor visitor node]
(when-not (nil? node)
(.visit visitor node)))
(defn child
"Get a specific child in a tree."
[^Tree node i]
(.getChild node i))
(defn child-count
"How many children does a node have?"
[^Tree node]
(.getChildCount node))
(defn children
"Returns the children of a RuleNode."
[^Tree node]
(map #(.getChild node %)
(range (child-count node))))
(defn parent
"The parent of a node."
[^Tree node]
(.getParent node))
(defn text
"The text of a node."
[^ParseTree node]
(.getText node))
(defn first-rule
"The name of the first rule in a grammar."
[^Grammar grammar]
(aget (.getRuleNames grammar) 0))
(defn rule-index
"Given a grammar and the name of a rule, returns the integer index of that
rule."
[^Grammar grammar rule-name]
(let [rule (.getRule grammar ^String (name rule-name))]
(when-not rule
(throw (RuntimeException. (str "No such rule: " (pr-str rule-name)))))
(.index rule)))
(defn parser-rule-name
"Given a parser and an integer rule index, returns the string name of that
rule. Negative indexes map to nil."
[^Parser parser ^long index]
(when-not (neg? index)
(aget (.getRuleNames parser) index)))
(defn token-name
"Given a parser and a token index, returns the string name of that token.
Negative indexes map to nil."
[^Parser parser ^long index]
(when-not (neg? index)
(.getDisplayName (.getVocabulary parser) index)))
(defn parse-error
"Constructs a new ParseError exception with a list of errors."
[errors tree]
(ParseError. errors
tree
(string/join "\n" (map :message errors))))
(defn recognition-exception->map
"Converts a RecognitionException to a nice readable map."
[^RecognitionException e]
{:rule (.getCtx e)
:state (.getOffendingState e)
:expected (try (.getExpectedTokens e)
(catch IllegalArgumentException _
; I think ANTLR throws here for
; tokenizer errors.
nil))
:token (.getOffendingToken e)})
(defn error-listener
"A stateful error listener which accretes parse errors in a deref-able
structure. Deref returns nil if there are no errors; else a sequence of
heterogenous maps, depending on what debugging information is available."
[]
(let [errors (atom [])]
(reify
clojure.lang.IDeref
(deref [this] (seq (deref errors)))
ANTLRErrorListener
(reportAmbiguity [this
parser
dfa
start-index
stop-idex
exact
ambig-alts
configs]
; TODO
)
(reportAttemptingFullContext [this
parser
dfa
start-index
stop-index
conflicting-alts
configs])
(reportContextSensitivity [this
parser
dfa
start-index
stop-index
prediction
configs])
(syntaxError [this
recognizer
offending-symbol
line
char
message
e]
(let [err {:symbol offending-symbol
:line line
:char char
:message message}
err (if (isa? Parser recognizer)
(assoc err :stack (->> ^Parser recognizer
.getRuleInvocationStack
reverse))
err)
err (if e
(merge err (recognition-exception->map e))
err)]
(swap! errors conj err))))))