-
Notifications
You must be signed in to change notification settings - Fork 32
/
interpreted.clj
170 lines (147 loc) · 6.13 KB
/
interpreted.clj
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
(ns clj-antlr.interpreted
"Interpreter for antlr grammars. Slightly slower, but easier to use than the
full antlr compilation process."
(:require [clj-antlr.common :as common]
[clj-antlr.proto :as proto])
(:import (java.lang ThreadLocal)
(org.antlr.v4 Tool)
(org.antlr.v4.tool LexerGrammar
Grammar)
(org.antlr.v4.parse ANTLRParser)
(org.antlr.v4.runtime CommonTokenStream
Lexer
LexerInterpreter
Parser
ParserInterpreter)
(org.antlr.v4.runtime.tree ParseTree)))
(defn ^Tool tool
"Construct a new ANTLR tool"
[]
(Tool.))
(defn load-string-grammar
"Loads a grammar from a string."
[string]
(let [tool (tool)
ast (.parseGrammarFromString tool string)
grammar (.createGrammar tool ast)]
(.process tool grammar false)
grammar))
(defn ^Grammar grammar
"Loads a Grammar from a string filename, or a string containing a grammar
inline. If the string contains newlines, interprets as a grammar string;
otherwise as a filename."
[grammar-or-filename]
(if (re-find #"\n" grammar-or-filename)
(load-string-grammar grammar-or-filename)
(Grammar/load grammar-or-filename)))
(defn rule-index
"Finds the index of the given rule in a grammar. Throws if the rule is not
present in the given grammar. Example
(rule-index my-grammar :address)"
[^Grammar grammar rule-name]
(common/rule-index grammar (or rule-name (common/first-rule grammar))))
(defn lexer-interpreter
"Builds a new lexer interpreter around an empty input stream. Used to
initialize our parser, which will later reset and re-use this object
for speed."
[^Grammar grammar]
(.createLexerInterpreter grammar (common/char-stream "")))
(defn parser-interpreter
"Builds a new parser interpreter around a given grammar and lexer."
[^Grammar grammar ^Lexer lexer]
(.createParserInterpreter grammar (common/tokens lexer)))
(defn reset-lexer-interpreter!
"Prepares a lexer interpreter for a new run."
[^LexerInterpreter lexer error-listener char-stream]
(doto lexer
(.setInputStream char-stream)
(.reset)
(.removeErrorListeners)
(.addErrorListener error-listener)))
(defn reset-parser-interpreter!
"Prepares a parser interpreter for a new run."
[^ParserInterpreter parser error-listener token-stream]
(doto parser
(.setTokenStream token-stream)
(.reset)
(.removeErrorListeners)
(.addErrorListener error-listener)))
; Re-uses the same lexer and parser each time. Note that the :tokens and
; :parser returned by (parse) may be mutated at any time; they should only be
; used for static things like resolving token names.
(defrecord SinglethreadedParser [^Grammar grammar
^LexerInterpreter lexer
^ParserInterpreter parser]
proto/Parser
(parse [p opts text]
(locking p ; lmao pretty sure returning tokens is a race condition
; waiting to happen
(let [error-listener (common/error-listener)
char-stream (common/char-stream text opts)]
(reset-lexer-interpreter! lexer error-listener char-stream)
(let [tokens (common/tokens lexer)]
(reset-parser-interpreter! parser error-listener tokens)
(let [tree (.parse parser (rule-index grammar (:root opts)))]
; Throw errors unless requested not to
(when-let [errors (and (get opts :throw? true)
@error-listener)]
(throw (common/parse-error errors tree)))
{:tree tree
:tokens tokens
:errors @error-listener
:parser parser}))))))
(defn singlethreaded-parser
"Creates a new single-threaded parser for a grammar."
[^Grammar grammar]
(let [^Lexer lexer (.createLexerInterpreter grammar (common/char-stream ""))
parser (.createParserInterpreter grammar (common/tokens lexer))]
(SinglethreadedParser. grammar lexer parser)))
; Wrapper for using the singlethreaded parser in multiple threads.
(defrecord ThreadLocalParser [^ThreadLocal local grammar]
proto/Parser
(parse [_ opts text]
(let [parser (or (.get local)
(let [parser (singlethreaded-parser grammar)]
(.set local parser)
parser))]
(proto/parse parser opts text))))
(defn parser
"Construct a new parser."
([filename]
(ThreadLocalParser. (ThreadLocal.) (grammar filename))))
; (singlethreaded-parser (grammar filename))))
(defn parse
"Given a Grammar, options, and text to parse (a string, reader, or
inputstream), returns a map of the :parser, :tree, and :errors for the
input."
([^Grammar grammar opts input]
(let [error-listener (common/error-listener)
; Root node to start at
^String root (or (:root opts) (common/first-rule grammar))
rule (.getRule grammar root)
_ (assert rule)
rule (.index rule)
; Char stream
char-stream (if (get opts :case-sensitive? true)
(common/char-stream input)
(common/case-changing-char-stream input))
; Extract tokens
^Lexer lexer (doto (.createLexerInterpreter grammar char-stream)
(.removeErrorListeners)
(.addErrorListener error-listener))
tokens (common/tokens lexer)
; Create parser
^ParserInterpreter parser (doto
(.createParserInterpreter grammar tokens)
(.removeErrorListeners)
(.addErrorListener error-listener))]
; Parse
(let [tree (.parse parser rule)]
; Throw errors unless requested not to
(when-let [errors (and (get opts :throw? true)
@error-listener)]
(throw (common/parse-error errors tree)))
{:tree tree
:tokens tokens
:errors @error-listener
:parser parser}))))