/
ParsingRun.scala
324 lines (295 loc) · 14.4 KB
/
ParsingRun.scala
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
package fastparse
import fastparse.internal.{Instrument, Lazy, Msgs, Util}
/**
* Models an in-progress parsing run; contains all the mutable state that may
* be necessary during the parse, in order to avoid the individual parsers
* needing to perform their own allocations and instantiations, and greatly
* improving performance
*
* There are a few patterns that let us program with these mutable variables
* in a sort-of-pure-functional way:
*
* - If a parser that wishes to ignore changes to a field within their child
* parsers, a common pattern is to save the value of the field before the
* wrapped parser runs, and then re-set the field. e.g. this can be used to
* backtrack [[index]] after a lookahead parser finishes
*
* - If a parser wants to read the value of the field "returned" by multiple
* child parsers, make sure to read the field into a local variable after
* each child parser is complete to make sure the value you want from an
* earlier child isn't stomped over by a later child
*
* In general, for a parser to "return" a value in a mutable field, it is
* sufficient to simply set the value of that field before returning. It
* is the parent-parser's responsibility to make sure it reads out the value
* of the field to a local variable before running another child parser that
* will over-write the mutable field
*
* @param input The input to the parsing run, as a [[ParserInput]].
* @param startIndex Where the parse initially started, so as to allow
* `.result.traced` to re-create it with tracing enabled.
* @param originalParser The original parsing function we used to start this
* run, so as to allow `.result.traced` to re-create
* it with tracing enabled.
* @param traceIndex The index we wish to trace if tracing is enabled, else
* -1. Used to find failure messages to aggregate into
* `terminalMsgs`
* @param instrument Callbacks that can be injected before/after every
* `P(...)` parser.
* @param terminalMsgs When tracing is enabled, this collects up all the
* upper-most failures that happen at [[traceIndex]]
* (in [[Lazy]] wrappers) so they can be shown to the
* user at end-of-parse as suggestions for what could
* make the parse succeed. For terminal parsers like
* [[LiteralStr]], it just aggregate's the string
* representation. For composite parsers like `a ~ b`
* or `!a` which may fail at [[traceIndex]] even
* without any of their wrapped terminal parsers
* failing there, it makes use of the
* [[shortMsg]] as the string representation of
* the composite parser.
* @param shortMsg When tracing is enabled, this contains string
* representation of the last parser to run. Since
* parsers aren't really objects, we piece together
* the string in the parser body and return store it
* here, and an enclosing parser may fetch it back
* out to help build its own string representation.
* If the last parser started before the `traceIndex`,
* we only aggregate the portion of the parser msg
* that takes place after `traceIndex`
* @param failureStack The stack of named `P(...)` parsers in effect when
* the failure occured; only constructed when tracing
* is enabled via `traceIndex != -1`
* @param isSuccess Whether or not the parse is currently successful
* @param logDepth How many nested `.log` calls are currently surrounding us.
* Used to nicely indent the log output so you can see which
* parsers are nested within which other parsers; -1 means
* logging is disabled
* @param index The current index of the parse
* @param cut Has the current parse been prevented from backtracking?
* This field starts as `true` at top-level, since there
* is nowhere to backtrack to. Upon entering a parser
* that can backtrack, such as `|` or `.?` or `.rep`,
* it is set to `false`, and re-set to `true` upon
* encountering a `./` or `~/` cut operator that prevents
* backtracking.
* @param successValue The currently returned success value
* @param verboseFailures Whether or not we are currently collecting [[lastFailureMsg]]s;
* defaults to false, unless
* [[traceIndex]] is set OR we are inside a [[LogByNameOps.log]]
* call which necessitates tracing in order to print out
* failure traces during logging
* @param noDropBuffer Flag that prevents the parser from dropping earlier
* input. Used for the `.!` capture operator that needs
* the input around to return as a string, the `NoCut`
* operator that forces backtracking (regardless of
* internal cuts), or whitespace consumption which
* implicitly backtracks if the parser on the RHS of
* the whitespace fails or consumes 0 characters. The
* value for this field is lexically scoped, but it is
* up to the individual parser method implementations
* to set the values and remember to un-set it to
* the previous value after they finish. Forgetting to
* re-set it to the previous value can cause strange
* behavior or crashes.
* @param misc Additional key-value metadata that a user can attach
* to a parsing run, and manage however they like. Not
* as high performance as the built-in fields of
* [[ParsingRun]], but perhaps sufficient to satisfy
* ad-hoc needs e.g. keeping track of indentation or
* other contextual information
*/
final class ParsingRun[+T](val input: ParserInput,
val startIndex: Int,
val originalParser: ParsingRun[_] => ParsingRun[_],
val traceIndex: Int,
val instrument: Instrument,
// Mutable vars below:
var terminalMsgs: Msgs,
var aggregateMsgs: Msgs,
var shortMsg: Msgs,
var lastFailureMsg: Msgs,
var failureStack: List[(String, Int)],
var isSuccess: Boolean,
var logDepth: Int,
var index: Int,
var cut: Boolean,
var successValue: Any,
var verboseFailures: Boolean,
var noDropBuffer: Boolean,
val misc: collection.mutable.Map[Any, Any]){
/**
* Called by non-terminal parsers after completion, success or failure
*
* This needs to be called for both successful and failed parsers, as we need
* to record the msg of a successful parse in case it forms part of a larger
* failed parse later.
*
* For example:
*
* - Using "a" ~ ("b" ~ "c" | "d") to parse "abe"
* - We report that the the parser ("b" ~ "c" | "d") failed at index 1
* - That msg contains the msg of the parse "b" even though it was successful
*
* Overloaded to minimize the amount of callsite bytecode, since we do a ton
* of inlining in Fastparse, and large amounts of bytecode inlined in a method
* can cause JVM performance problems (e.g. JIT compilation may get disabled)
*/
def reportAggregateMsg(newshortMsg: Msgs): Unit = {
reportAggregateMsg(newshortMsg, aggregateMsgs)
}
def reportAggregateMsg(newshortMsg: Msgs,
newAggregateMsgs: Msgs): Unit = {
reportAggregateMsg(newshortMsg, newAggregateMsgs, false)
}
def reportAggregateMsg(newshortMsg: Msgs,
forceAggregate: Boolean): Unit = {
reportAggregateMsg(newshortMsg, aggregateMsgs, forceAggregate)
}
def reportAggregateMsg(newshortMsg: Msgs,
newAggregateMsgs: Msgs,
forceAggregate: Boolean): Unit = {
reportParseMsg0(
newshortMsg,
newAggregateMsgs,
forceAggregate,
newAggregateMsgs.value.nonEmpty
)
}
/**
* Called by any terminal parser; these are parsers for which displaying
* sub-failures does not make sense these include:
*
* - Individual strings or characters
* - Parsers like negation `!p` or `.filter` where the entire parser failing
* is not caused by sub-failure
* - Parsers like `.opaque`, where sub-failures are intentionally hidden and
* not shown to the user
*
* These "terminal" failures will be stored in the `terminalMsgs` in case
* a user wants to know what could have been placed at the failure point to
* let the parse progress
*/
def reportTerminalMsg(startIndex: Int, newshortMsg: Msgs): Unit = {
// We only care about terminal parsers which failed exactly at the traceIndex
if (!isSuccess && index == traceIndex) terminalMsgs :::= newshortMsg
reportParseMsg0(
if (startIndex >= traceIndex) newshortMsg else Msgs.empty,
if (startIndex >= traceIndex) newshortMsg else Msgs.empty,
false,
startIndex >= traceIndex
)
}
def reportParseMsg0(newshortMsg: Msgs,
newAggregateMsgs: Msgs,
forceAggregate: Boolean,
setShortMsg: Boolean): Unit = {
// `lastFailureMsg` ends up being set by the first parser to report a
// failure, while returning from the last parser to call `.freshFailure()
// (which nulls it out)
if (!isSuccess && lastFailureMsg == null) lastFailureMsg = newshortMsg
// We only set the `shortMsg` for some parsers. These include:
//
// - Terminal parsers which have `startIndex >= traceIndex`
//
// - Aggregate parsers which have non-empty `newAggregateMsgs`, indicating
// that they have either child terminal parsers with `startIndex >= traceIndex`
// or they have child aggregate parsers with non-empty `newAggregateMsgs`
//
// This lets us skip setting `shortMsg` for all parsers, terminal or
// aggregate, which run and terminate fully before `traceIndex`, and thus
// would be of no interest to a user debugging parse failures at `traceIndex`
shortMsg = if (setShortMsg) newshortMsg else Msgs.empty
// There are two cases when aggregating: either we stomp over the entire
// existing `aggregateMsgs` with `newshortMsg`, or we preserve it
// (with possible additions) with `newAggregateMsgs`.
aggregateMsgs =
if (forceAggregate) newAggregateMsgs
// We only replace the aggregate Msgs if:
//
// 1. We are not currently past a cut; if we are past a cut, there is no
// further backtracking and so the error aggregate that has occurred
// will be the final aggregate shown to the user
//
// 2. Only replace in case of failures
//
// 3. Only stomp over the given aggregation with shortMsg if the
// current parser has failed and the final parse `index` (after any
// backtracking) is still at-or-greater-than the `traceIndex`. That
// ensures that any parsers which started/ended before the point of
// failure are not shown, since they are irrelevant
else if (!cut && !isSuccess && traceIndex <= index) shortMsg
else newAggregateMsgs
}
// Use telescoping methods rather than default arguments to try and minimize
// the amount of bytecode generated at the callsite.
//
// Because fastparse inlines aggressively, it is very easy for a user to
// generate huge methods, so anything we can do to reduce the size of the
// generated code helps avoid bytecode size blowup
def freshSuccess[V](value: V): ParsingRun[V] = {
isSuccess = true
successValue = value
this.asInstanceOf[ParsingRun[V]]
}
def freshSuccessUnit(): ParsingRun[Unit] = {
isSuccess = true
successValue = ()
this.asInstanceOf[ParsingRun[Unit]]
}
def freshSuccessUnit(index: Int): ParsingRun[Unit] = {
isSuccess = true
successValue = ()
this.index = index
this.asInstanceOf[ParsingRun[Unit]]
}
def freshSuccess[V](value: V, index: Int): ParsingRun[V] = {
isSuccess = true
successValue = value
this.index = index
this.asInstanceOf[ParsingRun[V]]
}
def freshSuccess[V](value: V, cut: Boolean): ParsingRun[V] = {
isSuccess = true
successValue = value
this.cut = cut
this.asInstanceOf[ParsingRun[V]]
}
def freshSuccess[V](value: V, index: Int, cut: Boolean): ParsingRun[V] = {
isSuccess = true
successValue = value
this.index = index
this.cut = cut
this.asInstanceOf[ParsingRun[V]]
}
def freshFailure(): ParsingRun[Nothing] = {
if (verboseFailures){
lastFailureMsg = null
failureStack = Nil
}
isSuccess = false
this.asInstanceOf[ParsingRun[Nothing]]
}
def freshFailure(startPos: Int): ParsingRun[Nothing] = {
if (verboseFailures) {
lastFailureMsg = null
failureStack = Nil
}
isSuccess = false
index = startPos
this.asInstanceOf[ParsingRun[Nothing]]
}
def augmentFailure(index: Int): ParsingRun[Nothing] = {
this.index = index
this.asInstanceOf[ParsingRun[Nothing]]
}
def augmentFailure(index: Int, cut: Boolean): ParsingRun[Nothing] = {
this.index = index
this.cut = cut
this.asInstanceOf[ParsingRun[Nothing]]
}
def checkForDrop() = !noDropBuffer && cut
}
object ParsingRun{
def current(implicit i: ParsingRun[Any]): ParsingRun[Any] = i
}