forked from sensorbee/sensorbee
/
jsonpath.go
518 lines (480 loc) · 14.5 KB
/
jsonpath.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
package data
import (
"fmt"
"strconv"
"strings"
)
// multiplicity describes whether an extractor yields a single value
// or a result that is array-shaped by nature.
type multiplicity int
const (
// one marks an extractor whose result is a single value.
one multiplicity = iota
// many marks an extractor whose result is an array of values.
many
)
// Path is an entity that can be evaluated with a Map to
// return the value stored at the location specified by
// the Path.
type Path interface {
// evaluate returns the Value found in the given Map at the
// location described by this Path, or an error.
evaluate(Map) (Value, error)
// set stores the given Value in the given Map at the location
// described by this Path, or returns an error.
set(Map, Value) error
}
// MustCompilePath compiles the JSON Path given in s via CompilePath
// and returns the resulting Path. If s is not a valid JSON Path, it
// panics with the message of the compilation error.
func MustCompilePath(s string) Path {
	compiled, err := CompilePath(s)
	if err == nil {
		return compiled
	}
	panic(err.Error())
}
// CompilePath parses s as a JSON Path and returns the Path that
// represents it. An error is returned if s cannot be parsed, if
// building the path components panics, or if more than one component
// of the path has an array-shaped result (such as a slice).
func CompilePath(s string) (p Path, err error) {
	// Problems detected while the components are built are signalled
	// via panic; convert any such panic into a regular error.
	defer func() {
		if cause := recover(); cause != nil {
			err = fmt.Errorf("%v", cause)
		}
	}()

	// run the parser over the input string
	parser := new(jsonPeg)
	parser.Buffer = s
	parser.Init()
	if parseErr := parser.Parse(); parseErr != nil {
		return nil, fmt.Errorf("error parsing '%s' as a JSON Path", s)
	}
	parser.Execute()

	// at most one component may produce an array-shaped result
	manyCount := 0
	for _, comp := range parser.components {
		if comp.resultMultiplicity() != many {
			continue
		}
		manyCount++
		if manyCount > 1 {
			return nil, fmt.Errorf("path '%s' contains multiple slice elements", s)
		}
	}
	return parser, nil
}
// evaluate walks the components of this JSON Path, starting at m, and
// returns the value found at the end of the path. The first error
// reported by a component aborts the walk.
//
// Once a component with an array-shaped result (resultMultiplicity()
// == many) has been processed, every following component is applied to
// each item of that array instead of to the array itself.
func (j *jsonPeg) evaluate(m Map) (Value, error) {
	// emptyIfNil turns a nil array result into an empty array.
	emptyIfNil := func(val Value) Value {
		if val.Type() == TypeArray {
			if items, _ := val.asArray(); items == nil {
				return Array{}
			}
		}
		return val
	}

	// `cursor` is the value we currently descend into, `child` receives
	// what the component extracted from it.
	var (
		cursor Value = m
		child  Value
	)
	// fanOut is true as soon as `cursor` is an array-shaped result.
	fanOut := false
	for _, comp := range j.components {
		isMany := comp.resultMultiplicity() == many
		if fanOut {
			items, err := cursor.asArray()
			if err != nil {
				return nil, err
			}
			for idx, item := range items {
				if err := comp.extract(item, &child); err != nil {
					return nil, err
				}
				if isMany {
					child = emptyIfNil(child)
				}
				// Overwriting a position of `items` does not touch the
				// input Map only because every extractor with
				// resultMultiplicity() == many is required to return
				// a *new* slice rather than an existing one.
				items[idx] = child
			}
			continue
		}

		// descend from `cursor` into the extracted child
		if err := comp.extract(cursor, &child); err != nil {
			return nil, err
		}
		if isMany {
			child = emptyIfNil(child)
		}
		cursor = child
		fanOut = isMany
	}
	return cursor, nil
}
// set writes v into m at the location described by this JSON Path.
// Missing intermediate children are created when needed (`mkdir -p`
// behavior), but if, say, there is an Int located at `foo.bar`, then
// assigning to `foo.bar.hoge` will fail.
func (j *jsonPeg) set(m Map, v Value) error {
	if m == nil || m.Type() == TypeNull {
		return fmt.Errorf("given Map is inaccessible")
	}

	var (
		// cursor is the value we currently descend into
		cursor Value = m
		// child receives the value found by extractForSet
		child Value
		// assign is replaced by every extractForSet call; when invoked
		// it writes a value to the position where `child` was located
		assign func(v Value)
	)
	for _, comp := range j.components {
		if err := comp.extractForSet(cursor, &child, &assign); err != nil {
			return err
		}
		cursor = child
	}

	if assign != nil {
		assign(v)
		return nil
	}
	return fmt.Errorf("setValueInParent was nil, when it shouldn't be")
}
// extractor describes an entity that can extract a child element
// from a Value.
type extractor interface {
// extract reads the child element of v that this extractor
// addresses and stores it in *next.
extract(v Value, next *Value) error
// extractForSet is the counterpart of extract used when assigning
// a value: it stores the addressed child in the second argument and
// a function that overwrites that child in the third argument.
extractForSet(Value, *Value, *func(Value)) error
// resultMultiplicity reports whether the extracted result is a
// single value (one) or array-shaped by nature (many).
resultMultiplicity() multiplicity
}
// addMapAccess is invoked for every `foo` or `["bar"]` found in a
// JSON Path string and appends a mapValueExtractor for the key s to
// the path components.
func (j *jsonPeg) addMapAccess(s string) {
	ext := &mapValueExtractor{key: s}
	j.components = append(j.components, ext)
}
// mapValueExtractor can extract a value from a Map using the
// given key.
type mapValueExtractor struct {
// key is the Map key whose entry is extracted.
key string
}
// extract interprets v as a Map and stores the entry found under
// a.key in *next. It fails if v cannot be converted by AsMap or if the
// key is not present.
func (a *mapValueExtractor) extract(v Value, next *Value) error {
	m, err := AsMap(v)
	if err != nil {
		return err
	}
	child, found := m[a.key]
	if !found {
		return fmt.Errorf("key '%s' was not found in map", a.key)
	}
	*next = child
	return nil
}
// extractForSet prepares an assignment below the key a.key of v.
// A NULL v is replaced (through the current *setInParent) by a new Map,
// and a missing key is added with a NULL entry. On success *next holds
// the entry stored under a.key and *setInParent is a function that
// overwrites that entry.
func (a *mapValueExtractor) extractForSet(v Value, next *Value, setInParent *func(Value)) error {
	key := a.key

	// a NULL where a map is supposed to be is turned into a map
	if v.Type() == TypeNull {
		v = Map{key: Null{}}
		(*setInParent)(v)
	}

	m, err := v.asMap()
	if err != nil {
		return fmt.Errorf("cannot access a %T using key \"%s\"", v, key)
	}

	// make sure there is an entry we can "descend" into, possibly NULL
	child, present := m[key]
	if !present {
		child = Null{}
		m[key] = child
	}

	*setInParent = func(replacement Value) {
		m[key] = replacement
	}
	*next = child
	return nil
}
// resultMultiplicity reports that a map access yields a single value.
func (*mapValueExtractor) resultMultiplicity() multiplicity {
	return one
}
// addRecursiveAccess is invoked for every `..foo` or `..["bar"]` found
// in a JSON Path string and appends a recursiveExtractor for the key s
// to the path components.
func (j *jsonPeg) addRecursiveAccess(s string) {
	ext := &recursiveExtractor{key: s}
	j.components = append(j.components, ext)
}
// recursiveExtractor can extract a list of all items with a certain key,
// no matter where they are located in the Map.
type recursiveExtractor struct {
// key is the Map key that is searched for at every nesting level.
key string
}
// extract collects every entry stored under a.key anywhere below v and
// stores the collected entries as an Array in *next. v must be a Map
// or an Array, otherwise an error is returned.
//
// Entries found in a Map are appended in the iteration order of that
// Map, so the order of the result is not fixed for matches that come
// from different keys of the same Map.
func (a *recursiveExtractor) extract(v Value, next *Value) error {
	if t := v.Type(); t != TypeMap && t != TypeArray {
		return fmt.Errorf("cannot descend recursively into %T", v)
	}

	var results []Value
	// walk appends all matches below `container` to results; values
	// that are neither Map nor Array are ignored.
	var walk func(container Value)
	walk = func(container Value) {
		switch container.Type() {
		case TypeMap:
			entries, _ := container.asMap()
			for key, value := range entries {
				if key == a.key {
					// NB. We do NOT descend further into `value`
					// even if it is itself a Map or Array!
					results = append(results, value)
					continue
				}
				walk(value)
			}
		case TypeArray:
			// an Array has no keys, so we only look into its items
			items, _ := container.asArray()
			for _, value := range items {
				walk(value)
			}
		}
	}
	walk(v)

	*next = Array(results)
	return nil
}
// extractForSet always fails: assigning through a recursive access is
// not implemented.
func (*recursiveExtractor) extractForSet(v Value, next *Value, setInParent *func(Value)) error {
	return fmt.Errorf("not implemented")
}
// resultMultiplicity reports that a recursive access yields an array
// of values.
func (*recursiveExtractor) resultMultiplicity() multiplicity {
	return many
}
// addArrayAccess is called when we discover `[1]` in a JSON Path
// string. It appends an arrayElementExtractor for the index given in s
// to the path components and panics if s cannot be parsed as a 32-bit
// integer.
func (j *jsonPeg) addArrayAccess(s string) {
	idx, err := strconv.ParseInt(s, 10, 32)
	// due to parser configuration, s will always be a numeric string,
	// but it may overflow int32, so we need a check here.
	if err != nil {
		// TODO panic is not the gold standard of error handling, but
		// at the moment we have no better way to signal an error
		// from within jsonPeg.Execute()
		//
		// The message is built by concatenation: s must not be used as
		// (part of) a format string.
		panic("overflow index number: " + s)
	}
	j.components = append(j.components, &arrayElementExtractor{int(idx)})
}
// arrayElementExtractor can extract an element from an Array using
// the given index.
type arrayElementExtractor struct {
// idx is the index of the element to extract; when reading, a negative
// value is counted from the end of the Array.
idx int
}
// extract interprets v as an Array and stores the element at index
// a.idx in *next. A negative index is counted from the end of the
// Array. An error is returned if v cannot be converted by AsArray or
// if the index lies outside of the Array.
func (a *arrayElementExtractor) extract(v Value, next *Value) error {
	arr, err := AsArray(v)
	if err != nil {
		return fmt.Errorf("cannot access a %T using index %d", v, a.idx)
	}

	n := len(arr)
	pos := a.idx
	if pos < 0 {
		pos += n
	}
	if pos < 0 || pos >= n {
		return fmt.Errorf("out of range access: %d (length %d)", a.idx, n)
	}
	*next = arr[pos]
	return nil
}
// extractForSet prepares an assignment to the element at index a.idx
// of v.
//
// A NULL v is replaced (through the current *setInParent) by a new
// Array that is long enough to hold the index, and an Array that is
// too short is padded with NULLs. A negative index is counted from the
// end of an existing Array, like in extract; it is an error if it
// points before the first element or if the Array would have to be
// created, because there is no length to count from.
//
// On success *next holds the addressed element and *setInParent is a
// function that overwrites that element.
func (a *arrayElementExtractor) extractForSet(v Value, next *Value, setInParent *func(Value)) error {
	// if there is a NULL value where an array is supposed to be,
	// then we will create an array that holds enough entries
	if v.Type() == TypeNull {
		if a.idx < 0 {
			return fmt.Errorf("cannot create an Array using negative index %d", a.idx)
		}
		created := make(Array, a.idx+1)
		for i := range created {
			created[i] = Null{}
		}
		v = created
		(*setInParent)(v)
	}

	// access as an Array
	cont, err := v.asArray()
	if err != nil {
		return fmt.Errorf("cannot access a %T using index %d", v, a.idx)
	}

	// resolve an index that is relative to the end of the Array
	idx := a.idx
	if idx < 0 {
		idx += len(cont)
		if idx < 0 {
			return fmt.Errorf("out of range access: %d (length %d)", a.idx, len(cont))
		}
	}

	// if the Array is not long enough, pad it with NULLs (so
	// that we can "descend" further into cont[idx])
	if idx >= len(cont) {
		for len(cont) <= idx {
			cont = append(cont, Null{})
		}
		// we need to write the possibly reallocated slice
		// to the correct position
		(*setInParent)(cont)
	}

	// invariant: cont[idx] is a valid entry, possibly NULL
	*setInParent = func(v Value) {
		cont[idx] = v
	}
	*next = cont[idx]
	return nil
}
// resultMultiplicity reports that an index access yields a single
// value.
func (*arrayElementExtractor) resultMultiplicity() multiplicity {
	return one
}
// addArraySlice is called when we discover `[1:3]` or `[1:3:2]` in a
// JSON Path string. It parses the `a:b` or `a:b:c` expression in s and
// appends a matching arraySliceExtractor to the path components.
//
// The function panics if s does not have two or three parts, if a
// part cannot be parsed as a 32-bit integer, if the step is 0, or if
// start and end have the same sign and contradict the direction of the
// step.
func (j *jsonPeg) addArraySlice(s string) {
	parts := strings.Split(s, ":")
	if len(parts) != 2 && len(parts) != 3 {
		panic(fmt.Sprintf("'%s' did not have format 'a:b' or 'a:b:c'", s))
	}

	// parseIndex converts one part of the slice expression.
	// Due to parser setup, the parts will always be numeric strings,
	// but they may overflow int32, so we need a check here. The
	// message is built by concatenation: the part must not be used as
	// (part of) a format string.
	parseIndex := func(part string) int64 {
		n, err := strconv.ParseInt(part, 10, 32)
		if err != nil {
			panic("overflow index number: " + part)
		}
		return n
	}

	var (
		start, end int64
		step       int64 = 1
		// store for each component if that component was set by the user
		startSet, endSet, stepSet bool
	)
	if parts[0] != "" { // [a:b] or [a:b:c], as opposed to [:b]
		startSet = true
		start = parseIndex(parts[0])
	}
	if parts[1] != "" { // [a:b] situation
		endSet = true
		end = parseIndex(parts[1])
	}
	if len(parts) == 3 && parts[2] != "" { // [a:b:c] situation
		stepSet = true
		step = parseIndex(parts[2])
	}
	if step == 0 {
		panic("step must not be 0")
	}

	// validation of the step sign/direction can only happen
	// if start and end have the same sign. (we don't know if
	// `[10:-10:2]` is valid or not without a particular list.)
	if (start >= 0 && end >= 0) || (start < 0 && end < 0) {
		switch {
		case start > end && step > 0 && endSet && startSet:
			panic(fmt.Sprintf("start index %d must be less or equal to "+
				"end index %d when step is positive", start, end))
		case start < end && step < 0:
			panic(fmt.Sprintf("start index %d must be greater or equal to "+
				"end index %d when step is negative", start, end))
		}
	}

	j.components = append(j.components, &arraySliceExtractor{int(start), int(end), int(step),
		startSet, endSet, stepSet})
}
// String returns the content of j.Buffer, which CompilePath sets to
// the JSON Path string it was given.
func (j *jsonPeg) String() string {
	source := j.Buffer
	return source
}
// arraySliceExtractor can extract a slice from an Array using the
// given start/end indexes.
type arraySliceExtractor struct {
// start and end delimit the slice (end is exclusive); negative
// values are counted from the end of the Array. step is the distance
// between two selected elements and may be negative to count
// downwards.
start, end, step int
// startSet, endSet and stepSet record whether the corresponding
// component was given explicitly in the JSON Path string.
startSet, endSet, stepSet bool
}
// extract interprets v as an Array and stores a newly allocated Array
// with the selected elements in *next. An error is returned only if v
// cannot be converted by AsArray.
//
// Negative bounds are counted from the end of the Array, a start that
// was not given is 0 and an end that was not given is the length of
// the Array. The end bound is exclusive. If the bounds do not match
// the direction of the step, the result is an empty Array.
func (a *arraySliceExtractor) extract(v Value, next *Value) error {
	arr, err := AsArray(v)
	if err != nil {
		return fmt.Errorf("cannot access a %T using range %d:%d", v, a.start, a.end)
	}
	n := len(arr)

	// resolve the lower/upper bound for this particular Array
	lo := a.start
	switch {
	case lo < 0:
		lo += n
	case !a.startSet:
		lo = 0
	}
	hi := a.end
	switch {
	case hi < 0:
		hi += n
	case !a.endSet:
		hi = n
	}

	// everything that is not a valid selection yields an empty Array
	picked := Array{}
	switch {
	case a.step > 0 && lo <= hi:
		// count upwards; clamp the bounds to the valid range first
		if lo < 0 {
			lo = 0
		}
		if hi > n {
			hi = n
		}
		if lo < n && hi >= lo {
			picked = make(Array, 0, hi-lo)
			for i := lo; i < hi; i += a.step {
				picked = append(picked, arr[i])
			}
		}
	case a.step < 0 && lo >= hi:
		// count downwards; clamp the bounds to the valid range first
		if lo >= n {
			lo = n - 1
		}
		if hi < 0 {
			hi = -1
		}
		if lo >= 0 && lo >= hi {
			picked = make(Array, 0, lo-hi)
			for i := lo; i > hi; i += a.step {
				picked = append(picked, arr[i])
			}
		}
	}
	*next = picked
	return nil
}
// extractForSet always fails: assigning through an array slice is not
// implemented.
func (*arraySliceExtractor) extractForSet(v Value, next *Value, setInParent *func(Value)) error {
	return fmt.Errorf("not implemented")
}
// resultMultiplicity reports that an array slice yields an array of
// values.
func (*arraySliceExtractor) resultMultiplicity() multiplicity {
	return many
}