/
deserialize.go
613 lines (536 loc) · 16.6 KB
/
deserialize.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
package json
import (
"github.com/imulab/go-scim/core/errors"
"github.com/imulab/go-scim/core/prop"
"github.com/imulab/go-scim/core/spec"
"strconv"
"unicode"
"unicode/utf16"
"unicode/utf8"
)
// Deserialize is the entry point of JSON deserialization. It validates the whole input with checkValid and then parses
// the top level JSON object into the unassigned structure of resource. Because the top level object is not the value
// of any field, the null literal is not accepted in its place.
func Deserialize(json []byte, resource *prop.Resource) error {
	err := checkValid(json, &scanner{})
	if err != nil {
		return err
	}

	var d deserializeState
	d.data = json
	d.opCode = scanContinue
	d.navigator = resource.NewNavigator()
	d.scan.reset()

	// Step over any leading spaces so that the first significant byte becomes the current one.
	d.scanWhile(scanSkipSpace)

	return d.parseComplexProperty(false)
}
// DeserializeProperty deserializes a piece of JSON data into the given property. The JSON data is expected to be the
// content of a json.RawMessage parsed by the built-in encoding/json mechanism; hence, it should not contain any
// preceding spaces and should be a fragment of valid JSON.
//
// When allowElementForArray is true, a multiValued property also accepts a value that is not a JSON array: the value is
// parsed as a single element and added to the multiValued container, so the result is a multiValued property holding
// that one element. When it is false, a non-array value for a multiValued property is reported as a syntax error.
//
// An empty json input is reported as a syntax error.
func DeserializeProperty(json []byte, property prop.Property, allowElementForArray bool) error {
	state := &deserializeState{
		data:      json,
		off:       0,
		opCode:    scanContinue,
		scan:      scanner{},
		navigator: prop.NewNavigator(property),
	}
	state.scan.reset()

	// The code below inspects the first byte directly; an empty fragment has none, so report it
	// as a syntax error instead of indexing out of range.
	if len(json) == 0 {
		return state.errInvalidSyntax("unexpected end of JSON input")
	}

	// Since this function is intended for bytes from json.RawMessage, the data cannot start with spaces. Hence,
	// simply use scanNext to read in the first byte, then use stateBeginValue to forcibly set the state and op code.
	// This is necessary since we are dealing with potentially just a fragment of valid JSON.
	state.scanNext()
	state.opCode = stateBeginValue(&state.scan, state.data[0])

	if property.Attribute().SingleValued() {
		return state.parseSingleValuedProperty()
	}

	// The value is indeed a JSON array: parse it as the multiValued property itself.
	if state.data[0] == '[' {
		return state.parseMultiValuedProperty()
	}

	// Otherwise the value can only be accepted as a single element of the multiValued property,
	// and only when the caller opted in.
	if !allowElementForArray {
		return state.errInvalidSyntax("expects JSON array")
	}

	i := state.navigator.Current().(prop.Container).NewChild()
	if _, err := state.navigator.FocusIndex(i); err != nil {
		return err
	}
	return state.parseSingleValuedProperty()
}
// deserializeState is the state of the deserialization process. In essence, a scanner is used to infer contextual
// information about what the current byte means. It is used in conjunction with off (offset) and opCode. In addition,
// a navigator is used to record the tracks of traversal inside the complete structure of the resource. The location of
// properties is kept in sync with the current context by reacting to signals emitted by the scanner, such as
// scanBeginObject or scanEndArray.
//
// As a side note, all parseXXX methods of this object shall maintain one courtesy: after parsing the part of the data
// that is of interest to the method, consume as many empty spaces or separators (i.e. scanObjectValue, scanArrayValue)
// as possible so that the next parseXXX method invoked will not have to skip spaces as its first task.
type deserializeState struct {
// data is the JSON input being parsed.
data []byte
// off is advanced by scanWhile and scanNext; both set it to len(data)+1 once EOF has been processed.
off int // next read offset in data
// opCode is the scan code produced for the most recently processed byte, or by the scanner's eof method.
opCode int // last read result
// scan is the scanner whose step function is fed one byte at a time.
scan scanner
// navigator tracks which property is currently focused while the JSON structure is traversed.
navigator *prop.Navigator
}
// errInvalidSyntax creates an invalid syntax error. The message is the formatted msg, prefixed with a note that JSON
// parsing failed and suffixed with the current read offset.
func (d *deserializeState) errInvalidSyntax(msg string, args ...interface{}) error {
	format := "failed to parse json: " + msg + " (idx: %d)"
	args = append(args, d.off)
	return errors.InvalidSyntax(format, args...)
}
// errInvalidValue creates an invalid value error. The message is the formatted msg, prefixed with a note that JSON
// parsing failed and suffixed with the current read offset.
func (d *deserializeState) errInvalidValue(msg string, args ...interface{}) error {
	format := "failed to parse json: " + msg + " (idx: %d)"
	args = append(args, d.off)
	return errors.InvalidValue(format, args...)
}
// parseFieldName parses the attribute/field name in a JSON object. It expects the current byte to start a quoted
// string, and afterwards skips as many empty spaces and colons (which appear as scanObjectKey) as possible. The name
// is returned as the raw bytes between the quotes; escape sequences are not decoded.
func (d *deserializeState) parseFieldName() (string, error) {
	if d.opCode != scanBeginLiteral {
		return "", d.errInvalidSyntax("expects attribute name")
	}

	// d.off is one past the current byte, so this is the position of the opening double quote.
	openQuote := d.off - 1
	d.scanWhile(scanContinue)
	// scanWhile stops one past the byte that ended the literal, so this is the position of the
	// character after the closing double quote.
	afterClose := d.off - 1

	// Move past the colon and any spaces around it so that the value is the current token.
	for d.opCode == scanObjectKey || d.opCode == scanSkipSpace {
		d.scanNext()
	}

	name := d.data[openQuote+1 : afterClose-1]
	return string(name), nil
}
// parseComplexProperty parses a top level or embedded JSON object. When parsing a top level object, allowNull shall be
// false, as the top level object does not correspond to any field name and hence cannot be null; when parsing an
// embedded object, allowNull may be true. This method expects '{' (appears as scanBeginObject) to be the current byte.
//
// Each field name is resolved to a sub property through the navigator, and the field value is parsed into it. An error
// is returned when the input is not an object (or an allowed null), when a field name cannot be focused, or when a
// field value fails to parse.
//
// On return, the byte following the closing '}' has been consumed, along with any spaces after it, for empty and
// non-empty objects alike.
func (d *deserializeState) parseComplexProperty(allowNull bool) error {
	// expects '{', and depending on allowNull, allowing for the null literal.
	if d.opCode != scanBeginObject {
		if allowNull && d.opCode == scanBeginLiteral {
			return d.parseNull()
		}
		return d.errInvalidSyntax("expects a json object")
	}

	// skip any potential spaces between '{' and '"'
	d.scanWhile(scanSkipSpace)

	// Empty object: the op code now belongs to this object's own '}'. Step past it, exactly as the
	// non-empty path does below; otherwise the caller would mistake the stale scanEndObject for the
	// end of its own enclosing object and stop reading the remaining fields.
	if d.opCode == scanEndObject {
		d.scanNext()
		if d.opCode == scanSkipSpace {
			d.scanWhile(scanSkipSpace)
		}
		return nil
	}

kvs:
	for d.opCode != scanEndObject {
		// Focus on the property that corresponds to the field name
		attrName, err := d.parseFieldName()
		if err != nil {
			return err
		}
		p, err := d.navigator.FocusName(attrName)
		if err != nil {
			return err
		}

		// Parse field value
		if p.Attribute().MultiValued() {
			err = d.parseMultiValuedProperty()
		} else {
			err = d.parseSingleValuedProperty()
		}
		if err != nil {
			return err
		}

		// Exit focus on the field value property
		d.navigator.Retract()

		// Fast forward to the next field name/value pair, or exit the loop.
	fastForward:
		for {
			switch d.opCode {
			case scanEndObject:
				d.scanNext()
				break kvs
			case scanEnd:
				break kvs
			case scanSkipSpace, scanObjectValue:
				d.scanNext()
			default:
				break fastForward
			}
		}
	}

	// Courtesy: skip any spaces between '}' and the next tokens
	if d.opCode == scanSkipSpace {
		d.scanWhile(scanSkipSpace)
	}

	return nil
}
// parseSingleValuedProperty dispatches to the parser that matches the attribute type of the currently focused
// property. The caller must ensure that the currently focused property is indeed single valued. Complex values are
// allowed to be null. It panics when the attribute type is not one of the known types.
func (d *deserializeState) parseSingleValuedProperty() error {
	switch typ := d.navigator.Current().Attribute().Type(); typ {
	case spec.TypeComplex:
		return d.parseComplexProperty(true)
	case spec.TypeBoolean:
		return d.parseBooleanProperty()
	case spec.TypeDecimal:
		return d.parseDecimalProperty()
	case spec.TypeInteger:
		return d.parseIntegerProperty()
	case spec.TypeString, spec.TypeDateTime, spec.TypeBinary, spec.TypeReference:
		// All of these types are carried as JSON strings.
		return d.parseStringProperty()
	}
	panic("invalid attribute type")
}
// parseMultiValuedProperty parses a JSON array into the currently focused multiValued property. This method expects
// '[' (appears as scanBeginArray) to be the current byte, or the literal null.
//
// For every array element a new child is created in the focused container, focused, and parsed as a single valued
// property. An error is returned when the input is neither an array nor null, when the new child cannot be focused, or
// when an element fails to parse.
//
// On return, the byte following the closing ']' has been consumed, along with any spaces after it, for empty and
// non-empty arrays alike.
func (d *deserializeState) parseMultiValuedProperty() error {
	// Expect '[' or null.
	if d.opCode != scanBeginArray {
		if d.opCode == scanBeginLiteral {
			return d.parseNull()
		}
		return d.errInvalidSyntax("expects JSON array")
	}

	// Skip any spaces between '[' and the potential first element
	d.scanWhile(scanSkipSpace)

	// Empty array: the op code now belongs to this array's own ']'. Step past it, exactly as the
	// non-empty path does below; otherwise the caller is left with a stale scanEndArray that it does
	// not know how to fast forward over.
	if d.opCode == scanEndArray {
		d.scanNext()
		if d.opCode == scanSkipSpace {
			d.scanWhile(scanSkipSpace)
		}
		return nil
	}

elements:
	for d.opCode != scanEndArray {
		// Create the place-holding element prototype and focus on it
		i := d.navigator.Current().(prop.Container).NewChild()
		if _, err := d.navigator.FocusIndex(i); err != nil {
			return err
		}

		// Parse the focused element property
		if err := d.parseSingleValuedProperty(); err != nil {
			return err
		}

		// Exit the focus
		d.navigator.Retract()

		// Fast forward to the next element, or exit the loop.
	fastForward:
		for {
			switch d.opCode {
			case scanEndArray:
				d.scanNext()
				break elements
			case scanSkipSpace, scanArrayValue:
				d.scanNext()
			default:
				break fastForward
			}
		}
	}

	// Courtesy: skip any spaces between ']' and the next tokens
	if d.opCode == scanSkipSpace {
		d.scanWhile(scanSkipSpace)
	}

	return nil
}
// parseStringProperty parses a JSON string into the currently focused property. It expects either a double quoted
// literal or the null literal. A null deletes the property; a string replaces the property value with its unquoted
// content. The focused property must be single valued and of type string, dateTime, reference or binary.
func (d *deserializeState) parseStringProperty() error {
	p := d.navigator.Current()
	attr := p.Attribute()

	// check property type
	stringBased := false
	if attr.SingleValued() {
		switch attr.Type() {
		case spec.TypeString, spec.TypeDateTime, spec.TypeReference, spec.TypeBinary:
			stringBased = true
		}
	}
	if !stringBased {
		return d.errInvalidSyntax("expects string based property for '%s'", attr.Path())
	}

	// should start with literal
	if d.opCode != scanBeginLiteral {
		return d.errInvalidSyntax("expects json literal")
	}

	first := d.off - 1 // position of the first byte of the literal (the opening quote for a string)
	d.scanWhile(scanContinue)
	past := d.off - 1 // position of the character after the end of the literal

	if d.isNull(first, past) {
		return p.Delete()
	}

	// Anything other than null has to be wrapped in double quotes.
	if d.data[first] != '"' || d.data[past-1] != '"' {
		return d.errInvalidSyntax("expects string literal value for '%s'", attr.Path())
	}

	text, ok := unquote(d.data[first:past])
	if !ok {
		return d.errInvalidSyntax("failed to unquote json string for '%s'", attr.Path())
	}
	return p.Replace(text)
}
// parseIntegerProperty parses a JSON integer into the currently focused property. It expects either an integer
// literal or the null literal. A null deletes the property; otherwise the literal is parsed as a base 10, 64 bit
// signed integer and replaces the property value.
//
// A syntax error is returned when the focused property is not a single valued integer or the current byte does not
// start a literal. A value error, carrying the attribute path and the read offset, is returned when the literal is
// not an integer.
func (d *deserializeState) parseIntegerProperty() error {
	p := d.navigator.Current()

	// check property type
	if !(p.Attribute().SingleValued() && p.Attribute().Type() == spec.TypeInteger) {
		return d.errInvalidSyntax("expects integer property for '%s'", p.Attribute().Path())
	}

	// should start with literal
	if d.opCode != scanBeginLiteral {
		return d.errInvalidSyntax("expects property value")
	}

	start := d.off - 1 // position of the first character of the literal
	d.scanWhile(scanContinue)
	end := d.off - 1 // position of the character after the end of the literal

	if d.isNull(start, end) {
		return d.navigator.Current().Delete()
	}

	val, err := strconv.ParseInt(string(d.data[start:end]), 10, 64)
	if err != nil {
		// Report through errInvalidValue, like the other parsers, so that the message carries the
		// common prefix and the read offset.
		return d.errInvalidValue("expects integer value for '%s'", p.Attribute().Path())
	}

	return d.navigator.Current().Replace(val)
}
// parseBooleanProperty parses a JSON boolean into the currently focused property. It expects the true, false, or null
// literal. A null deletes the property; true and false replace the property value.
//
// A syntax error is returned when the focused property is not a single valued boolean or the current byte does not
// start a literal. A value error is returned when the literal is none of true, false and null.
func (d *deserializeState) parseBooleanProperty() error {
	p := d.navigator.Current()

	// check property type
	if !(p.Attribute().SingleValued() && p.Attribute().Type() == spec.TypeBoolean) {
		return d.errInvalidSyntax("expects boolean property for '%s'", p.Attribute().Path())
	}

	// should start with literal
	if d.opCode != scanBeginLiteral {
		return d.errInvalidSyntax("expects property value")
	}

	start := d.off - 1 // position of the first character of the literal
	d.scanWhile(scanContinue)
	end := d.off - 1 // position of the character after the end of the literal

	switch {
	case d.isNull(start, end):
		return d.navigator.Current().Delete()
	case d.isTrue(start, end):
		return d.navigator.Current().Replace(true)
	case d.isFalse(start, end):
		return d.navigator.Current().Replace(false)
	default:
		return d.errInvalidValue("expects boolean value")
	}
}
// parseDecimalProperty parses a JSON decimal into the currently focused property. It expects either a decimal
// literal or the null literal. A null deletes the property; otherwise the literal is parsed as a 64 bit floating
// point number and replaces the property value.
//
// A syntax error is returned when the focused property is not a single valued decimal or the current byte does not
// start a literal. A value error, carrying the attribute path and the read offset, is returned when the literal is
// not a decimal.
func (d *deserializeState) parseDecimalProperty() error {
	p := d.navigator.Current()

	// check property type
	if !(p.Attribute().SingleValued() && p.Attribute().Type() == spec.TypeDecimal) {
		return d.errInvalidSyntax("expects decimal property for '%s'", p.Attribute().Path())
	}

	// should start with literal
	if d.opCode != scanBeginLiteral {
		return d.errInvalidSyntax("expects property value")
	}

	start := d.off - 1 // position of the first character of the literal
	d.scanWhile(scanContinue)
	end := d.off - 1 // position of the character after the end of the literal

	if d.isNull(start, end) {
		return d.navigator.Current().Delete()
	}

	val, err := strconv.ParseFloat(string(d.data[start:end]), 64)
	if err != nil {
		// Report through errInvalidValue, like the other parsers, so that the message carries the
		// common prefix and the read offset.
		return d.errInvalidValue("expects decimal value for '%s'", p.Attribute().Path())
	}

	return d.navigator.Current().Replace(val)
}
// parseNull parses the JSON null literal and deletes the currently focused property. A syntax error is returned when
// the current byte does not start a literal, or when the literal is not null.
func (d *deserializeState) parseNull() error {
	if d.opCode == scanBeginLiteral {
		first := d.off - 1 // position of the first character of the literal
		d.scanWhile(scanContinue)
		// d.off-1 is now the position of the character after the end of the literal.
		if past := d.off - 1; d.isNull(first, past) {
			return d.navigator.Current().Delete()
		}
		return d.errInvalidSyntax("expects null")
	}
	return d.errInvalidSyntax("expects property value")
}
// isNull reports whether the bytes d.data[start:end] spell exactly the literal null.
func (d *deserializeState) isNull(start, end int) bool {
	const literal = "null"
	if end-start != len(literal) {
		return false
	}
	for i := 0; i < len(literal); i++ {
		if d.data[start+i] != literal[i] {
			return false
		}
	}
	return true
}
// isTrue reports whether the bytes d.data[start:end] spell exactly the literal true.
func (d *deserializeState) isTrue(start, end int) bool {
	const literal = "true"
	if end-start != len(literal) {
		return false
	}
	for i := 0; i < len(literal); i++ {
		if d.data[start+i] != literal[i] {
			return false
		}
	}
	return true
}
// isFalse reports whether the bytes d.data[start:end] spell exactly the literal false.
func (d *deserializeState) isFalse(start, end int) bool {
	const literal = "false"
	if end-start != len(literal) {
		return false
	}
	for i := 0; i < len(literal); i++ {
		if d.data[start+i] != literal[i] {
			return false
		}
	}
	return true
}
// scanWhile feeds bytes from d.data[d.off:] to the scanner until the scanner produces a scan code that is not equal
// to op. That code is stored in d.opCode and d.off is left one past the byte that produced it. When the data runs out
// first, d.off is set to len(d.data)+1 and d.opCode receives the scanner's end-of-input code.
func (d *deserializeState) scanWhile(op int) {
	for pos := d.off; pos < len(d.data); pos++ {
		if code := d.scan.step(&d.scan, d.data[pos]); code != op {
			d.opCode, d.off = code, pos+1
			return
		}
	}

	d.off = len(d.data) + 1 // mark processed EOF with len+1
	d.opCode = d.scan.eof()
}
// scanNext processes the next byte (as in d.data[d.off]) and stores the resulting scan code in d.opCode. When there
// is no byte left, d.opCode receives the scanner's end-of-input code and d.off is set to len(d.data)+1.
func (d *deserializeState) scanNext() {
	if d.off >= len(d.data) {
		d.opCode = d.scan.eof()
		d.off = len(d.data) + 1 // mark processed EOF with len+1
		return
	}

	d.opCode = d.scan.step(&d.scan, d.data[d.off])
	d.off++
}
// unquote converts a quoted JSON string literal s into an actual string t. ok is false when s cannot be unquoted, in
// which case t is empty.
// The rules are different than for Go, so cannot use strconv.Unquote.
func unquote(s []byte) (t string, ok bool) {
	raw, ok := unquoteBytes(s)
	return string(raw), ok
}
// unquoteBytes converts a quoted JSON string literal s into the bytes it denotes. ok is false, and t is nil, when s is
// not wrapped in double quotes, contains a raw double quote or control character, or contains a malformed escape
// sequence. When the content needs no unquoting, the returned slice is a sub slice of s rather than a copy.
// Malformed UTF-8 and unpaired surrogate escapes are replaced with the Unicode replacement character.
func unquoteBytes(s []byte) (t []byte, ok bool) {
	n := len(s)
	if n < 2 || s[0] != '"' || s[n-1] != '"' {
		return nil, false
	}
	s = s[1 : n-1]

	// Find the longest prefix that can be used verbatim. If that is everything, no unquoting is
	// needed and a slice of the original bytes is returned.
	r := 0
prefix:
	for r < len(s) {
		c := s[r]
		switch {
		case c == '\\', c == '"', c < ' ':
			break prefix
		case c < utf8.RuneSelf:
			r++
		default:
			rr, size := utf8.DecodeRune(s[r:])
			if rr == utf8.RuneError && size == 1 {
				break prefix
			}
			r += size
		}
	}
	if r == len(s) {
		return s, true
	}

	b := make([]byte, len(s)+2*utf8.UTFMax)
	w := copy(b, s[:r])
	for r < len(s) {
		// Out of room? Can only happen if s is full of malformed UTF-8 and each byte is being
		// replaced with RuneError.
		if w >= len(b)-2*utf8.UTFMax {
			grown := make([]byte, (len(b)+utf8.UTFMax)*2)
			copy(grown, b[:w])
			b = grown
		}

		c := s[r]
		switch {
		case c == '\\':
			r++
			if r >= len(s) {
				return nil, false
			}

			if s[r] == 'u' {
				// getu4 wants the whole \uXXXX sequence, which starts at the backslash.
				rr := getu4(s[r-1:])
				if rr < 0 {
					return nil, false
				}
				r += 5 // past the 'u' and the four hex digits
				if utf16.IsSurrogate(rr) {
					dec := utf16.DecodeRune(rr, getu4(s[r:]))
					if dec != unicode.ReplacementChar {
						// A valid pair; consume the second escape as well.
						r += 6
					}
					// For an invalid surrogate, dec is the replacement rune.
					rr = dec
				}
				w += utf8.EncodeRune(b[w:], rr)
				continue
			}

			var plain byte
			switch s[r] {
			case '"', '\\', '/', '\'':
				plain = s[r]
			case 'b':
				plain = '\b'
			case 'f':
				plain = '\f'
			case 'n':
				plain = '\n'
			case 'r':
				plain = '\r'
			case 't':
				plain = '\t'
			default:
				return nil, false
			}
			b[w] = plain
			r++
			w++

		case c == '"' || c < ' ':
			// Quote, control characters are invalid.
			return nil, false

		case c < utf8.RuneSelf:
			// ASCII
			b[w] = c
			r++
			w++

		default:
			// Coerce to well-formed UTF-8.
			rr, size := utf8.DecodeRune(s[r:])
			r += size
			w += utf8.EncodeRune(b[w:], rr)
		}
	}
	return b[:w], true
}
// getu4 decodes \uXXXX from the beginning of s, returning the hex value,
// or it returns -1 when s is shorter than six bytes, does not start with \u, or contains a character that is not a
// hexadecimal digit among the four that follow.
func getu4(s []byte) rune {
	const escLen = 6 // backslash, 'u' and four hex digits
	if len(s) < escLen {
		return -1
	}
	if s[0] != '\\' || s[1] != 'u' {
		return -1
	}

	value := rune(0)
	for i := 2; i < escLen; i++ {
		var nibble rune
		switch ch := s[i]; {
		case ch >= '0' && ch <= '9':
			nibble = rune(ch - '0')
		case ch >= 'a' && ch <= 'f':
			nibble = rune(ch-'a') + 10
		case ch >= 'A' && ch <= 'F':
			nibble = rune(ch-'A') + 10
		default:
			return -1
		}
		value = value<<4 | nibble
	}
	return value
}