Skip to content
Browse files

Update vendored OkJson to 63be7f50c1

  • Loading branch information...
1 parent e703267 commit 43c73951ca5ac9189d5f650a052f304b9fd93bc4 @sferik sferik committed
Showing with 81 additions and 66 deletions.
  1. +81 −66 lib/multi_json/vendor/ok_json.rb
View
147 lib/multi_json/vendor/ok_json.rb
@@ -1,4 +1,6 @@
-# Copyright 2011 Keith Rarick
+# encoding: UTF-8
+#
+# Copyright 2011, 2012 Keith Rarick
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -166,7 +168,7 @@ def eat(typ, ts)
end
- # Sans s and returns a list of json tokens,
+ # Scans s and returns a list of json tokens,
# excluding white space (as defined in RFC 4627).
def lex(s)
ts = []
@@ -186,7 +188,7 @@ def lex(s)
# Scans the first token in s and
# returns a 3-element list, or nil
- # if no such token exists.
+ # if s does not begin with a valid token.
#
# The first list element is one of
# '{', '}', ':', ',', '[', ']',
@@ -218,9 +220,9 @@ def tok(s)
end
- def nulltok(s); s[0,4] == 'null' && [:val, 'null', nil] end
- def truetok(s); s[0,4] == 'true' && [:val, 'true', true] end
- def falsetok(s); s[0,5] == 'false' && [:val, 'false', false] end
+ def nulltok(s); s[0,4] == 'null' ? [:val, 'null', nil] : [] end
+ def truetok(s); s[0,4] == 'true' ? [:val, 'true', true] : [] end
+ def falsetok(s); s[0,5] == 'false' ? [:val, 'false', false] : [] end
def numtok(s)
@@ -233,6 +235,8 @@ def numtok(s)
else
[:val, m[0], Integer(m[0])]
end
+ else
+ []
end
end
@@ -261,6 +265,12 @@ def abbrev(s)
def unquote(q)
q = q[1...-1]
a = q.dup # allocate a big enough string
+ rubydoesenc = false
+ # In ruby >= 1.9, a[w] is a codepoint, not a byte.
+ if a.class.method_defined?(:force_encoding)
+ a.force_encoding('UTF-8')
+ rubydoesenc = true
+ end
r, w = 0, 0
while r < q.length
c = q[r]
@@ -298,7 +308,12 @@ def unquote(q)
end
end
end
- w += ucharenc(a, w, uchar)
+ if rubydoesenc
+ a[w] = '' << uchar
+ w += 1
+ else
+ w += ucharenc(a, w, uchar)
+ end
else
raise Error, "invalid escape char #{q[r]} in \"#{q}\""
end
@@ -308,6 +323,8 @@ def unquote(q)
# Copy anything else byte-for-byte.
# Valid UTF-8 will remain valid UTF-8.
# Invalid UTF-8 will remain invalid UTF-8.
+ # In ruby >= 1.9, c is a codepoint, not a byte,
+ # in which case this is still what we want.
a[w] = c
r += 1
w += 1
@@ -360,15 +377,6 @@ def subst(u1, u2)
end
- def unsubst(u)
- if u < Usurrself || u > Umax || surrogate?(u)
- return Ucharerr, Ucharerr
- end
- u -= Usurrself
- [Usurr1 + ((u>>10)&0x3ff), Usurr2 + (u&0x3ff)]
- end
-
-
def surrogate?(u)
Usurr1 <= u && u < Usurr3
end
@@ -446,6 +454,10 @@ def strenc(s)
t = StringIO.new
t.putc(?")
r = 0
+
+ # In ruby >= 1.9, s[r] is a codepoint, not a byte.
+ rubydoesenc = s.class.method_defined?(:encoding)
+
while r < s.length
case s[r]
when ?" then t.print('\\"')
@@ -458,23 +470,18 @@ def strenc(s)
else
c = s[r]
case true
+ when rubydoesenc
+ begin
+ c.ord # will raise an error if c is invalid UTF-8
+ t.write(c)
+ rescue
+ t.write(Ustrerr)
+ end
when Spc <= c && c <= ?~
t.putc(c)
- when true
- u, size = uchardec(s, r)
- r += size - 1 # we add one more at the bottom of the loop
- if u < 0x10000
- t.print('\\u')
- hexenc4(t, u)
- else
- u1, u2 = unsubst(u)
- t.print('\\u')
- hexenc4(t, u1)
- t.print('\\u')
- hexenc4(t, u2)
- end
else
- # invalid byte; skip it
+ n = ucharcopy(t, s, r) # ensure valid UTF-8 output
+ r += n - 1 # r is incremented below
end
end
r += 1
@@ -484,76 +491,85 @@ def strenc(s)
end
- def hexenc4(t, u)
- t.putc(Hex[(u>>12)&0xf])
- t.putc(Hex[(u>>8)&0xf])
- t.putc(Hex[(u>>4)&0xf])
- t.putc(Hex[u&0xf])
- end
-
-
def numenc(x)
- if x.nan? || x.infinite?
- return 'null'
- end rescue nil
+ if ((x.nan? || x.infinite?) rescue false)
+ raise Error, "Numeric cannot be represented: #{x}"
+ end
"#{x}"
end
- # Decodes unicode character u from UTF-8
- # bytes in string s at position i.
- # Returns u and the number of bytes read.
- def uchardec(s, i)
+ # Copies the valid UTF-8 bytes of a single character
+ # from string s at position i to I/O object t, and
+ # returns the number of bytes copied.
+ # If no valid UTF-8 char exists at position i,
+ # ucharcopy writes Ustrerr and returns 1.
+ def ucharcopy(t, s, i)
n = s.length - i
- return [Ucharerr, 1] if n < 1
+ raise Utf8Error if n < 1
c0 = s[i].ord
# 1-byte, 7-bit sequence?
if c0 < Utagx
- return [c0, 1]
+ t.putc(c0)
+ return 1
end
- # unexpected continuation byte?
- return [Ucharerr, 1] if c0 < Utag2
+ raise Utf8Error if c0 < Utag2 # unexpected continuation byte?
- # need continuation byte
- return [Ucharerr, 1] if n < 2
+ raise Utf8Error if n < 2 # need continuation byte
c1 = s[i+1].ord
- return [Ucharerr, 1] if c1 < Utagx || Utag2 <= c1
+ raise Utf8Error if c1 < Utagx || Utag2 <= c1
# 2-byte, 11-bit sequence?
if c0 < Utag3
- u = (c0&Umask2)<<6 | (c1&Umaskx)
- return [Ucharerr, 1] if u <= Uchar1max
- return [u, 2]
+ raise Utf8Error if ((c0&Umask2)<<6 | (c1&Umaskx)) <= Uchar1max
+ t.putc(c0)
+ t.putc(c1)
+ return 2
end
# need second continuation byte
- return [Ucharerr, 1] if n < 3
+ raise Utf8Error if n < 3
+
c2 = s[i+2].ord
- return [Ucharerr, 1] if c2 < Utagx || Utag2 <= c2
+ raise Utf8Error if c2 < Utagx || Utag2 <= c2
# 3-byte, 16-bit sequence?
if c0 < Utag4
u = (c0&Umask3)<<12 | (c1&Umaskx)<<6 | (c2&Umaskx)
- return [Ucharerr, 1] if u <= Uchar2max
- return [u, 3]
+ raise Utf8Error if u <= Uchar2max
+ t.putc(c0)
+ t.putc(c1)
+ t.putc(c2)
+ return 3
end
# need third continuation byte
- return [Ucharerr, 1] if n < 4
+ raise Utf8Error if n < 4
c3 = s[i+3].ord
- return [Ucharerr, 1] if c3 < Utagx || Utag2 <= c3
+ raise Utf8Error if c3 < Utagx || Utag2 <= c3
# 4-byte, 21-bit sequence?
if c0 < Utag5
u = (c0&Umask4)<<18 | (c1&Umaskx)<<12 | (c2&Umaskx)<<6 | (c3&Umaskx)
- return [Ucharerr, 1] if u <= Uchar3max
- return [u, 4]
+ raise Utf8Error if u <= Uchar3max
+ t.putc(c0)
+ t.putc(c1)
+ t.putc(c2)
+ t.putc(c3)
+ return 4
end
- return [Ucharerr, 1]
+ raise Utf8Error
+ rescue Utf8Error
+ t.write(Ustrerr)
+ return 1
+ end
+
+
+ class Utf8Error < ::StandardError
end
@@ -574,14 +590,13 @@ class Error < ::StandardError
Uchar2max = (1<<11) - 1
Uchar3max = (1<<16) - 1
Ucharerr = 0xFFFD # unicode "replacement char"
+ Ustrerr = "\xef\xbf\xbd" # unicode "replacement char"
Usurrself = 0x10000
Usurr1 = 0xd800
Usurr2 = 0xdc00
Usurr3 = 0xe000
- Umax = 0x10ffff
Spc = ' '[0]
Unesc = {?b=>?\b, ?f=>?\f, ?n=>?\n, ?r=>?\r, ?t=>?\t}
- Hex = '0123456789abcdef'
end
end

0 comments on commit 43c7395

Please sign in to comment.
Something went wrong with that request. Please try again.