Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

JSON fixes #5511

Merged
merged 9 commits into from Jul 3, 2017
187 changes: 146 additions & 41 deletions std/json.d
Expand Up @@ -704,27 +704,40 @@ if (isInputRange!T && !isInfinite!T && isSomeChar!(ElementEncodingType!T))
{
import std.ascii : isWhite, isDigit, isHexDigit, toUpper, toLower;
import std.typecons : Yes;
import std.utf : encode;

JSONValue root;
root.type_tag = JSON_TYPE.NULL;

// Avoid UTF decoding when possible, as it is unnecessary when
// processing JSON.
static if (is(T : const(char)[]))
alias Char = char;
else
alias Char = Unqual!(ElementType!T);

if (json.empty) return root;

int depth = -1;
dchar next = 0;
Char next = 0;
int line = 1, pos = 0;

void error(string msg)
{
throw new JSONException(msg, line, pos);
}

dchar popChar()
Char popChar()
{
if (json.empty) error("Unexpected end of data.");
dchar c = json.front;
json.popFront();
static if (is(T : const(char)[]))
{
Char c = json[0];
json = json[1..$];
}
else
{
Char c = json.front;
json.popFront();
}

if (c == '\n')
{
Expand All @@ -739,7 +752,7 @@ if (isInputRange!T && !isInfinite!T && isSomeChar!(ElementEncodingType!T))
return c;
}

dchar peekChar()
Char peekChar()
{
if (!next)
{
Expand All @@ -754,11 +767,11 @@ if (isInputRange!T && !isInfinite!T && isSomeChar!(ElementEncodingType!T))
while (isWhite(peekChar())) next = 0;
}

dchar getChar(bool SkipWhitespace = false)()
Char getChar(bool SkipWhitespace = false)()
{
static if (SkipWhitespace) skipWhitespace();

dchar c;
Char c;
if (next)
{
c = next;
Expand Down Expand Up @@ -791,8 +804,24 @@ if (isInputRange!T && !isInfinite!T && isSomeChar!(ElementEncodingType!T))
return true;
}

wchar parseWChar()
{
wchar val = 0;
foreach_reverse (i; 0 .. 4)
{
auto hex = toUpper(getChar());
if (!isHexDigit(hex)) error("Expecting hex character");
val += (isDigit(hex) ? hex - '0' : hex - ('A' - 10)) << (4 * i);
}
return val;
}

string parseString()
{
import std.ascii : isControl;
import std.uni : isSurrogateHi, isSurrogateLo;
import std.utf : encode, decode;

auto str = appender!string();

Next:
Expand All @@ -816,13 +845,27 @@ if (isInputRange!T && !isInfinite!T && isSomeChar!(ElementEncodingType!T))
case 'r': str.put('\r'); break;
case 't': str.put('\t'); break;
case 'u':
dchar val = 0;
foreach_reverse (i; 0 .. 4)
wchar wc = parseWChar();
dchar val;
// Non-BMP characters are escaped as a pair of
// UTF-16 surrogate characters (see RFC 4627).
if (isSurrogateHi(wc))
{
auto hex = toUpper(getChar());
if (!isHexDigit(hex)) error("Expecting hex character");
val += (isDigit(hex) ? hex - '0' : hex - ('A' - 10)) << (4 * i);
wchar[2] pair;
pair[0] = wc;
if (getChar() != '\\') error("Expected escaped low surrogate after escaped high surrogate");
if (getChar() != 'u') error("Expected escaped low surrogate after escaped high surrogate");
pair[1] = parseWChar();
size_t index = 0;
val = decode(pair[], index);
if (index != 2) error("Invalid escaped surrogate pair");
}
else
if (isSurrogateLo(wc))
error(text("Unexpected low surrogate"));
else
val = wc;

char[4] buf;
immutable len = encode!(Yes.useReplacementDchar)(buf, val);
str.put(buf[0 .. len]);
Expand All @@ -834,8 +877,12 @@ if (isInputRange!T && !isInfinite!T && isSomeChar!(ElementEncodingType!T))
goto Next;

default:
// RFC 7159 states that control characters U+0000 through
// U+001F must not appear unescaped in a JSON string.
auto c = getChar();
appendJSONChar(str, c, options, &error);
if (isControl(c))
error("Illegal control character.");
str.put(c);
goto Next;
}

Expand Down Expand Up @@ -1096,11 +1143,11 @@ string toJSON(const ref JSONValue root, in bool pretty = false, in JSONOptions o
{
auto json = appender!string();

void toString(string str) @safe
void toStringImpl(Char)(string str) @safe
{
json.put('"');

foreach (dchar c; str)
foreach (Char c; str)
{
switch (c)
{
Expand All @@ -1113,14 +1160,54 @@ string toJSON(const ref JSONValue root, in bool pretty = false, in JSONOptions o
case '\r': json.put("\\r"); break;
case '\t': json.put("\\t"); break;
default:
appendJSONChar(json, c, options,
(msg) { throw new JSONException(msg); });
{
import std.ascii : isControl;
import std.utf : encode;

// Make sure we do UTF decoding iff we want to
// escape Unicode characters.
assert(((options & JSONOptions.escapeNonAsciiChars) != 0)
== is(Char == dchar));

with (JSONOptions) if (isControl(c) ||
((options & escapeNonAsciiChars) >= escapeNonAsciiChars && c >= 0x80))
{
// Ensure non-BMP characters are encoded as a pair
// of UTF-16 surrogate characters, as per RFC 4627.
wchar[2] wchars; // 1 or 2 UTF-16 code units
size_t wNum = encode(wchars, c); // number of UTF-16 code units
foreach (wc; wchars[0..wNum])
{
json.put("\\u");
foreach_reverse (i; 0 .. 4)
{
char ch = (wc >>> (4 * i)) & 0x0f;
ch += ch < 10 ? '0' : 'A' - 10;
json.put(ch);
}
}
}
else
{
json.put(c);
}
}
}
}

json.put('"');
}

void toString(string str) @safe
{
// Avoid UTF decoding when possible, as it is unnecessary when
// processing JSON.
if (options & JSONOptions.escapeNonAsciiChars)
toStringImpl!dchar(str);
else
toStringImpl!char(str);
}

void toValue(ref in JSONValue value, ulong indentLevel) @safe
{
void putTabs(ulong additionalIndent = 0)
Expand Down Expand Up @@ -1281,28 +1368,6 @@ string toJSON(const ref JSONValue root, in bool pretty = false, in JSONOptions o
return json.data;
}

private void appendJSONChar(ref Appender!string dst, dchar c, JSONOptions opts,
scope void delegate(string) error) @safe
{
import std.uni : isControl;

with (JSONOptions) if (isControl(c) ||
((opts & escapeNonAsciiChars) >= escapeNonAsciiChars && c >= 0x80))
{
dst.put("\\u");
foreach_reverse (i; 0 .. 4)
{
char ch = (c >>> (4 * i)) & 0x0f;
ch += ch < 10 ? '0' : 'A' - 10;
dst.put(ch);
}
}
else
{
dst.put(c);
}
}

@safe unittest // bugzilla 12897
{
JSONValue jv0 = JSONValue("test测试");
Expand Down Expand Up @@ -1737,3 +1802,43 @@ pure nothrow @safe unittest // issue 15884
assert(test(minSub));
assert(test(3*minSub));
}

@safe unittest // issue 17555
{
import std.exception : assertThrown;

assertThrown!JSONException(parseJSON("\"a\nb\""));
}

@safe unittest // issue 17556
{
auto v = JSONValue("\U0001D11E");
auto j = toJSON(v, false, JSONOptions.escapeNonAsciiChars);
assert(j == `"\uD834\uDD1E"`);
}

@safe unittest // issue 5904
{
string s = `"\uD834\uDD1E"`;
auto j = parseJSON(s);
assert(j.str == "\U0001D11E");
}

@safe unittest // issue 17557
{
assert(parseJSON("\"\xFF\"").str == "\xFF");
assert(parseJSON("\"\U0001D11E\"").str == "\U0001D11E");
}

@safe unittest // issue 17553
{
auto v = JSONValue("\xFF");
assert(toJSON(v) == "\"\xFF\"");
}

@safe unittest
{
import std.utf;
assert(parseJSON("\"\xFF\"".byChar).str == "\xFF");
assert(parseJSON("\"\U0001D11E\"".byChar).str == "\U0001D11E");
}