diff --git a/std/json.d b/std/json.d index 79c79bb7042..0289e6e7cea 100644 --- a/std/json.d +++ b/std/json.d @@ -704,15 +704,20 @@ if (isInputRange!T && !isInfinite!T && isSomeChar!(ElementEncodingType!T)) { import std.ascii : isWhite, isDigit, isHexDigit, toUpper, toLower; import std.typecons : Yes; - import std.utf : encode; - JSONValue root; root.type_tag = JSON_TYPE.NULL; + // Avoid UTF decoding when possible, as it is unnecessary when + // processing JSON. + static if (is(T : const(char)[])) + alias Char = char; + else + alias Char = Unqual!(ElementType!T); + if (json.empty) return root; int depth = -1; - dchar next = 0; + Char next = 0; int line = 1, pos = 0; void error(string msg) @@ -720,11 +725,19 @@ if (isInputRange!T && !isInfinite!T && isSomeChar!(ElementEncodingType!T)) throw new JSONException(msg, line, pos); } - dchar popChar() + Char popChar() { if (json.empty) error("Unexpected end of data."); - dchar c = json.front; - json.popFront(); + static if (is(T : const(char)[])) + { + Char c = json[0]; + json = json[1..$]; + } + else + { + Char c = json.front; + json.popFront(); + } if (c == '\n') { @@ -739,7 +752,7 @@ if (isInputRange!T && !isInfinite!T && isSomeChar!(ElementEncodingType!T)) return c; } - dchar peekChar() + Char peekChar() { if (!next) { @@ -754,11 +767,11 @@ if (isInputRange!T && !isInfinite!T && isSomeChar!(ElementEncodingType!T)) while (isWhite(peekChar())) next = 0; } - dchar getChar(bool SkipWhitespace = false)() + Char getChar(bool SkipWhitespace = false)() { static if (SkipWhitespace) skipWhitespace(); - dchar c; + Char c; if (next) { c = next; @@ -791,8 +804,24 @@ if (isInputRange!T && !isInfinite!T && isSomeChar!(ElementEncodingType!T)) return true; } + wchar parseWChar() + { + wchar val = 0; + foreach_reverse (i; 0 .. 4) + { + auto hex = toUpper(getChar()); + if (!isHexDigit(hex)) error("Expecting hex character"); + val += (isDigit(hex) ? hex - '0' : hex - ('A' - 10)) << (4 * i); + } + return val; + } + string parseString() { + import std.ascii : isControl; + import std.uni : isSurrogateHi, isSurrogateLo; + import std.utf : encode, decode; + auto str = appender!string(); Next: @@ -816,13 +845,27 @@ if (isInputRange!T && !isInfinite!T && isSomeChar!(ElementEncodingType!T)) case 'r': str.put('\r'); break; case 't': str.put('\t'); break; case 'u': - dchar val = 0; - foreach_reverse (i; 0 .. 4) + wchar wc = parseWChar(); + dchar val; + // Non-BMP characters are escaped as a pair of + // UTF-16 surrogate characters (see RFC 4627). + if (isSurrogateHi(wc)) { - auto hex = toUpper(getChar()); - if (!isHexDigit(hex)) error("Expecting hex character"); - val += (isDigit(hex) ? hex - '0' : hex - ('A' - 10)) << (4 * i); + wchar[2] pair; + pair[0] = wc; + if (getChar() != '\\') error("Expected escaped low surrogate after escaped high surrogate"); + if (getChar() != 'u') error("Expected escaped low surrogate after escaped high surrogate"); + pair[1] = parseWChar(); + size_t index = 0; + val = decode(pair[], index); + if (index != 2) error("Invalid escaped surrogate pair"); } + else + if (isSurrogateLo(wc)) + error(text("Unexpected low surrogate")); + else + val = wc; + char[4] buf; immutable len = encode!(Yes.useReplacementDchar)(buf, val); str.put(buf[0 .. len]); @@ -834,8 +877,12 @@ if (isInputRange!T && !isInfinite!T && isSomeChar!(ElementEncodingType!T)) goto Next; default: + // RFC 7159 states that control characters U+0000 through + // U+001F must not appear unescaped in a JSON string. auto c = getChar(); - appendJSONChar(str, c, options, &error); + if (isControl(c)) + error("Illegal control character."); + str.put(c); goto Next; } @@ -1096,11 +1143,11 @@ string toJSON(const ref JSONValue root, in bool pretty = false, in JSONOptions o { auto json = appender!string(); - void toString(string str) @safe + void toStringImpl(Char)(string str) @safe { json.put('"'); - foreach (dchar c; str) + foreach (Char c; str) { switch (c) { @@ -1113,14 +1160,54 @@ string toJSON(const ref JSONValue root, in bool pretty = false, in JSONOptions o case '\r': json.put("\\r"); break; case '\t': json.put("\\t"); break; default: - appendJSONChar(json, c, options, - (msg) { throw new JSONException(msg); }); + { + import std.ascii : isControl; + import std.utf : encode; + + // Make sure we do UTF decoding iff we want to + // escape Unicode characters. + assert(((options & JSONOptions.escapeNonAsciiChars) != 0) + == is(Char == dchar)); + + with (JSONOptions) if (isControl(c) || + ((options & escapeNonAsciiChars) >= escapeNonAsciiChars && c >= 0x80)) + { + // Ensure non-BMP characters are encoded as a pair + // of UTF-16 surrogate characters, as per RFC 4627. + wchar[2] wchars; // 1 or 2 UTF-16 code units + size_t wNum = encode(wchars, c); // number of UTF-16 code units + foreach (wc; wchars[0..wNum]) + { + json.put("\\u"); + foreach_reverse (i; 0 .. 4) + { + char ch = (wc >>> (4 * i)) & 0x0f; + ch += ch < 10 ? '0' : 'A' - 10; + json.put(ch); + } + } + } + else + { + json.put(c); + } + } } } json.put('"'); } + void toString(string str) @safe + { + // Avoid UTF decoding when possible, as it is unnecessary when + // processing JSON. + if (options & JSONOptions.escapeNonAsciiChars) + toStringImpl!dchar(str); + else + toStringImpl!char(str); + } + void toValue(ref in JSONValue value, ulong indentLevel) @safe { void putTabs(ulong additionalIndent = 0) @@ -1281,28 +1368,6 @@ string toJSON(const ref JSONValue root, in bool pretty = false, in JSONOptions o return json.data; } -private void appendJSONChar(ref Appender!string dst, dchar c, JSONOptions opts, - scope void delegate(string) error) @safe -{ - import std.uni : isControl; - - with (JSONOptions) if (isControl(c) || - ((opts & escapeNonAsciiChars) >= escapeNonAsciiChars && c >= 0x80)) - { - dst.put("\\u"); - foreach_reverse (i; 0 .. 4) - { - char ch = (c >>> (4 * i)) & 0x0f; - ch += ch < 10 ? '0' : 'A' - 10; - dst.put(ch); - } - } - else - { - dst.put(c); - } -} - @safe unittest // bugzilla 12897 { JSONValue jv0 = JSONValue("test测试"); @@ -1737,3 +1802,43 @@ pure nothrow @safe unittest // issue 15884 assert(test(minSub)); assert(test(3*minSub)); } + +@safe unittest // issue 17555 +{ + import std.exception : assertThrown; + + assertThrown!JSONException(parseJSON("\"a\nb\"")); +} + +@safe unittest // issue 17556 +{ + auto v = JSONValue("\U0001D11E"); + auto j = toJSON(v, false, JSONOptions.escapeNonAsciiChars); + assert(j == `"\uD834\uDD1E"`); +} + +@safe unittest // issue 5904 +{ + string s = `"\uD834\uDD1E"`; + auto j = parseJSON(s); + assert(j.str == "\U0001D11E"); +} + +@safe unittest // issue 17557 +{ + assert(parseJSON("\"\xFF\"").str == "\xFF"); + assert(parseJSON("\"\U0001D11E\"").str == "\U0001D11E"); +} + +@safe unittest // issue 17553 +{ + auto v = JSONValue("\xFF"); + assert(toJSON(v) == "\"\xFF\""); +} + +@safe unittest +{ + import std.utf; + assert(parseJSON("\"\xFF\"".byChar).str == "\xFF"); + assert(parseJSON("\"\U0001D11E\"".byChar).str == "\U0001D11E"); +}