From 145f279b42c6d29d29a6fd9de4d24b3d65c1eabe Mon Sep 17 00:00:00 2001 From: Vladimir Panteleev Date: Mon, 26 Jun 2017 07:40:36 +0000 Subject: [PATCH 1/9] std.json: Remove unused error parameter to appendJSONChar --- std/json.d | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/std/json.d b/std/json.d index 79c79bb7042..7e829d2fa84 100644 --- a/std/json.d +++ b/std/json.d @@ -835,7 +835,7 @@ if (isInputRange!T && !isInfinite!T && isSomeChar!(ElementEncodingType!T)) default: auto c = getChar(); - appendJSONChar(str, c, options, &error); + appendJSONChar(str, c, options); goto Next; } @@ -1113,8 +1113,7 @@ string toJSON(const ref JSONValue root, in bool pretty = false, in JSONOptions o case '\r': json.put("\\r"); break; case '\t': json.put("\\t"); break; default: - appendJSONChar(json, c, options, - (msg) { throw new JSONException(msg); }); + appendJSONChar(json, c, options); } } @@ -1281,8 +1280,7 @@ string toJSON(const ref JSONValue root, in bool pretty = false, in JSONOptions o return json.data; } -private void appendJSONChar(ref Appender!string dst, dchar c, JSONOptions opts, - scope void delegate(string) error) @safe +private void appendJSONChar(ref Appender!string dst, dchar c, JSONOptions opts) @safe { import std.uni : isControl; From 55aa34e4407cd24a29f5e271a3fa318d56acf487 Mon Sep 17 00:00:00 2001 From: Vladimir Panteleev Date: Mon, 26 Jun 2017 08:08:07 +0000 Subject: [PATCH 2/9] Fix Issue 17555 - [REG2.070.0] Control characters in JSON data are invalid and should cause an exception --- std/json.d | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/std/json.d b/std/json.d index 7e829d2fa84..c7f1f654eb9 100644 --- a/std/json.d +++ b/std/json.d @@ -793,6 +793,8 @@ if (isInputRange!T && !isInfinite!T && isSomeChar!(ElementEncodingType!T)) string parseString() { + import std.uni : isControl; + auto str = appender!string(); Next: @@ -835,7 +837,9 @@ if (isInputRange!T && !isInfinite!T && isSomeChar!(ElementEncodingType!T)) default: auto c = getChar(); - appendJSONChar(str, c, options); + if (isControl(c)) + error("Illegal control character."); + str.put(c); goto Next; } @@ -1735,3 +1739,10 @@ pure nothrow @safe unittest // issue 15884 assert(test(minSub)); assert(test(3*minSub)); } + +@safe unittest // issue 17555 +{ + import std.exception : assertThrown; + + assertThrown!JSONException(parseJSON("\"a\nb\"")); +} From e44666fc51cc41ec1ec1e120882916a70a9c62dc Mon Sep 17 00:00:00 2001 From: Vladimir Panteleev Date: Mon, 26 Jun 2017 08:11:39 +0000 Subject: [PATCH 3/9] std.json: Inline appendJSONChar Since the previous commit, it was only called from one place. --- std/json.d | 41 +++++++++++++++++++---------------------- 1 file changed, 19 insertions(+), 22 deletions(-) diff --git a/std/json.d b/std/json.d index c7f1f654eb9..d12280ff84f 100644 --- a/std/json.d +++ b/std/json.d @@ -1117,7 +1117,25 @@ string toJSON(const ref JSONValue root, in bool pretty = false, in JSONOptions o case '\r': json.put("\\r"); break; case '\t': json.put("\\t"); break; default: - appendJSONChar(json, c, options); + { + import std.uni : isControl; + + with (JSONOptions) if (isControl(c) || + ((options & escapeNonAsciiChars) >= escapeNonAsciiChars && c >= 0x80)) + { + json.put("\\u"); + foreach_reverse (i; 0 .. 4) + { + char ch = (c >>> (4 * i)) & 0x0f; + ch += ch < 10 ? '0' : 'A' - 10; + json.put(ch); + } + } + else + { + json.put(c); + } + } } } @@ -1284,27 +1302,6 @@ string toJSON(const ref JSONValue root, in bool pretty = false, in JSONOptions o return json.data; } -private void appendJSONChar(ref Appender!string dst, dchar c, JSONOptions opts) @safe -{ - import std.uni : isControl; - - with (JSONOptions) if (isControl(c) || - ((opts & escapeNonAsciiChars) >= escapeNonAsciiChars && c >= 0x80)) - { - dst.put("\\u"); - foreach_reverse (i; 0 .. 4) - { - char ch = (c >>> (4 * i)) & 0x0f; - ch += ch < 10 ? '0' : 'A' - 10; - dst.put(ch); - } - } - else - { - dst.put(c); - } -} - @safe unittest // bugzilla 12897 { JSONValue jv0 = JSONValue("test测试"); From 941e2936b6c90fce0b11eada9605c231ca498e9f Mon Sep 17 00:00:00 2001 From: Vladimir Panteleev Date: Mon, 26 Jun 2017 08:41:37 +0000 Subject: [PATCH 4/9] Fix Issue 17556 - std.json encodes non-BMP characters incorrectly with JSONOptions.escapeNonAsciiChars --- std/json.d | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/std/json.d b/std/json.d index d12280ff84f..5f177921b70 100644 --- a/std/json.d +++ b/std/json.d @@ -1119,16 +1119,24 @@ string toJSON(const ref JSONValue root, in bool pretty = false, in JSONOptions o default: { import std.uni : isControl; + import std.utf : encode; with (JSONOptions) if (isControl(c) || ((options & escapeNonAsciiChars) >= escapeNonAsciiChars && c >= 0x80)) { - json.put("\\u"); - foreach_reverse (i; 0 .. 4) + // Ensure non-BMP characters are encoded as a pair + // of UTF-16 surrogate characters, as per RFC 4627. + wchar[2] wchars; // 1 or 2 UTF-16 code units + size_t wNum = encode(wchars, c); // number of UTF-16 code units + foreach (wc; wchars[0..wNum]) { - char ch = (c >>> (4 * i)) & 0x0f; - ch += ch < 10 ? '0' : 'A' - 10; - json.put(ch); + json.put("\\u"); + foreach_reverse (i; 0 .. 4) + { + char ch = (wc >>> (4 * i)) & 0x0f; + ch += ch < 10 ? '0' : 'A' - 10; + json.put(ch); + } } } else @@ -1743,3 +1751,10 @@ pure nothrow @safe unittest // issue 15884 assertThrown!JSONException(parseJSON("\"a\nb\"")); } + +@safe unittest // issue 17556 +{ + auto v = JSONValue("\U0001D11E"); + auto j = toJSON(v, false, JSONOptions.escapeNonAsciiChars); + assert(j == `"\uD834\uDD1E"`); +} From b3399112ca4f25a37811a764e07c5e583e5b589e Mon Sep 17 00:00:00 2001 From: Vladimir Panteleev Date: Mon, 26 Jun 2017 08:49:19 +0000 Subject: [PATCH 5/9] std.json: Refactor parsing Unicode character escapes into a new function --- std/json.d | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/std/json.d b/std/json.d index 5f177921b70..36971a5ff0c 100644 --- a/std/json.d +++ b/std/json.d @@ -791,6 +791,18 @@ if (isInputRange!T && !isInfinite!T && isSomeChar!(ElementEncodingType!T)) return true; } + wchar parseWChar() + { + wchar val = 0; + foreach_reverse (i; 0 .. 4) + { + auto hex = toUpper(getChar()); + if (!isHexDigit(hex)) error("Expecting hex character"); + val += (isDigit(hex) ? hex - '0' : hex - ('A' - 10)) << (4 * i); + } + return val; + } + string parseString() { import std.uni : isControl; @@ -818,13 +830,7 @@ if (isInputRange!T && !isInfinite!T && isSomeChar!(ElementEncodingType!T)) case 'r': str.put('\r'); break; case 't': str.put('\t'); break; case 'u': - dchar val = 0; - foreach_reverse (i; 0 .. 4) - { - auto hex = toUpper(getChar()); - if (!isHexDigit(hex)) error("Expecting hex character"); - val += (isDigit(hex) ? hex - '0' : hex - ('A' - 10)) << (4 * i); - } + wchar val = parseWChar(); char[4] buf; immutable len = encode!(Yes.useReplacementDchar)(buf, val); str.put(buf[0 .. len]); From b23e7a4107cc2eb3275e022cb46f7270e586ca29 Mon Sep 17 00:00:00 2001 From: Vladimir Panteleev Date: Mon, 26 Jun 2017 09:05:51 +0000 Subject: [PATCH 6/9] Fix Issue 5904 - std.json parseString doesn't handle chars outside the BMP --- std/json.d | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/std/json.d b/std/json.d index 36971a5ff0c..e4100cf2448 100644 --- a/std/json.d +++ b/std/json.d @@ -704,8 +704,6 @@ if (isInputRange!T && !isInfinite!T && isSomeChar!(ElementEncodingType!T)) { import std.ascii : isWhite, isDigit, isHexDigit, toUpper, toLower; import std.typecons : Yes; - import std.utf : encode; - JSONValue root; root.type_tag = JSON_TYPE.NULL; @@ -805,7 +803,8 @@ if (isInputRange!T && !isInfinite!T && isSomeChar!(ElementEncodingType!T)) string parseString() { - import std.uni : isControl; + import std.uni : isControl, isSurrogateHi, isSurrogateLo; + import std.utf : encode, decode; auto str = appender!string(); @@ -830,7 +829,27 @@ if (isInputRange!T && !isInfinite!T && isSomeChar!(ElementEncodingType!T)) case 'r': str.put('\r'); break; case 't': str.put('\t'); break; case 'u': - wchar val = parseWChar(); + wchar wc = parseWChar(); + dchar val; + // Non-BMP characters are escaped as a pair of + // UTF-16 surrogate characters (see RFC 4627). + if (isSurrogateHi(wc)) + { + wchar[2] pair; + pair[0] = wc; + if (getChar() != '\\') error("Expected escaped low surrogate after escaped high surrogate"); + if (getChar() != 'u') error("Expected escaped low surrogate after escaped high surrogate"); + pair[1] = parseWChar(); + size_t index = 0; + val = decode(pair[], index); + if (index != 2) error("Invalid escaped surrogate pair"); + } + else + if (isSurrogateLo(wc)) + error(text("Unexpected low surrogate")); + else + val = wc; + char[4] buf; immutable len = encode!(Yes.useReplacementDchar)(buf, val); str.put(buf[0 .. len]); @@ -1764,3 +1783,10 @@ pure nothrow @safe unittest // issue 15884 auto j = toJSON(v, false, JSONOptions.escapeNonAsciiChars); assert(j == `"\uD834\uDD1E"`); } + +@safe unittest // issue 5904 +{ + string s = `"\uD834\uDD1E"`; + auto j = parseJSON(s); + assert(j.str == "\U0001D11E"); +} From 226f8e001c256836bdcc7917443704db93f318c3 Mon Sep 17 00:00:00 2001 From: Vladimir Panteleev Date: Mon, 26 Jun 2017 10:01:20 +0000 Subject: [PATCH 7/9] Fix Issue 17557 - std.json should not do UTF decoding when parsing --- std/json.d | 39 +++++++++++++++++++++++++++++++-------- 1 file changed, 31 insertions(+), 8 deletions(-) diff --git a/std/json.d b/std/json.d index e4100cf2448..34ca5e8ad46 100644 --- a/std/json.d +++ b/std/json.d @@ -707,10 +707,16 @@ if (isInputRange!T && !isInfinite!T && isSomeChar!(ElementEncodingType!T)) JSONValue root; root.type_tag = JSON_TYPE.NULL; + // UTF decoding is unnecessary when parsing JSON. + static if (is(T : const(char)[])) + alias Char = char; + else + alias Char = dchar; + if (json.empty) return root; int depth = -1; - dchar next = 0; + Char next = 0; int line = 1, pos = 0; void error(string msg) @@ -718,11 +724,19 @@ if (isInputRange!T && !isInfinite!T && isSomeChar!(ElementEncodingType!T)) throw new JSONException(msg, line, pos); } - dchar popChar() + Char popChar() { if (json.empty) error("Unexpected end of data."); - dchar c = json.front; - json.popFront(); + static if (is(T : const(char)[])) + { + Char c = json[0]; + json = json[1..$]; + } + else + { + Char c = json.front; + json.popFront(); + } if (c == '\n') { @@ -737,7 +751,7 @@ if (isInputRange!T && !isInfinite!T && isSomeChar!(ElementEncodingType!T)) return c; } - dchar peekChar() + Char peekChar() { if (!next) { @@ -752,11 +766,11 @@ if (isInputRange!T && !isInfinite!T && isSomeChar!(ElementEncodingType!T)) while (isWhite(peekChar())) next = 0; } - dchar getChar(bool SkipWhitespace = false)() + Char getChar(bool SkipWhitespace = false)() { static if (SkipWhitespace) skipWhitespace(); - dchar c; + Char c; if (next) { c = next; @@ -803,7 +817,8 @@ if (isInputRange!T && !isInfinite!T && isSomeChar!(ElementEncodingType!T)) string parseString() { - import std.uni : isControl, isSurrogateHi, isSurrogateLo; + import std.ascii : isControl; + import std.uni : isSurrogateHi, isSurrogateLo; import std.utf : encode, decode; auto str = appender!string(); @@ -861,6 +876,8 @@ if (isInputRange!T && !isInfinite!T && isSomeChar!(ElementEncodingType!T)) goto Next; default: + // RFC 7159 states that control characters U+0000 through + // U+001F must not appear unescaped in a JSON string. auto c = getChar(); if (isControl(c)) error("Illegal control character."); @@ -1790,3 +1807,9 @@ pure nothrow @safe unittest // issue 15884 auto j = parseJSON(s); assert(j.str == "\U0001D11E"); } + +@safe unittest // issue 17557 +{ + assert(parseJSON("\"\xFF\"").str == "\xFF"); + assert(parseJSON("\"\U0001D11E\"").str == "\U0001D11E"); +} From 5031ff1446f58a4a76e16d76aa80329d1981cb32 Mon Sep 17 00:00:00 2001 From: Vladimir Panteleev Date: Mon, 26 Jun 2017 12:28:04 +0000 Subject: [PATCH 8/9] Fix Issue 17553 - std.json should not do UTF decoding when encoding JSON --- std/json.d | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/std/json.d b/std/json.d index 34ca5e8ad46..bed76d4ce33 100644 --- a/std/json.d +++ b/std/json.d @@ -707,7 +707,8 @@ if (isInputRange!T && !isInfinite!T && isSomeChar!(ElementEncodingType!T)) JSONValue root; root.type_tag = JSON_TYPE.NULL; - // UTF decoding is unnecessary when parsing JSON. + // Avoid UTF decoding when possible, as it is unnecessary when + // processing JSON. static if (is(T : const(char)[])) alias Char = char; else @@ -1142,11 +1143,11 @@ string toJSON(const ref JSONValue root, in bool pretty = false, in JSONOptions o { auto json = appender!string(); - void toString(string str) @safe + void toStringImpl(Char)(string str) @safe { json.put('"'); - foreach (dchar c; str) + foreach (Char c; str) { switch (c) { @@ -1160,9 +1161,14 @@ string toJSON(const ref JSONValue root, in bool pretty = false, in JSONOptions o case '\t': json.put("\\t"); break; default: { - import std.uni : isControl; + import std.ascii : isControl; import std.utf : encode; + // Make sure we do UTF decoding iff we want to + // escape Unicode characters. + assert(((options & JSONOptions.escapeNonAsciiChars) != 0) + == is(Char == dchar)); + with (JSONOptions) if (isControl(c) || ((options & escapeNonAsciiChars) >= escapeNonAsciiChars && c >= 0x80)) { @@ -1192,6 +1198,16 @@ string toJSON(const ref JSONValue root, in bool pretty = false, in JSONOptions o json.put('"'); } + void toString(string str) @safe + { + // Avoid UTF decoding when possible, as it is unnecessary when + // processing JSON. + if (options & JSONOptions.escapeNonAsciiChars) + toStringImpl!dchar(str); + else + toStringImpl!char(str); + } + void toValue(ref in JSONValue value, ulong indentLevel) @safe { void putTabs(ulong additionalIndent = 0) @@ -1813,3 +1829,9 @@ pure nothrow @safe unittest // issue 15884 assert(parseJSON("\"\xFF\"").str == "\xFF"); assert(parseJSON("\"\U0001D11E\"").str == "\U0001D11E"); } + +@safe unittest // issue 17553 +{ + auto v = JSONValue("\xFF"); + assert(toJSON(v) == "\"\xFF\""); +} From 71875c09034fc28da8d41b3b92edb7b9b0bbfe3d Mon Sep 17 00:00:00 2001 From: Vladimir Panteleev Date: Mon, 26 Jun 2017 18:52:22 +0000 Subject: [PATCH 9/9] std.json: Fix handling ranges of non-dchars --- std/json.d | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/std/json.d b/std/json.d index bed76d4ce33..0289e6e7cea 100644 --- a/std/json.d +++ b/std/json.d @@ -712,7 +712,7 @@ if (isInputRange!T && !isInfinite!T && isSomeChar!(ElementEncodingType!T)) static if (is(T : const(char)[])) alias Char = char; else - alias Char = dchar; + alias Char = Unqual!(ElementType!T); if (json.empty) return root; @@ -1835,3 +1835,10 @@ pure nothrow @safe unittest // issue 15884 auto v = JSONValue("\xFF"); assert(toJSON(v) == "\"\xFF\""); } + +@safe unittest +{ + import std.utf; + assert(parseJSON("\"\xFF\"".byChar).str == "\xFF"); + assert(parseJSON("\"\U0001D11E\"".byChar).str == "\U0001D11E"); +}