From 38f613f1ec1190c665d294393719d8337b0d4b30 Mon Sep 17 00:00:00 2001 From: Max Kerr Date: Fri, 22 Dec 2017 08:45:26 -0800 Subject: [PATCH] Allow bidirectional control characters in System.Uri (#26022) The current behavior of System.Uri is to strip all Unicode bidirectional control characters encountered during parsing, regardless of whether or not they are percent encoded. This is in violation of the IRI spec. The problematic Bidi control characters are represented by the code points x200E, x200f, and x202A-x202E. According to the IRI Spec ABNF, these characters fall under the definition of ucschar. Ucschars are considered unreserved in the IRI spec, and thus should be legal to use. Additionally, during the Uri conversion process RFC 3987 Sec. 3.1 should apply, and un-encoded Bidi characters should be percent encoded. After the fix is applied the behavior of System.Uri will match that of WinRT Windows.Foundation.Uri as well as that of common browsers such as Edge and Chrome. The fix also retains the code used for quirking in .Net Framework, but always enables the new behavior instead of checking an AppContextSwitch. --- src/System.Private.Uri/src/System/IriHelper.cs | 2 +- src/System.Private.Uri/src/System/UriHelper.cs | 2 +- src/System.Private.Uri/src/System/UriSyntax.cs | 10 ++++++++++ .../tests/FunctionalTests/IriEncodingDecodingTests.cs | 6 +++++- .../tests/UnitTests/Fakes/FakeUriParser.cs | 8 ++++++++ 5 files changed, 25 insertions(+), 3 deletions(-) diff --git a/src/System.Private.Uri/src/System/IriHelper.cs b/src/System.Private.Uri/src/System/IriHelper.cs index bbd1555c9447..4af8f9b916c0 100644 --- a/src/System.Private.Uri/src/System/IriHelper.cs +++ b/src/System.Private.Uri/src/System/IriHelper.cs @@ -289,7 +289,7 @@ internal static unsafe string EscapeUnescapeIri(char* pInput, int start, int end { if (CheckIriUnicodeRange(ch, component == UriComponents.Query)) { - if (!UriHelper.IsBidiControlCharacter(ch)) + if (!UriHelper.IsBidiControlCharacter(ch) || !UriParser.DontKeepUnicodeBidiFormattingCharacters) { // copy it Debug.Assert(dest.Length > destOffset, "Destination length exceeded destination offset."); diff --git a/src/System.Private.Uri/src/System/UriHelper.cs b/src/System.Private.Uri/src/System/UriHelper.cs index 7888795c470f..0c602e26aeba 100644 --- a/src/System.Private.Uri/src/System/UriHelper.cs +++ b/src/System.Private.Uri/src/System/UriHelper.cs @@ -569,7 +569,7 @@ internal static class UriHelper EscapeAsciiChar((char)encodedBytes[l], dest, ref destOffset); } } - else if (!UriHelper.IsBidiControlCharacter(unescapedCharsPtr[j])) + else if (!UriHelper.IsBidiControlCharacter(unescapedCharsPtr[j]) || !UriParser.DontKeepUnicodeBidiFormattingCharacters) { //copy chars Debug.Assert(dest.Length > destOffset, "Destination length exceeded destination offset."); diff --git a/src/System.Private.Uri/src/System/UriSyntax.cs b/src/System.Private.Uri/src/System/UriSyntax.cs index 741a45c240ae..15bb88e8529b 100644 --- a/src/System.Private.Uri/src/System/UriSyntax.cs +++ b/src/System.Private.Uri/src/System/UriSyntax.cs @@ -110,6 +110,16 @@ internal static bool DontEnableStrictRFC3986ReservedCharacterSets } } + internal static bool DontKeepUnicodeBidiFormattingCharacters + { + // In .NET Framework this would test against an AppContextSwitch. Since this is a potentially + // breaking change, we'll leave in the system used to disable it. + get + { + return false; + } + } + static UriParser() { s_table = new LowLevelDictionary(c_InitialTableSize); diff --git a/src/System.Private.Uri/tests/FunctionalTests/IriEncodingDecodingTests.cs b/src/System.Private.Uri/tests/FunctionalTests/IriEncodingDecodingTests.cs index a7a3eba8d8cb..1319ad72ce12 100644 --- a/src/System.Private.Uri/tests/FunctionalTests/IriEncodingDecodingTests.cs +++ b/src/System.Private.Uri/tests/FunctionalTests/IriEncodingDecodingTests.cs @@ -19,7 +19,11 @@ public class IriEncodingDecodingTest { "%2D%5F%2E%7E", @"-_.~" }, // Encoded RFC 3986 Unreserved Marks. { "%2F%3F%3A%40%23%5B%5D", "%2F%3F%3A%40%23%5B%5D" }, // Encoded RFC 3986 Gen Delims. { @";&=+$,!'()*", @";&=+$,!'()*" }, // RFC 3986 Sub Delims. - { "%3B%26%3D%2B%24%2C%21%27%28%29%2A", "%3B%26%3D%2B%24%2C%21%27%28%29%2A" } // Encoded RFC3986 Sub Delims. + { "%3B%26%3D%2B%24%2C%21%27%28%29%2A", "%3B%26%3D%2B%24%2C%21%27%28%29%2A" }, // Encoded RFC3986 Sub Delims. + { "%E2%80%8F%E2%80%8E%E2%80%AA%E2%80%AB%E2%80%AC%E2%80%AD%E2%80%AE", + "%E2%80%8F%E2%80%8E%E2%80%AA%E2%80%AB%E2%80%AC%E2%80%AD%E2%80%AE" }, // Encoded Unicode Bidi Control Characters. + { "\u200E\u200F\u202A\u202B\u202C\u202D\u202E", + "%E2%80%8E%E2%80%8F%E2%80%AA%E2%80%AB%E2%80%AC%E2%80%AD%E2%80%AE" }, // Unencoded Unicode Bidi Control Characters }; [Fact] diff --git a/src/System.Private.Uri/tests/UnitTests/Fakes/FakeUriParser.cs b/src/System.Private.Uri/tests/UnitTests/Fakes/FakeUriParser.cs index 347e9527b16c..6508e1750aca 100644 --- a/src/System.Private.Uri/tests/UnitTests/Fakes/FakeUriParser.cs +++ b/src/System.Private.Uri/tests/UnitTests/Fakes/FakeUriParser.cs @@ -13,5 +13,13 @@ internal static bool DontEnableStrictRFC3986ReservedCharacterSets return false; } } + + internal static bool DontKeepUnicodeBidiFormattingCharacters + { + get + { + return false; + } + } } }