From b7a08fa1451e59d5bb63d2245c293e9c2d71ac87 Mon Sep 17 00:00:00 2001 From: Miha Zupan Date: Mon, 7 Aug 2023 04:08:50 +0200 Subject: [PATCH 1/3] Remove unsafe code and make scheme parsing faster in Uri --- .../src/System/DomainNameHelper.cs | 6 +- .../System.Private.Uri/src/System/Uri.cs | 392 +++++------------- .../src/System/UriHelper.cs | 11 + 3 files changed, 109 insertions(+), 300 deletions(-) diff --git a/src/libraries/System.Private.Uri/src/System/DomainNameHelper.cs b/src/libraries/System.Private.Uri/src/System/DomainNameHelper.cs index 76a9ad6295c9..4e6a79da1fee 100644 --- a/src/libraries/System.Private.Uri/src/System/DomainNameHelper.cs +++ b/src/libraries/System.Private.Uri/src/System/DomainNameHelper.cs @@ -75,11 +75,7 @@ internal static string ParseCanonicalName(string str, int start, int end, ref bo if (index >= 0) { // We saw uppercase letters. Avoid allocating both the substring and the lower-cased variant. - return string.Create(end - start, (str, start), static (buffer, state) => - { - int newLength = state.str.AsSpan(state.start, buffer.Length).ToLowerInvariant(buffer); - Debug.Assert(newLength == buffer.Length); - }); + return UriHelper.SpanToLowerInvariantString(str.AsSpan(start, end - start)); } if (str.AsSpan(start, end - start) is Localhost or Loopback) diff --git a/src/libraries/System.Private.Uri/src/System/Uri.cs b/src/libraries/System.Private.Uri/src/System/Uri.cs index 3db22a031a80..f2f3d6751ef0 100644 --- a/src/libraries/System.Private.Uri/src/System/Uri.cs +++ b/src/libraries/System.Private.Uri/src/System/Uri.cs @@ -610,8 +610,10 @@ public Uri(Uri baseUri, Uri relativeUri) break; } - UriParser? syntax = null; - if (CheckSchemeSyntax(relativeStr.AsSpan(0, i), ref syntax) == ParsingError.None) + ParsingError error = ParsingError.None; + UriParser? syntax = CheckSchemeSyntax(relativeStr.AsSpan(0, i), ref error); + + if (error == ParsingError.None) { if (baseUri.Syntax == syntax) { @@ -1909,17 +1911,30 @@ private static unsafe ParsingError ParseScheme(string uriString, ref Flags flags if (length >= c_MaxUriBufferSize) return ParsingError.SizeLimit; - //STEP1: parse scheme, lookup this Uri Syntax or create one using UnknownV1SyntaxFlags uri syntax template - fixed (char* pUriString = uriString) + // Fast path for valid http(s) schemes with no leading whitespace that are expected to be very common. + if (uriString.StartsWith("https:", StringComparison.OrdinalIgnoreCase)) + { + syntax = UriParser.HttpsUri; + flags |= (Flags)6; + } + else if (uriString.StartsWith("http:", StringComparison.OrdinalIgnoreCase)) { + syntax = UriParser.HttpUri; + flags |= (Flags)5; + } + else + { + // STEP1: parse scheme, lookup this Uri Syntax or create one using UnknownV1SyntaxFlags uri syntax template ParsingError err = ParsingError.None; - int idx = ParseSchemeCheckImplicitFile(pUriString, length, ref err, ref flags, ref syntax); + int idx = ParseSchemeCheckImplicitFile(uriString, ref err, ref flags, ref syntax); + Debug.Assert((err is ParsingError.None) == (syntax is not null)); if (err != ParsingError.None) return err; flags |= (Flags)idx; } + return ParsingError.None; } @@ -3592,108 +3607,95 @@ private unsafe void ParseRemaining() InterlockedSetFlags(cF); } - // // verifies the syntax of the scheme part // Checks on implicit File: scheme due to simple Dos/Unc path passed // returns the start of the next component position - // throws UriFormatException if invalid scheme - // - private static unsafe int ParseSchemeCheckImplicitFile(char* uriString, int length, - ref ParsingError err, ref Flags flags, ref UriParser? syntax) + private static int ParseSchemeCheckImplicitFile(string uriString, ref ParsingError err, ref Flags flags, ref UriParser? syntax) { + Debug.Assert(err == ParsingError.None); Debug.Assert((flags & Flags.Debug_LeftConstructor) == 0); - int idx = 0; + int i = 0; - //skip whitespace - while (idx < length && UriHelper.IsLWS(uriString[idx])) + // skip whitespace + while (i < uriString.Length && UriHelper.IsLWS(uriString[i])) { - ++idx; + i++; } // Unix: Unix path? // A path starting with 2 / or \ (including mixed) is treated as UNC and will be matched below - if (!OperatingSystem.IsWindows() && idx < length && uriString[idx] == '/' && - (idx + 1 == length || (uriString[idx + 1] != '/' && uriString[idx + 1] != '\\'))) + if (!OperatingSystem.IsWindows() && + (uint)i < (uint)uriString.Length && uriString[i] == '/' && + ((uint)(i + 1) >= (uint)uriString.Length || uriString[i + 1] is not ('/' or '\\'))) { flags |= (Flags.UnixPath | Flags.ImplicitFile | Flags.AuthorityFound); syntax = UriParser.UnixFileUri; - return idx; + return i; } - // sets the recognizer for well known registered schemes - // file, ftp, http, https, uuid, etc + // Find the colon. // Note that we don't support one-letter schemes that will be put into a DOS path bucket - int end = idx; - while (end < length && uriString[end] != ':') - { - ++end; - } - - // NB: On 64-bits we will use less optimized code from CheckSchemeSyntax() - // - if (IntPtr.Size == 4) + int colonIndex = i; + while ((uint)colonIndex < (uint)uriString.Length && uriString[colonIndex] != ':') { - // long = 4chars: The minimal size of a known scheme is 2 + ':' - if (end != length && end >= idx + 2 && - CheckKnownSchemes((long*)(uriString + idx), end - idx, ref syntax)) - { - return end + 1; - } + colonIndex++; } - //NB: A string must have at least 3 characters and at least 1 before ':' - if (idx + 2 >= length || end == idx) + // NB: A string must have at least 3 characters and at least 1 before ':' + if ((uint)(i + 2) >= (uint)uriString.Length || colonIndex == i) { err = ParsingError.BadFormat; return 0; } - //Check for supported special cases like a DOS file path OR a UNC share path - //NB: A string may not have ':' if this is a UNC path + // Check for supported special cases like a DOS file path OR a UNC share path + // NB: A string may not have ':' if this is a UNC path { - char c; - if ((c = uriString[idx + 1]) == ':' || c == '|') + if (uriString[i + 1] is ':' or '|') { - //DOS-like path? - if (char.IsAsciiLetter(uriString[idx])) + // DOS-like path? + if (char.IsAsciiLetter(uriString[i])) { - if ((c = uriString[idx + 2]) == '\\' || c == '/') + if (uriString[i + 2] is '\\' or '/') { flags |= (Flags.DosPath | Flags.ImplicitFile | Flags.AuthorityFound); syntax = UriParser.FileUri; - return idx; + return i; } + err = ParsingError.MustRootedPath; return 0; } - if (c == ':') - err = ParsingError.BadScheme; - else - err = ParsingError.BadFormat; + + err = uriString[i + 1] == ':' ? ParsingError.BadScheme : ParsingError.BadFormat; return 0; } - else if ((c = uriString[idx]) == '/' || c == '\\') + else if (uriString[i] is '/' or '\\') { - //UNC share? - if ((c = uriString[idx + 1]) == '\\' || c == '/') + // UNC share? + if (uriString[i + 1] is '\\' or '/') { flags |= (Flags.UncPath | Flags.ImplicitFile | Flags.AuthorityFound); syntax = UriParser.FileUri; - idx += 2; + i += 2; + // V1.1 compat this will simply eat any slashes prepended to a UNC path - while (idx < length && ((c = uriString[idx]) == '/' || c == '\\')) - ++idx; + while ((uint)i < (uint)uriString.Length && uriString[i] is '/' or '\\') + { + i++; + } - return idx; + return i; } + err = ParsingError.BadFormat; return 0; } } - if (end == length) + if (colonIndex == uriString.Length) { err = ParsingError.BadFormat; return 0; @@ -3701,275 +3703,75 @@ private unsafe void ParseRemaining() // This is a potentially valid scheme, but we have not identified it yet. // Check for illegal characters, canonicalize, and check the length. - err = CheckSchemeSyntax(new ReadOnlySpan(uriString + idx, end - idx), ref syntax!); - if (err != ParsingError.None) + syntax = CheckSchemeSyntax(uriString.AsSpan(i, colonIndex - i), ref err); + if (syntax is null) { return 0; } - return end + 1; + return colonIndex + 1; } - // - // Quickly parses well known schemes. - // nChars does not include the last ':'. Assuming there is one at the end of passed buffer - private static unsafe bool CheckKnownSchemes(long* lptr, int nChars, ref UriParser? syntax) + // This will check whether a scheme string follows the rules + private static UriParser? CheckSchemeSyntax(ReadOnlySpan scheme, ref ParsingError error) { - //NOTE beware of too short input buffers! - - const long _HTTP_Mask0 = 'h' | ('t' << 16) | ((long)'t' << 32) | ((long)'p' << 48); - const char _HTTPS_Mask1 = 's'; - const int _WS_Mask = 'w' | ('s' << 16); - const long _WSS_Mask = 'w' | ('s' << 16) | ((long)'s' << 32) | ((long)':' << 48); - const long _FTP_Mask = 'f' | ('t' << 16) | ((long)'p' << 32) | ((long)':' << 48); - const long _FILE_Mask0 = 'f' | ('i' << 16) | ((long)'l' << 32) | ((long)'e' << 48); - const long _GOPHER_Mask0 = 'g' | ('o' << 16) | ((long)'p' << 32) | ((long)'h' << 48); - const int _GOPHER_Mask1 = 'e' | ('r' << 16); - const long _MAILTO_Mask0 = 'm' | ('a' << 16) | ((long)'i' << 32) | ((long)'l' << 48); - const int _MAILTO_Mask1 = 't' | ('o' << 16); - const long _NEWS_Mask0 = 'n' | ('e' << 16) | ((long)'w' << 32) | ((long)'s' << 48); - const long _NNTP_Mask0 = 'n' | ('n' << 16) | ((long)'t' << 32) | ((long)'p' << 48); - const long _UUID_Mask0 = 'u' | ('u' << 16) | ((long)'i' << 32) | ((long)'d' << 48); - - const long _TELNET_Mask0 = 't' | ('e' << 16) | ((long)'l' << 32) | ((long)'n' << 48); - const int _TELNET_Mask1 = 'e' | ('t' << 16); - - const long _NETXXX_Mask0 = 'n' | ('e' << 16) | ((long)'t' << 32) | ((long)'.' << 48); - const long _NETTCP_Mask1 = 't' | ('c' << 16) | ((long)'p' << 32) | ((long)':' << 48); - const long _NETPIPE_Mask1 = 'p' | ('i' << 16) | ((long)'p' << 32) | ((long)'e' << 48); + Debug.Assert(error == ParsingError.None); - const long _LDAP_Mask0 = 'l' | ('d' << 16) | ((long)'a' << 32) | ((long)'p' << 48); - - - const long _LOWERCASE_Mask = 0x0020002000200020L; - const int _INT_LOWERCASE_Mask = 0x00200020; - - if (nChars == 2) + switch (scheme.Length) { - // This is the only known scheme of length 2 - if ((unchecked((int)*lptr) | _INT_LOWERCASE_Mask) == _WS_Mask) - { - syntax = UriParser.WsUri; - return true; - } - return false; - } - - //Map to a known scheme if possible - //upgrade 4 letters to ASCII lower case, keep a false case to stay false - switch (*lptr | _LOWERCASE_Mask) - { - case _HTTP_Mask0: - if (nChars == 4) - { - syntax = UriParser.HttpUri; - return true; - } - if (nChars == 5 && ((*(char*)(lptr + 1)) | 0x20) == _HTTPS_Mask1) - { - syntax = UriParser.HttpsUri; - return true; - } - break; - case _WSS_Mask: - if (nChars == 3) - { - syntax = UriParser.WssUri; - return true; - } - break; - case _FILE_Mask0: - if (nChars == 4) - { - syntax = UriParser.FileUri; - return true; - } - break; - case _FTP_Mask: - if (nChars == 3) - { - syntax = UriParser.FtpUri; - return true; - } - break; - - case _NEWS_Mask0: - if (nChars == 4) - { - syntax = UriParser.NewsUri; - return true; - } - break; - - case _NNTP_Mask0: - if (nChars == 4) - { - syntax = UriParser.NntpUri; - return true; - } + case 2: + if (scheme.Equals("ws", StringComparison.OrdinalIgnoreCase)) return UriParser.WsUri; break; - case _UUID_Mask0: - if (nChars == 4) - { - syntax = UriParser.UuidUri; - return true; - } + case 3: + if (scheme.Equals("wss", StringComparison.OrdinalIgnoreCase)) return UriParser.WssUri; + if (scheme.Equals("ftp", StringComparison.OrdinalIgnoreCase)) return UriParser.FtpUri; break; - case _GOPHER_Mask0: - if (nChars == 6 && (*(int*)(lptr + 1) | _INT_LOWERCASE_Mask) == _GOPHER_Mask1) - { - syntax = UriParser.GopherUri; - return true; - } - break; - case _MAILTO_Mask0: - if (nChars == 6 && (*(int*)(lptr + 1) | _INT_LOWERCASE_Mask) == _MAILTO_Mask1) - { - syntax = UriParser.MailToUri; - return true; - } + case 4: + if (scheme.Equals("http", StringComparison.OrdinalIgnoreCase)) return UriParser.HttpUri; + if (scheme.Equals("file", StringComparison.OrdinalIgnoreCase)) return UriParser.FileUri; + if (scheme.Equals("uuid", StringComparison.OrdinalIgnoreCase)) return UriParser.UuidUri; + if (scheme.Equals("nntp", StringComparison.OrdinalIgnoreCase)) return UriParser.NntpUri; + if (scheme.Equals("ldap", StringComparison.OrdinalIgnoreCase)) return UriParser.LdapUri; + if (scheme.Equals("news", StringComparison.OrdinalIgnoreCase)) return UriParser.NewsUri; break; - case _TELNET_Mask0: - if (nChars == 6 && (*(int*)(lptr + 1) | _INT_LOWERCASE_Mask) == _TELNET_Mask1) - { - syntax = UriParser.TelnetUri; - return true; - } + case 5: + if (scheme.Equals("https", StringComparison.OrdinalIgnoreCase)) return UriParser.HttpsUri; break; - case _NETXXX_Mask0: - if (nChars == 8 && (*(lptr + 1) | _LOWERCASE_Mask) == _NETPIPE_Mask1) - { - syntax = UriParser.NetPipeUri; - return true; - } - else if (nChars == 7 && (*(lptr + 1) | _LOWERCASE_Mask) == _NETTCP_Mask1) - { - syntax = UriParser.NetTcpUri; - return true; - } + case 6: + if (scheme.Equals("mailto", StringComparison.OrdinalIgnoreCase)) return UriParser.MailToUri; + if (scheme.Equals("gopher", StringComparison.OrdinalIgnoreCase)) return UriParser.GopherUri; + if (scheme.Equals("telnet", StringComparison.OrdinalIgnoreCase)) return UriParser.TelnetUri; break; - case _LDAP_Mask0: - if (nChars == 4) - { - syntax = UriParser.LdapUri; - return true; - } + case 7: + if (scheme.Equals("net.tcp", StringComparison.OrdinalIgnoreCase)) return UriParser.NetTcpUri; break; - default: break; - } - return false; - } - - // - // This will check whether a scheme string follows the rules - // - private static unsafe ParsingError CheckSchemeSyntax(ReadOnlySpan span, ref UriParser? syntax) - { - static char ToLowerCaseAscii(char c) => char.IsAsciiLetterUpper(c) ? (char)(c | 0x20) : c; - - if (span.Length == 0) - { - return ParsingError.BadScheme; - } - - // The first character must be an alpha. Validate that and store it as lower-case, as - // all of the fast-path checks need that value. - char firstLower = span[0]; - if (char.IsAsciiLetterUpper(firstLower)) - { - firstLower = (char)(firstLower | 0x20); - } - else if (!char.IsAsciiLetterLower(firstLower)) - { - return ParsingError.BadScheme; - } - // Special-case common and known schemes to avoid allocations and dictionary lookups in these cases. - const int wsMask = 'w' << 8 | 's'; - const int ftpMask = 'f' << 16 | 't' << 8 | 'p'; - const int wssMask = 'w' << 16 | 's' << 8 | 's'; - const int fileMask = 'f' << 24 | 'i' << 16 | 'l' << 8 | 'e'; - const int httpMask = 'h' << 24 | 't' << 16 | 't' << 8 | 'p'; - const int mailMask = 'm' << 24 | 'a' << 16 | 'i' << 8 | 'l'; - switch (span.Length) - { - case 2: - if (wsMask == (firstLower << 8 | ToLowerCaseAscii(span[1]))) - { - syntax = UriParser.WsUri; - return ParsingError.None; - } - break; - case 3: - switch (firstLower << 16 | ToLowerCaseAscii(span[1]) << 8 | ToLowerCaseAscii(span[2])) - { - case ftpMask: - syntax = UriParser.FtpUri; - return ParsingError.None; - case wssMask: - syntax = UriParser.WssUri; - return ParsingError.None; - } - break; - case 4: - switch (firstLower << 24 | ToLowerCaseAscii(span[1]) << 16 | ToLowerCaseAscii(span[2]) << 8 | ToLowerCaseAscii(span[3])) - { - case httpMask: - syntax = UriParser.HttpUri; - return ParsingError.None; - case fileMask: - syntax = UriParser.FileUri; - return ParsingError.None; - } - break; - case 5: - if (httpMask == (firstLower << 24 | ToLowerCaseAscii(span[1]) << 16 | ToLowerCaseAscii(span[2]) << 8 | ToLowerCaseAscii(span[3])) && - ToLowerCaseAscii(span[4]) == 's') - { - syntax = UriParser.HttpsUri; - return ParsingError.None; - } - break; - case 6: - if (mailMask == (firstLower << 24 | ToLowerCaseAscii(span[1]) << 16 | ToLowerCaseAscii(span[2]) << 8 | ToLowerCaseAscii(span[3])) && - ToLowerCaseAscii(span[4]) == 't' && ToLowerCaseAscii(span[5]) == 'o') - { - syntax = UriParser.MailToUri; - return ParsingError.None; - } + case 8: + if (scheme.Equals("net.pipe", StringComparison.OrdinalIgnoreCase)) return UriParser.NetPipeUri; break; } - // The scheme is not known. Validate all of the characters in the input. - for (int i = 1; i < span.Length; i++) + // scheme = alpha *(alpha | digit | '+' | '-' | '.') + if (scheme.Length == 0 || + !char.IsAsciiLetter(scheme[0]) || + scheme.ContainsAnyExcept(s_schemeChars)) { - char c = span[i]; - if (!char.IsAsciiLetterOrDigit(c) && c != '+' && c != '-' && c != '.') - { - return ParsingError.BadScheme; - } + error = ParsingError.BadScheme; + return null; } - if (span.Length > c_MaxUriSchemeName) + if (scheme.Length > c_MaxUriSchemeName) { - return ParsingError.SchemeLimit; + error = ParsingError.SchemeLimit; + return null; } // Then look up the syntax in a string-based table. -#pragma warning disable CS8500 // takes address of managed type - ReadOnlySpan tmpSpan = span; // avoid address exposing the span and impacting the other code in the method that uses it - string str = string.Create(tmpSpan.Length, (IntPtr)(&tmpSpan), (buffer, spanPtr) => - { - int charsWritten = (*(ReadOnlySpan*)spanPtr).ToLowerInvariant(buffer); - Debug.Assert(charsWritten == buffer.Length); - }); -#pragma warning restore CS8500 - syntax = UriParser.FindOrFetchAsUnknownV1Syntax(str); - return ParsingError.None; + return UriParser.FindOrFetchAsUnknownV1Syntax(UriHelper.SpanToLowerInvariantString(scheme)); } // diff --git a/src/libraries/System.Private.Uri/src/System/UriHelper.cs b/src/libraries/System.Private.Uri/src/System/UriHelper.cs index 3fa5b49893fd..e4edf02e86ab 100644 --- a/src/libraries/System.Private.Uri/src/System/UriHelper.cs +++ b/src/libraries/System.Private.Uri/src/System/UriHelper.cs @@ -10,6 +10,17 @@ namespace System { internal static class UriHelper { + public static unsafe string SpanToLowerInvariantString(ReadOnlySpan span) + { +#pragma warning disable CS8500 // takes address of managed type + return string.Create(span.Length, (IntPtr)(&span), static (buffer, spanPtr) => + { + int charsWritten = (*(ReadOnlySpan*)spanPtr).ToLowerInvariant(buffer); + Debug.Assert(charsWritten == buffer.Length); + }); +#pragma warning restore CS8500 + } + // http://host/Path/Path/File?Query is the base of // - http://host/Path/Path/File/ ... (those "File" words may be different in semantic but anyway) // - http://host/Path/Path/#Fragment From fd95daea018a82282623cb1edd639c6ba407fe79 Mon Sep 17 00:00:00 2001 From: Miha Zupan Date: Mon, 7 Aug 2023 05:16:47 +0200 Subject: [PATCH 2/3] Reduce the number of range checks --- .../System.Private.Uri/src/System/Uri.cs | 64 ++++++++++--------- 1 file changed, 33 insertions(+), 31 deletions(-) diff --git a/src/libraries/System.Private.Uri/src/System/Uri.cs b/src/libraries/System.Private.Uri/src/System/Uri.cs index f2f3d6751ef0..d9cdea81daab 100644 --- a/src/libraries/System.Private.Uri/src/System/Uri.cs +++ b/src/libraries/System.Private.Uri/src/System/Uri.cs @@ -3618,7 +3618,7 @@ private static int ParseSchemeCheckImplicitFile(string uriString, ref ParsingErr int i = 0; // skip whitespace - while (i < uriString.Length && UriHelper.IsLWS(uriString[i])) + while ((uint)i < (uint)uriString.Length && UriHelper.IsLWS(uriString[i])) { i++; } @@ -3644,7 +3644,11 @@ private static int ParseSchemeCheckImplicitFile(string uriString, ref ParsingErr } // NB: A string must have at least 3 characters and at least 1 before ':' - if ((uint)(i + 2) >= (uint)uriString.Length || colonIndex == i) + if ((uint)(i + 2) >= (uint)uriString.Length || + colonIndex == i || + // Redundant checks to eliminate range checks below + (uint)i >= (uint)uriString.Length || + (uint)(i + 1) >= (uint)uriString.Length) { err = ParsingError.BadFormat; return 0; @@ -3652,47 +3656,45 @@ private static int ParseSchemeCheckImplicitFile(string uriString, ref ParsingErr // Check for supported special cases like a DOS file path OR a UNC share path // NB: A string may not have ':' if this is a UNC path + if (uriString[i + 1] is ':' or '|') { - if (uriString[i + 1] is ':' or '|') + // DOS-like path? + if (char.IsAsciiLetter(uriString[i])) { - // DOS-like path? - if (char.IsAsciiLetter(uriString[i])) + if (uriString[i + 2] is '\\' or '/') { - if (uriString[i + 2] is '\\' or '/') - { - flags |= (Flags.DosPath | Flags.ImplicitFile | Flags.AuthorityFound); - syntax = UriParser.FileUri; - return i; - } - - err = ParsingError.MustRootedPath; - return 0; + flags |= (Flags.DosPath | Flags.ImplicitFile | Flags.AuthorityFound); + syntax = UriParser.FileUri; + return i; } - err = uriString[i + 1] == ':' ? ParsingError.BadScheme : ParsingError.BadFormat; + err = ParsingError.MustRootedPath; return 0; } - else if (uriString[i] is '/' or '\\') - { - // UNC share? - if (uriString[i + 1] is '\\' or '/') - { - flags |= (Flags.UncPath | Flags.ImplicitFile | Flags.AuthorityFound); - syntax = UriParser.FileUri; - i += 2; - // V1.1 compat this will simply eat any slashes prepended to a UNC path - while ((uint)i < (uint)uriString.Length && uriString[i] is '/' or '\\') - { - i++; - } + err = uriString[i + 1] == ':' ? ParsingError.BadScheme : ParsingError.BadFormat; + return 0; + } + else if (uriString[i] is '/' or '\\') + { + // UNC share? + if (uriString[i + 1] is '\\' or '/') + { + flags |= (Flags.UncPath | Flags.ImplicitFile | Flags.AuthorityFound); + syntax = UriParser.FileUri; + i += 2; - return i; + // V1.1 compat this will simply eat any slashes prepended to a UNC path + while ((uint)i < (uint)uriString.Length && uriString[i] is '/' or '\\') + { + i++; } - err = ParsingError.BadFormat; - return 0; + return i; } + + err = ParsingError.BadFormat; + return 0; } if (colonIndex == uriString.Length) From d040afa1b46436831586af63e7f53025f54ab9db Mon Sep 17 00:00:00 2001 From: Miha Zupan Date: Wed, 9 Aug 2023 20:36:51 +0200 Subject: [PATCH 3/3] Switch loop to IndexOf --- .../System.Private.Uri/src/System/Uri.cs | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/src/libraries/System.Private.Uri/src/System/Uri.cs b/src/libraries/System.Private.Uri/src/System/Uri.cs index d9cdea81daab..770e035d36a0 100644 --- a/src/libraries/System.Private.Uri/src/System/Uri.cs +++ b/src/libraries/System.Private.Uri/src/System/Uri.cs @@ -3636,16 +3636,11 @@ private static int ParseSchemeCheckImplicitFile(string uriString, ref ParsingErr // Find the colon. // Note that we don't support one-letter schemes that will be put into a DOS path bucket - - int colonIndex = i; - while ((uint)colonIndex < (uint)uriString.Length && uriString[colonIndex] != ':') - { - colonIndex++; - } + int colonOffset = uriString.AsSpan(i).IndexOf(':'); // NB: A string must have at least 3 characters and at least 1 before ':' if ((uint)(i + 2) >= (uint)uriString.Length || - colonIndex == i || + colonOffset == 0 || // Redundant checks to eliminate range checks below (uint)i >= (uint)uriString.Length || (uint)(i + 1) >= (uint)uriString.Length) @@ -3697,7 +3692,7 @@ private static int ParseSchemeCheckImplicitFile(string uriString, ref ParsingErr return 0; } - if (colonIndex == uriString.Length) + if (colonOffset < 0) { err = ParsingError.BadFormat; return 0; @@ -3705,12 +3700,12 @@ private static int ParseSchemeCheckImplicitFile(string uriString, ref ParsingErr // This is a potentially valid scheme, but we have not identified it yet. // Check for illegal characters, canonicalize, and check the length. - syntax = CheckSchemeSyntax(uriString.AsSpan(i, colonIndex - i), ref err); + syntax = CheckSchemeSyntax(uriString.AsSpan(i, colonOffset), ref err); if (syntax is null) { return 0; } - return colonIndex + 1; + return i + colonOffset + 1; } // This will check whether a scheme string follows the rules