diff --git a/src/Servers/HttpSys/test/FunctionalTests/ResponseCachingTests.cs b/src/Servers/HttpSys/test/FunctionalTests/ResponseCachingTests.cs index 7d043d273fec..e590ac7dda31 100644 --- a/src/Servers/HttpSys/test/FunctionalTests/ResponseCachingTests.cs +++ b/src/Servers/HttpSys/test/FunctionalTests/ResponseCachingTests.cs @@ -383,6 +383,7 @@ public async Task Caching_SendFileNoContentLength_NotCached() } } + [QuarantinedTest("new issue")] [ConditionalFact] public async Task Caching_SendFileWithFullContentLength_Cached() { diff --git a/src/Servers/Kestrel/Core/src/Internal/Http/PathNormalizer.cs b/src/Servers/Kestrel/Core/src/Internal/Http/PathNormalizer.cs index d1e57077d2fc..eef91f142abe 100644 --- a/src/Servers/Kestrel/Core/src/Internal/Http/PathNormalizer.cs +++ b/src/Servers/Kestrel/Core/src/Internal/Http/PathNormalizer.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. using System.Diagnostics; +using System.Runtime.InteropServices; using System.Text; using Microsoft.AspNetCore.Internal; using Microsoft.AspNetCore.Server.Kestrel.Core.Internal.Infrastructure; @@ -50,193 +51,112 @@ public static string DecodePath(Span path, bool pathEncoded, string rawTar } // In-place implementation of the algorithm from https://tools.ietf.org/html/rfc3986#section-5.2.4 - public static unsafe int RemoveDotSegments(Span input) + public static int RemoveDotSegments(Span src) { - fixed (byte* start = input) - { - var end = start + input.Length; - return RemoveDotSegments(start, end); - } - } - - public static unsafe int RemoveDotSegments(byte* start, byte* end) - { - if (!ContainsDotSegments(start, end)) - { - return (int)(end - start); - } + Debug.Assert(src[0] == '/', "Path segment must always start with a '/'"); + ReadOnlySpan dotSlash = "./"u8; + ReadOnlySpan slashDot = "/."u8; - var src = start; - var dst = start; + var writtenLength = 0; + var readPointer = 0; - while (src < end) + while (src.Length > readPointer) { - var ch1 = *src; - Debug.Assert(ch1 == '/', "Path segment must always start with a '/'"); - - byte ch2, ch3, ch4; - - switch (end - src) + var currentSrc = src[readPointer..]; + var nextDotSegmentIndex = currentSrc.IndexOf(slashDot); + if (nextDotSegmentIndex < 0 && readPointer == 0) { - case 1: - break; - case 2: - ch2 = *(src + 1); - - if (ch2 == ByteDot) - { - // B. if the input buffer begins with a prefix of "/./" or "/.", - // where "." is a complete path segment, then replace that - // prefix with "/" in the input buffer; otherwise, - src += 1; - *src = ByteSlash; - continue; - } - - break; - case 3: - ch2 = *(src + 1); - ch3 = *(src + 2); - - if (ch2 == ByteDot && ch3 == ByteDot) - { - // C. if the input buffer begins with a prefix of "/../" or "/..", - // where ".." is a complete path segment, then replace that - // prefix with "/" in the input buffer and remove the last - // segment and its preceding "/" (if any) from the output - // buffer; otherwise, - src += 2; - *src = ByteSlash; - - if (dst > start) - { - do - { - dst--; - } while (dst > start && *dst != ByteSlash); - } - - continue; - } - else if (ch2 == ByteDot && ch3 == ByteSlash) - { - // B. if the input buffer begins with a prefix of "/./" or "/.", - // where "." is a complete path segment, then replace that - // prefix with "/" in the input buffer; otherwise, - src += 2; - continue; - } - - break; - default: - ch2 = *(src + 1); - ch3 = *(src + 2); - ch4 = *(src + 3); - - if (ch2 == ByteDot && ch3 == ByteDot && ch4 == ByteSlash) - { - // C. if the input buffer begins with a prefix of "/../" or "/..", - // where ".." is a complete path segment, then replace that - // prefix with "/" in the input buffer and remove the last - // segment and its preceding "/" (if any) from the output - // buffer; otherwise, - src += 3; - - if (dst > start) - { - do - { - dst--; - } while (dst > start && *dst != ByteSlash); - } - - continue; - } - else if (ch2 == ByteDot && ch3 == ByteSlash) - { - // B. if the input buffer begins with a prefix of "/./" or "/.", - // where "." is a complete path segment, then replace that - // prefix with "/" in the input buffer; otherwise, - src += 2; - continue; - } - - break; + return src.Length; } - - // E. move the first path segment in the input buffer to the end of - // the output buffer, including the initial "/" character (if - // any) and any subsequent characters up to, but not including, - // the next "/" character or the end of the input buffer. - do + if (nextDotSegmentIndex < 0) { - *dst++ = ch1; - ch1 = *++src; - } while (src < end && ch1 != ByteSlash); - } - - if (dst == start) - { - *dst++ = ByteSlash; - } - - return (int)(dst - start); - } - - public static unsafe bool ContainsDotSegments(byte* start, byte* end) - { - var src = start; - while (src < end) - { - var ch1 = *src; - Debug.Assert(ch1 == '/', "Path segment must always start with a '/'"); - - byte ch2, ch3, ch4; - - switch (end - src) + // Copy the remianing src to dst, and return. + currentSrc.CopyTo(src[writtenLength..]); + writtenLength += src.Length - readPointer; + return writtenLength; + } + else if (nextDotSegmentIndex > 0) { - case 1: - break; - case 2: - ch2 = *(src + 1); + // Copy until the next segment excluding the trailer. + currentSrc[..nextDotSegmentIndex].CopyTo(src[writtenLength..]); + writtenLength += nextDotSegmentIndex; + readPointer += nextDotSegmentIndex; + } - if (ch2 == ByteDot) - { - return true; - } + var remainingLength = src.Length - readPointer; - break; - case 3: - ch2 = *(src + 1); - ch3 = *(src + 2); + // Case of /../ or /./ or non-dot segments. + if (remainingLength > 3) + { + var nextIndex = readPointer + 2; + + if (src[nextIndex] == ByteSlash) + { + // Case: /./ + readPointer = nextIndex; + } + else if (MemoryMarshal.CreateSpan(ref src[nextIndex], 2).StartsWith(dotSlash)) + { + // Case: /../ + // Remove the last segment and replace the path with / + var lastIndex = MemoryMarshal.CreateSpan(ref src[0], writtenLength).LastIndexOf(ByteSlash); + + // Move write pointer to the end of the previous segment without / or to start position + writtenLength = int.Max(0, lastIndex); + + // Move the read pointer to the next segments beginning including / + readPointer += 3; + } + else + { + // Not a dot segment e.g. /.a, copy the matched /. and the next character then bump the read pointer + src.Slice(readPointer, 3).CopyTo(src[writtenLength..]); + writtenLength += 3; + readPointer = nextIndex + 1; + } + } - if ((ch2 == ByteDot && ch3 == ByteDot) || - (ch2 == ByteDot && ch3 == ByteSlash)) + // Ending with /.. or /./ or non-dot segments. + else if (remainingLength == 3) + { + var nextIndex = readPointer + 2; + if (src[nextIndex] == ByteSlash) + { + // Case: /./ Replace the /./ segment with a closing / + src[writtenLength++] = ByteSlash; + return writtenLength; + } + else if (src[nextIndex] == ByteDot) + { + // Case: /.. Remove the last segment and replace the path with / + var lastSlashIndex = MemoryMarshal.CreateSpan(ref src[0], writtenLength).LastIndexOf(ByteSlash); + + // If this was the beginning of the string, then return / + if (lastSlashIndex < 0) { - return true; + Debug.Assert(src[0] == '/'); + return 1; } - - break; - default: - ch2 = *(src + 1); - ch3 = *(src + 2); - ch4 = *(src + 3); - - if ((ch2 == ByteDot && ch3 == ByteDot && ch4 == ByteSlash) || - (ch2 == ByteDot && ch3 == ByteSlash)) + else { - return true; + writtenLength = lastSlashIndex + 1; } - - break; + return writtenLength; + } + else + { + // Not a dot segment e.g. /.a, copy the remaining part. + src[readPointer..].CopyTo(src[writtenLength..]); + return writtenLength + 3; + } } - - do + // Ending with /. + else if (remainingLength == 2) { - ch1 = *++src; - } while (src < end && ch1 != ByteSlash); + src[writtenLength++] = ByteSlash; + return writtenLength; + } } - - return false; + return writtenLength; } } diff --git a/src/Servers/Kestrel/Core/test/PathNormalizerTests.cs b/src/Servers/Kestrel/Core/test/PathNormalizerTests.cs index e0d253d4cf42..2c58a68b8db7 100644 --- a/src/Servers/Kestrel/Core/test/PathNormalizerTests.cs +++ b/src/Servers/Kestrel/Core/test/PathNormalizerTests.cs @@ -1,4 +1,4 @@ -// Licensed to the .NET Foundation under one or more agreements. +// Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. using System; @@ -54,6 +54,29 @@ public class PathNormalizerTests [InlineData("/", "/")] [InlineData("/no/segments", "/no/segments")] [InlineData("/no/segments/", "/no/segments/")] + [InlineData("/././", "/")] + [InlineData("/./.", "/")] + [InlineData("/../..", "/")] + [InlineData("/../../", "/")] + [InlineData("/../.", "/")] + [InlineData("/./..", "/")] + [InlineData("/.././", "/")] + [InlineData("/./../", "/")] + [InlineData("/..", "/")] + [InlineData("/.", "/")] + [InlineData("/a/abc/../abc/../b", "/a/b")] + [InlineData("/a/abc/.a", "/a/abc/.a")] + [InlineData("/a/abc/..a", "/a/abc/..a")] + [InlineData("/a/.b/c", "/a/.b/c")] + [InlineData("/a/.b/../c", "/a/c")] + [InlineData("/a/../.b/./c", "/.b/c")] + [InlineData("/a/.b/./c", "/a/.b/c")] + [InlineData("/a/./.b/./c", "/a/.b/c")] + [InlineData("/a/..b/c", "/a/..b/c")] + [InlineData("/a/..b/../c", "/a/c")] + [InlineData("/a/../..b/./c", "/..b/c")] + [InlineData("/a/..b/./c", "/a/..b/c")] + [InlineData("/a/./..b/./c", "/a/..b/c")] public void RemovesDotSegments(string input, string expected) { var data = Encoding.ASCII.GetBytes(input); diff --git a/src/Servers/Kestrel/perf/Microbenchmarks/DotSegmentRemovalBenchmark.cs b/src/Servers/Kestrel/perf/Microbenchmarks/DotSegmentRemovalBenchmark.cs index 16e901bea5f4..05e9a2b0fcc0 100644 --- a/src/Servers/Kestrel/perf/Microbenchmarks/DotSegmentRemovalBenchmark.cs +++ b/src/Servers/Kestrel/perf/Microbenchmarks/DotSegmentRemovalBenchmark.cs @@ -1,4 +1,4 @@ -// Licensed to the .NET Foundation under one or more agreements. +// Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. using System.Text; @@ -13,45 +13,53 @@ public class DotSegmentRemovalBenchmark private const string _noDotSegments = "/long/request/target/for/benchmarking/what/else/can/we/put/here"; private const string _singleDotSegments = "/long/./request/./target/./for/./benchmarking/./what/./else/./can/./we/./put/./here"; private const string _doubleDotSegments = "/long/../request/../target/../for/../benchmarking/../what/../else/../can/../we/../put/../here"; + private const string _oneSingleDotSegments = "/long/request/target/for/./benchmarking/what/else/can/we/put/here"; + private const string _oneDoubleDotSegments = "/long/request/target/for/../benchmarking/what/else/can/we/put/here"; private readonly byte[] _noDotSegmentsAscii = Encoding.ASCII.GetBytes(_noDotSegments); private readonly byte[] _singleDotSegmentsAscii = Encoding.ASCII.GetBytes(_singleDotSegments); private readonly byte[] _doubleDotSegmentsAscii = Encoding.ASCII.GetBytes(_doubleDotSegments); + private readonly byte[] _oneDingleDotSegmentsAscii = Encoding.ASCII.GetBytes(_oneSingleDotSegments); + private readonly byte[] _oneDoubleDotSegmentsAscii = Encoding.ASCII.GetBytes(_oneDoubleDotSegments); private readonly byte[] _noDotSegmentsBytes = new byte[_noDotSegments.Length]; private readonly byte[] _singleDotSegmentsBytes = new byte[_singleDotSegments.Length]; private readonly byte[] _doubleDotSegmentsBytes = new byte[_doubleDotSegments.Length]; + private readonly byte[] _oneSingleDotSegmentsBytes = new byte[_singleDotSegments.Length]; + private readonly byte[] _oneDoubleDotSegmentsBytes = new byte[_doubleDotSegments.Length]; [Benchmark(Baseline = true)] - public unsafe int NoDotSegments() + public int NoDotSegments() { _noDotSegmentsAscii.CopyTo(_noDotSegmentsBytes, 0); - - fixed (byte* start = _noDotSegmentsBytes) - { - return PathNormalizer.RemoveDotSegments(start, start + _noDotSegments.Length); - } + return PathNormalizer.RemoveDotSegments(_noDotSegmentsBytes); } [Benchmark] - public unsafe int SingleDotSegments() + public int SingleDotSegments() { _singleDotSegmentsAscii.CopyTo(_singleDotSegmentsBytes, 0); - - fixed (byte* start = _singleDotSegmentsBytes) - { - return PathNormalizer.RemoveDotSegments(start, start + _singleDotSegments.Length); - } + return PathNormalizer.RemoveDotSegments(_singleDotSegmentsBytes); } [Benchmark] - public unsafe int DoubleDotSegments() + public int DoubleDotSegments() { _doubleDotSegmentsAscii.CopyTo(_doubleDotSegmentsBytes, 0); + return PathNormalizer.RemoveDotSegments(_doubleDotSegmentsBytes); + } - fixed (byte* start = _doubleDotSegmentsBytes) - { - return PathNormalizer.RemoveDotSegments(start, start + _doubleDotSegments.Length); - } + [Benchmark] + public int OneSingleDotSegments() + { + _oneDingleDotSegmentsAscii.CopyTo(_oneSingleDotSegmentsBytes, 0); + return PathNormalizer.RemoveDotSegments(_oneSingleDotSegmentsBytes); + } + + [Benchmark] + public int OneDoubleDotSegments() + { + _oneDoubleDotSegmentsAscii.CopyTo(_oneDoubleDotSegmentsBytes, 0); + return PathNormalizer.RemoveDotSegments(_oneDoubleDotSegmentsBytes); } } diff --git a/src/Shared/HttpSys/RequestProcessing/PathNormalizer.cs b/src/Shared/HttpSys/RequestProcessing/PathNormalizer.cs index d29d26624578..bc26d3bd4608 100644 --- a/src/Shared/HttpSys/RequestProcessing/PathNormalizer.cs +++ b/src/Shared/HttpSys/RequestProcessing/PathNormalizer.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. using System.Diagnostics; +using System.Runtime.InteropServices; namespace Microsoft.AspNetCore.HttpSys.Internal; @@ -11,195 +12,112 @@ internal static class PathNormalizer private const byte ByteDot = (byte)'.'; // In-place implementation of the algorithm from https://tools.ietf.org/html/rfc3986#section-5.2.4 - public static unsafe int RemoveDotSegments(Span input) + public static int RemoveDotSegments(Span src) { - fixed (byte* start = input) - { - var end = start + input.Length; - return RemoveDotSegments(start, end); - } - } - - public static unsafe int RemoveDotSegments(byte* start, byte* end) - { - if (!ContainsDotSegments(start, end)) - { - return (int)(end - start); - } + Debug.Assert(src[0] == '/', "Path segment must always start with a '/'"); + ReadOnlySpan dotSlash = "./"u8; + ReadOnlySpan slashDot = "/."u8; - var src = start; - var dst = start; + var writtenLength = 0; + var readPointer = 0; - while (src < end) + while (src.Length > readPointer) { - var ch1 = *src; - Debug.Assert(ch1 == '/', "Path segment must always start with a '/'"); - - byte ch2, ch3, ch4; - - switch (end - src) + var currentSrc = src[readPointer..]; + var nextDotSegmentIndex = currentSrc.IndexOf(slashDot); + if (nextDotSegmentIndex < 0 && readPointer == 0) { - case 1: - break; - case 2: - ch2 = *(src + 1); - - if (ch2 == ByteDot) - { - // B. if the input buffer begins with a prefix of "/./" or "/.", - // where "." is a complete path segment, then replace that - // prefix with "/" in the input buffer; otherwise, - src += 1; - *src = ByteSlash; - continue; - } - - break; - case 3: - ch2 = *(src + 1); - ch3 = *(src + 2); - - if (ch2 == ByteDot && ch3 == ByteDot) - { - // C. if the input buffer begins with a prefix of "/../" or "/..", - // where ".." is a complete path segment, then replace that - // prefix with "/" in the input buffer and remove the last - // segment and its preceding "/" (if any) from the output - // buffer; otherwise, - src += 2; - *src = ByteSlash; - - if (dst > start) - { - do - { - dst--; - } while (dst > start && *dst != ByteSlash); - } - - continue; - } - else if (ch2 == ByteDot && ch3 == ByteSlash) - { - // B. if the input buffer begins with a prefix of "/./" or "/.", - // where "." is a complete path segment, then replace that - // prefix with "/" in the input buffer; otherwise, - src += 2; - continue; - } - - break; - default: - ch2 = *(src + 1); - ch3 = *(src + 2); - ch4 = *(src + 3); - - if (ch2 == ByteDot && ch3 == ByteDot && ch4 == ByteSlash) - { - // C. if the input buffer begins with a prefix of "/../" or "/..", - // where ".." is a complete path segment, then replace that - // prefix with "/" in the input buffer and remove the last - // segment and its preceding "/" (if any) from the output - // buffer; otherwise, - src += 3; - - if (dst > start) - { - do - { - dst--; - } while (dst > start && *dst != ByteSlash); - } - - continue; - } - else if (ch2 == ByteDot && ch3 == ByteSlash) - { - // B. if the input buffer begins with a prefix of "/./" or "/.", - // where "." is a complete path segment, then replace that - // prefix with "/" in the input buffer; otherwise, - src += 2; - continue; - } - - break; + return src.Length; } - - // E. move the first path segment in the input buffer to the end of - // the output buffer, including the initial "/" character (if - // any) and any subsequent characters up to, but not including, - // the next "/" character or the end of the input buffer. - do + if (nextDotSegmentIndex < 0) { - *dst++ = ch1; - ch1 = *++src; - } while (src < end && ch1 != ByteSlash); - } - - if (dst == start) - { - *dst++ = ByteSlash; - } - - return (int)(dst - start); - } - - public static unsafe bool ContainsDotSegments(byte* start, byte* end) - { - var src = start; - var dst = start; - - while (src < end) - { - var ch1 = *src; - Debug.Assert(ch1 == '/', "Path segment must always start with a '/'"); - - byte ch2, ch3, ch4; - - switch (end - src) + // Copy the remianing src to dst, and return. + currentSrc.CopyTo(src[writtenLength..]); + writtenLength += src.Length - readPointer; + return writtenLength; + } + else if (nextDotSegmentIndex > 0) { - case 1: - break; - case 2: - ch2 = *(src + 1); + // Copy until the next segment excluding the trailer. + currentSrc[..nextDotSegmentIndex].CopyTo(src[writtenLength..]); + writtenLength += nextDotSegmentIndex; + readPointer += nextDotSegmentIndex; + } - if (ch2 == ByteDot) - { - return true; - } + var remainingLength = src.Length - readPointer; - break; - case 3: - ch2 = *(src + 1); - ch3 = *(src + 2); + // Case of /../ or /./ or non-dot segments. + if (remainingLength > 3) + { + var nextIndex = readPointer + 2; + + if (src[nextIndex] == ByteSlash) + { + // Case: /./ + readPointer = nextIndex; + } + else if (MemoryMarshal.CreateSpan(ref src[nextIndex], 2).StartsWith(dotSlash)) + { + // Case: /../ + // Remove the last segment and replace the path with / + var lastIndex = MemoryMarshal.CreateSpan(ref src[0], writtenLength).LastIndexOf(ByteSlash); + + // Move write pointer to the end of the previous segment without / or to start position + writtenLength = int.Max(0, lastIndex); + + // Move the read pointer to the next segments beginning including / + readPointer += 3; + } + else + { + // Not a dot segment e.g. /.a, copy the matched /. and the next character then bump the read pointer + src.Slice(readPointer, 3).CopyTo(src[writtenLength..]); + writtenLength += 3; + readPointer = nextIndex + 1; + } + } - if ((ch2 == ByteDot && ch3 == ByteDot) || - (ch2 == ByteDot && ch3 == ByteSlash)) + // Ending with /.. or /./ or non-dot segments. + else if (remainingLength == 3) + { + var nextIndex = readPointer + 2; + if (src[nextIndex] == ByteSlash) + { + // Case: /./ Replace the /./ segment with a closing / + src[writtenLength++] = ByteSlash; + return writtenLength; + } + else if (src[nextIndex] == ByteDot) + { + // Case: /.. Remove the last segment and replace the path with / + var lastSlashIndex = MemoryMarshal.CreateSpan(ref src[0], writtenLength).LastIndexOf(ByteSlash); + + // If this was the beginning of the string, then return / + if (lastSlashIndex < 0) { - return true; + Debug.Assert(src[0] == '/'); + return 1; } - - break; - default: - ch2 = *(src + 1); - ch3 = *(src + 2); - ch4 = *(src + 3); - - if ((ch2 == ByteDot && ch3 == ByteDot && ch4 == ByteSlash) || - (ch2 == ByteDot && ch3 == ByteSlash)) + else { - return true; + writtenLength = lastSlashIndex + 1; } - - break; + return writtenLength; + } + else + { + // Not a dot segment e.g. /.a, copy the remaining part. + src[readPointer..].CopyTo(src[writtenLength..]); + return writtenLength + 3; + } } - - do + // Ending with /. + else if (remainingLength == 2) { - ch1 = *++src; - } while (src < end && ch1 != ByteSlash); + src[writtenLength++] = ByteSlash; + return writtenLength; + } } - - return false; + return writtenLength; } }