From b87fa6c594cb6df7fb63227e6634a15f45ef3236 Mon Sep 17 00:00:00 2001 From: ladeak Date: Sun, 7 Jul 2024 21:23:53 +0200 Subject: [PATCH 01/13] Tests --- .../Core/src/Internal/Http/PathNormalizer.cs | 231 +++++++----------- .../Kestrel/Core/test/PathNormalizerTests.cs | 12 +- 2 files changed, 96 insertions(+), 147 deletions(-) diff --git a/src/Servers/Kestrel/Core/src/Internal/Http/PathNormalizer.cs b/src/Servers/Kestrel/Core/src/Internal/Http/PathNormalizer.cs index d1e57077d2fc..297516b9cd07 100644 --- a/src/Servers/Kestrel/Core/src/Internal/Http/PathNormalizer.cs +++ b/src/Servers/Kestrel/Core/src/Internal/Http/PathNormalizer.cs @@ -50,193 +50,132 @@ public static string DecodePath(Span path, bool pathEncoded, string rawTar } // In-place implementation of the algorithm from https://tools.ietf.org/html/rfc3986#section-5.2.4 - public static unsafe int RemoveDotSegments(Span input) + public static int RemoveDotSegments(Span src) { - fixed (byte* start = input) + Debug.Assert(src[0] == '/', "Path segment must always start with a '/'"); + ReadOnlySpan slashDot = "/."u8; + ReadOnlySpan dotSlash = "./"u8; + if (!ContainsDotSegments(src)) { - var end = start + input.Length; - return RemoveDotSegments(start, end); + return src.Length; } - } - public static unsafe int RemoveDotSegments(byte* start, byte* end) - { - if (!ContainsDotSegments(start, end)) - { - return (int)(end - start); - } + var writtenLength = 0; + var dst = src; - var src = start; - var dst = start; - - while (src < end) + while (src.Length > 0) { - var ch1 = *src; - Debug.Assert(ch1 == '/', "Path segment must always start with a '/'"); - - byte ch2, ch3, ch4; - - switch (end - src) + var nextSlashDotIndex = src.IndexOf(slashDot); + if (nextSlashDotIndex < 0) { - case 1: - break; - case 2: - ch2 = *(src + 1); - - if (ch2 == ByteDot) - { - // B. if the input buffer begins with a prefix of "/./" or "/.", - // where "." is a complete path segment, then replace that - // prefix with "/" in the input buffer; otherwise, - src += 1; - *src = ByteSlash; - continue; - } - - break; - case 3: - ch2 = *(src + 1); - ch3 = *(src + 2); + // Copy the remianing src to dst, and return. + src.CopyTo(dst[writtenLength..]); + writtenLength += src.Length; + return writtenLength; + } + else + { + src.Slice(0, nextSlashDotIndex).CopyTo(dst[writtenLength..]); + writtenLength += nextSlashDotIndex; + src = src[(nextSlashDotIndex + 2)..]; + } - if (ch2 == ByteDot && ch3 == ByteDot) + switch (src.Length) + { + case 0: // Ending with /. + dst[writtenLength++] = ByteSlash; + return writtenLength; + case 1: // Ending with either /.. or /./ + if (src[0] == ByteDot) { - // C. if the input buffer begins with a prefix of "/../" or "/..", - // where ".." is a complete path segment, then replace that - // prefix with "/" in the input buffer and remove the last - // segment and its preceding "/" (if any) from the output - // buffer; otherwise, - src += 2; - *src = ByteSlash; - - if (dst > start) + // Backtrack + if (writtenLength > 0) { - do + var lastIndex = dst.Slice(0, writtenLength - 1).LastIndexOf(ByteSlash); + if (lastIndex < 0) { - dst--; - } while (dst > start && *dst != ByteSlash); + writtenLength = 0; + } + else + { + writtenLength = lastIndex + 1; + } + return writtenLength; + } + else + { + dst[0] = ByteSlash; + return 1; } - - continue; } - else if (ch2 == ByteDot && ch3 == ByteSlash) + else if (src[0] == ByteSlash) { - // B. if the input buffer begins with a prefix of "/./" or "/.", - // where "." is a complete path segment, then replace that - // prefix with "/" in the input buffer; otherwise, - src += 2; - continue; + dst[writtenLength++] = ByteSlash; + return writtenLength; } - break; - default: - ch2 = *(src + 1); - ch3 = *(src + 2); - ch4 = *(src + 3); - - if (ch2 == ByteDot && ch3 == ByteDot && ch4 == ByteSlash) + default: // Case of /../ or /./ + if (dotSlash.SequenceEqual(src.Slice(0, 2))) { - // C. if the input buffer begins with a prefix of "/../" or "/..", - // where ".." is a complete path segment, then replace that - // prefix with "/" in the input buffer and remove the last - // segment and its preceding "/" (if any) from the output - // buffer; otherwise, - src += 3; - - if (dst > start) + // Backtrack + if (writtenLength > 0) { - do + var lastIndex = dst.Slice(0, writtenLength - 1).LastIndexOf(ByteSlash); + if (lastIndex < 0) + { + writtenLength = 0; + } + else { - dst--; - } while (dst > start && *dst != ByteSlash); + writtenLength = lastIndex; + } } - - continue; + src = src.Slice(1); } - else if (ch2 == ByteDot && ch3 == ByteSlash) + else if (src[0] == ByteSlash && writtenLength > 0) { - // B. if the input buffer begins with a prefix of "/./" or "/.", - // where "." is a complete path segment, then replace that - // prefix with "/" in the input buffer; otherwise, - src += 2; - continue; + //dst[writtenLength++] = ByteSlash; } - break; } - - // E. move the first path segment in the input buffer to the end of - // the output buffer, including the initial "/" character (if - // any) and any subsequent characters up to, but not including, - // the next "/" character or the end of the input buffer. - do - { - *dst++ = ch1; - ch1 = *++src; - } while (src < end && ch1 != ByteSlash); } - - if (dst == start) - { - *dst++ = ByteSlash; - } - - return (int)(dst - start); + return writtenLength; } - public static unsafe bool ContainsDotSegments(byte* start, byte* end) + public static bool ContainsDotSegments(Span src) { - var src = start; - while (src < end) + Debug.Assert(src[0] == '/', "Path segment must always start with a '/'"); + ReadOnlySpan slashDot = "/."u8; + ReadOnlySpan dotSlash = "./"u8; + while (src.Length > 0) { - var ch1 = *src; - Debug.Assert(ch1 == '/', "Path segment must always start with a '/'"); - - byte ch2, ch3, ch4; - - switch (end - src) + var nextSlashDotIndex = src.IndexOf(slashDot); + if (nextSlashDotIndex < 0) { - case 1: - break; - case 2: - ch2 = *(src + 1); - - if (ch2 == ByteDot) - { - return true; - } - - break; - case 3: - ch2 = *(src + 1); - ch3 = *(src + 2); - - if ((ch2 == ByteDot && ch3 == ByteDot) || - (ch2 == ByteDot && ch3 == ByteSlash)) + return false; + } + else + { + src = src[(nextSlashDotIndex + 2)..]; + } + switch (src.Length) + { + case 0: // Case of /. + return true; + case 1: // Case of /.. or /./ + if (src[0] == ByteDot || src[0] == ByteSlash) { return true; } - break; - default: - ch2 = *(src + 1); - ch3 = *(src + 2); - ch4 = *(src + 3); - - if ((ch2 == ByteDot && ch3 == ByteDot && ch4 == ByteSlash) || - (ch2 == ByteDot && ch3 == ByteSlash)) + default: // Case of /../ or /./ + if (dotSlash.SequenceEqual(src.Slice(0, 2)) || src[0] == ByteSlash) { return true; } - break; } - - do - { - ch1 = *++src; - } while (src < end && ch1 != ByteSlash); } - return false; } } diff --git a/src/Servers/Kestrel/Core/test/PathNormalizerTests.cs b/src/Servers/Kestrel/Core/test/PathNormalizerTests.cs index e0d253d4cf42..cce1ce687908 100644 --- a/src/Servers/Kestrel/Core/test/PathNormalizerTests.cs +++ b/src/Servers/Kestrel/Core/test/PathNormalizerTests.cs @@ -1,4 +1,4 @@ -// Licensed to the .NET Foundation under one or more agreements. +// Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. using System; @@ -54,6 +54,16 @@ public class PathNormalizerTests [InlineData("/", "/")] [InlineData("/no/segments", "/no/segments")] [InlineData("/no/segments/", "/no/segments/")] + [InlineData("/././", "/")] + [InlineData("/./.", "/")] + [InlineData("/../..", "/")] + [InlineData("/../../", "/")] + [InlineData("/../.", "/")] + [InlineData("/./..", "/")] + [InlineData("/.././", "/")] + [InlineData("/./../", "/")] + [InlineData("/..", "/")] + [InlineData("/.", "/")] public void RemovesDotSegments(string input, string expected) { var data = Encoding.ASCII.GetBytes(input); From 3575c4557bb04bd51a574215277bd1219e890616 Mon Sep 17 00:00:00 2001 From: ladeak Date: Sun, 7 Jul 2024 22:03:03 +0200 Subject: [PATCH 02/13] FirstIndexOfDotSegment --- .../Core/src/Internal/Http/PathNormalizer.cs | 82 +++++++++---------- .../Kestrel/Core/test/PathNormalizerTests.cs | 1 + 2 files changed, 39 insertions(+), 44 deletions(-) diff --git a/src/Servers/Kestrel/Core/src/Internal/Http/PathNormalizer.cs b/src/Servers/Kestrel/Core/src/Internal/Http/PathNormalizer.cs index 297516b9cd07..4a180af23166 100644 --- a/src/Servers/Kestrel/Core/src/Internal/Http/PathNormalizer.cs +++ b/src/Servers/Kestrel/Core/src/Internal/Http/PathNormalizer.cs @@ -53,20 +53,15 @@ public static string DecodePath(Span path, bool pathEncoded, string rawTar public static int RemoveDotSegments(Span src) { Debug.Assert(src[0] == '/', "Path segment must always start with a '/'"); - ReadOnlySpan slashDot = "/."u8; ReadOnlySpan dotSlash = "./"u8; - if (!ContainsDotSegments(src)) - { - return src.Length; - } var writtenLength = 0; var dst = src; while (src.Length > 0) { - var nextSlashDotIndex = src.IndexOf(slashDot); - if (nextSlashDotIndex < 0) + var nextDotSegmentIndex = FirstIndexOfDotSegment(src); + if (nextDotSegmentIndex < 0) { // Copy the remianing src to dst, and return. src.CopyTo(dst[writtenLength..]); @@ -75,9 +70,9 @@ public static int RemoveDotSegments(Span src) } else { - src.Slice(0, nextSlashDotIndex).CopyTo(dst[writtenLength..]); - writtenLength += nextSlashDotIndex; - src = src[(nextSlashDotIndex + 2)..]; + src.Slice(0, nextDotSegmentIndex).CopyTo(dst[writtenLength..]); + writtenLength += nextDotSegmentIndex; + src = src[(nextDotSegmentIndex + 2)..]; } switch (src.Length) @@ -88,25 +83,18 @@ public static int RemoveDotSegments(Span src) case 1: // Ending with either /.. or /./ if (src[0] == ByteDot) { - // Backtrack - if (writtenLength > 0) + // Remove the last segment and replace the path with '/' + var lastSlashIndex = dst.Slice(0, writtenLength).LastIndexOf(ByteSlash); + if (lastSlashIndex < 0) { - var lastIndex = dst.Slice(0, writtenLength - 1).LastIndexOf(ByteSlash); - if (lastIndex < 0) - { - writtenLength = 0; - } - else - { - writtenLength = lastIndex + 1; - } - return writtenLength; + dst[0] = ByteSlash; + return 1; } else { - dst[0] = ByteSlash; - return 1; + writtenLength = lastSlashIndex + 1; } + return writtenLength; } else if (src[0] == ByteSlash) { @@ -117,25 +105,24 @@ public static int RemoveDotSegments(Span src) default: // Case of /../ or /./ if (dotSlash.SequenceEqual(src.Slice(0, 2))) { - // Backtrack - if (writtenLength > 0) + // Remove the last segment and replace the path with '/' + var lastIndex = dst.Slice(0, writtenLength).LastIndexOf(ByteSlash); + if (lastIndex < 0) + { + dst[0] = ByteSlash; + writtenLength = 1; + } + else { - var lastIndex = dst.Slice(0, writtenLength - 1).LastIndexOf(ByteSlash); - if (lastIndex < 0) - { - writtenLength = 0; - } - else - { - writtenLength = lastIndex; - } + writtenLength = lastIndex; } src = src.Slice(1); } - else if (src[0] == ByteSlash && writtenLength > 0) - { - //dst[writtenLength++] = ByteSlash; - } + //else if (src[0] == ByteSlash) + //{ + // dst[writtenLength++] = ByteSlash; + // src = src.Slice(1); + //} break; } } @@ -143,39 +130,46 @@ public static int RemoveDotSegments(Span src) } public static bool ContainsDotSegments(Span src) + { + return FirstIndexOfDotSegment(src) > -1; + } + + private static int FirstIndexOfDotSegment(Span src) { Debug.Assert(src[0] == '/', "Path segment must always start with a '/'"); ReadOnlySpan slashDot = "/."u8; ReadOnlySpan dotSlash = "./"u8; + int totalLength = 0; while (src.Length > 0) { var nextSlashDotIndex = src.IndexOf(slashDot); if (nextSlashDotIndex < 0) { - return false; + return -1; } else { src = src[(nextSlashDotIndex + 2)..]; + totalLength += nextSlashDotIndex + 2; } switch (src.Length) { case 0: // Case of /. - return true; + return totalLength - 2; case 1: // Case of /.. or /./ if (src[0] == ByteDot || src[0] == ByteSlash) { - return true; + return totalLength - 2; } break; default: // Case of /../ or /./ if (dotSlash.SequenceEqual(src.Slice(0, 2)) || src[0] == ByteSlash) { - return true; + return totalLength - 2; } break; } } - return false; + return -1; } } diff --git a/src/Servers/Kestrel/Core/test/PathNormalizerTests.cs b/src/Servers/Kestrel/Core/test/PathNormalizerTests.cs index cce1ce687908..da8c85687bfe 100644 --- a/src/Servers/Kestrel/Core/test/PathNormalizerTests.cs +++ b/src/Servers/Kestrel/Core/test/PathNormalizerTests.cs @@ -70,5 +70,6 @@ public void RemovesDotSegments(string input, string expected) var length = PathNormalizer.RemoveDotSegments(new Span(data)); Assert.True(length >= 1); Assert.Equal(expected, Encoding.ASCII.GetString(data, 0, length)); + Assert.Equal(input != expected, PathNormalizer.ContainsDotSegments(Encoding.ASCII.GetBytes(input))); } } From d830a59992992c9433bb38e9579dcea462f16f7e Mon Sep 17 00:00:00 2001 From: ladeak Date: Thu, 11 Jul 2024 08:42:48 +0200 Subject: [PATCH 03/13] Next iteration --- .../Core/src/Internal/Http/PathNormalizer.cs | 35 ++++++++------- .../DotSegmentRemovalBenchmark.cs | 44 +++++++++++-------- 2 files changed, 44 insertions(+), 35 deletions(-) diff --git a/src/Servers/Kestrel/Core/src/Internal/Http/PathNormalizer.cs b/src/Servers/Kestrel/Core/src/Internal/Http/PathNormalizer.cs index 4a180af23166..77a6d2c4acd1 100644 --- a/src/Servers/Kestrel/Core/src/Internal/Http/PathNormalizer.cs +++ b/src/Servers/Kestrel/Core/src/Internal/Http/PathNormalizer.cs @@ -70,6 +70,9 @@ public static int RemoveDotSegments(Span src) } else { + // Copy until the next segment excluding the trailer. Move the read pointer + // beyond the initial /. section, because FirstIndexOfDotSegment return the + // index of a complete dot segment. src.Slice(0, nextDotSegmentIndex).CopyTo(dst[writtenLength..]); writtenLength += nextDotSegmentIndex; src = src[(nextDotSegmentIndex + 2)..]; @@ -80,11 +83,14 @@ public static int RemoveDotSegments(Span src) case 0: // Ending with /. dst[writtenLength++] = ByteSlash; return writtenLength; - case 1: // Ending with either /.. or /./ + + case 1: // Ending with /.. or /./ if (src[0] == ByteDot) { - // Remove the last segment and replace the path with '/' + // Remove the last segment and replace the path with / var lastSlashIndex = dst.Slice(0, writtenLength).LastIndexOf(ByteSlash); + + // If this was the beginning of the string, then return / if (lastSlashIndex < 0) { dst[0] = ByteSlash; @@ -98,6 +104,7 @@ public static int RemoveDotSegments(Span src) } else if (src[0] == ByteSlash) { + // Replace the /./ segment with a closing / dst[writtenLength++] = ByteSlash; return writtenLength; } @@ -105,24 +112,18 @@ public static int RemoveDotSegments(Span src) default: // Case of /../ or /./ if (dotSlash.SequenceEqual(src.Slice(0, 2))) { - // Remove the last segment and replace the path with '/' + // Remove the last segment and replace the path with / var lastIndex = dst.Slice(0, writtenLength).LastIndexOf(ByteSlash); - if (lastIndex < 0) - { - dst[0] = ByteSlash; - writtenLength = 1; - } - else - { - writtenLength = lastIndex; - } + + // Move write pointer to the end of the previous segment without / or to start position + writtenLength = Math.Max(0, lastIndex); + + // Move the read pointer to the next segments beginning including / src = src.Slice(1); } - //else if (src[0] == ByteSlash) - //{ - // dst[writtenLength++] = ByteSlash; - // src = src.Slice(1); - //} + // Else if it was /./ segment, then move the read pointer to end of the segment. + // As the pointer is already there, do nothing for this case. Note, that this switches + // always handles a complete dot segment. break; } } diff --git a/src/Servers/Kestrel/perf/Microbenchmarks/DotSegmentRemovalBenchmark.cs b/src/Servers/Kestrel/perf/Microbenchmarks/DotSegmentRemovalBenchmark.cs index 16e901bea5f4..05e9a2b0fcc0 100644 --- a/src/Servers/Kestrel/perf/Microbenchmarks/DotSegmentRemovalBenchmark.cs +++ b/src/Servers/Kestrel/perf/Microbenchmarks/DotSegmentRemovalBenchmark.cs @@ -1,4 +1,4 @@ -// Licensed to the .NET Foundation under one or more agreements. +// Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. using System.Text; @@ -13,45 +13,53 @@ public class DotSegmentRemovalBenchmark private const string _noDotSegments = "/long/request/target/for/benchmarking/what/else/can/we/put/here"; private const string _singleDotSegments = "/long/./request/./target/./for/./benchmarking/./what/./else/./can/./we/./put/./here"; private const string _doubleDotSegments = "/long/../request/../target/../for/../benchmarking/../what/../else/../can/../we/../put/../here"; + private const string _oneSingleDotSegments = "/long/request/target/for/./benchmarking/what/else/can/we/put/here"; + private const string _oneDoubleDotSegments = "/long/request/target/for/../benchmarking/what/else/can/we/put/here"; private readonly byte[] _noDotSegmentsAscii = Encoding.ASCII.GetBytes(_noDotSegments); private readonly byte[] _singleDotSegmentsAscii = Encoding.ASCII.GetBytes(_singleDotSegments); private readonly byte[] _doubleDotSegmentsAscii = Encoding.ASCII.GetBytes(_doubleDotSegments); + private readonly byte[] _oneDingleDotSegmentsAscii = Encoding.ASCII.GetBytes(_oneSingleDotSegments); + private readonly byte[] _oneDoubleDotSegmentsAscii = Encoding.ASCII.GetBytes(_oneDoubleDotSegments); private readonly byte[] _noDotSegmentsBytes = new byte[_noDotSegments.Length]; private readonly byte[] _singleDotSegmentsBytes = new byte[_singleDotSegments.Length]; private readonly byte[] _doubleDotSegmentsBytes = new byte[_doubleDotSegments.Length]; + private readonly byte[] _oneSingleDotSegmentsBytes = new byte[_singleDotSegments.Length]; + private readonly byte[] _oneDoubleDotSegmentsBytes = new byte[_doubleDotSegments.Length]; [Benchmark(Baseline = true)] - public unsafe int NoDotSegments() + public int NoDotSegments() { _noDotSegmentsAscii.CopyTo(_noDotSegmentsBytes, 0); - - fixed (byte* start = _noDotSegmentsBytes) - { - return PathNormalizer.RemoveDotSegments(start, start + _noDotSegments.Length); - } + return PathNormalizer.RemoveDotSegments(_noDotSegmentsBytes); } [Benchmark] - public unsafe int SingleDotSegments() + public int SingleDotSegments() { _singleDotSegmentsAscii.CopyTo(_singleDotSegmentsBytes, 0); - - fixed (byte* start = _singleDotSegmentsBytes) - { - return PathNormalizer.RemoveDotSegments(start, start + _singleDotSegments.Length); - } + return PathNormalizer.RemoveDotSegments(_singleDotSegmentsBytes); } [Benchmark] - public unsafe int DoubleDotSegments() + public int DoubleDotSegments() { _doubleDotSegmentsAscii.CopyTo(_doubleDotSegmentsBytes, 0); + return PathNormalizer.RemoveDotSegments(_doubleDotSegmentsBytes); + } - fixed (byte* start = _doubleDotSegmentsBytes) - { - return PathNormalizer.RemoveDotSegments(start, start + _doubleDotSegments.Length); - } + [Benchmark] + public int OneSingleDotSegments() + { + _oneDingleDotSegmentsAscii.CopyTo(_oneSingleDotSegmentsBytes, 0); + return PathNormalizer.RemoveDotSegments(_oneSingleDotSegmentsBytes); + } + + [Benchmark] + public int OneDoubleDotSegments() + { + _oneDoubleDotSegmentsAscii.CopyTo(_oneDoubleDotSegmentsBytes, 0); + return PathNormalizer.RemoveDotSegments(_oneDoubleDotSegmentsBytes); } } From 1de225a6502022e69b38645e1723ae3bf4efb839 Mon Sep 17 00:00:00 2001 From: ladeak Date: Fri, 12 Jul 2024 21:32:33 +0200 Subject: [PATCH 04/13] Next iteration --- .../Core/src/Internal/Http/PathNormalizer.cs | 42 +++++++++++++------ 1 file changed, 30 insertions(+), 12 deletions(-) diff --git a/src/Servers/Kestrel/Core/src/Internal/Http/PathNormalizer.cs b/src/Servers/Kestrel/Core/src/Internal/Http/PathNormalizer.cs index 77a6d2c4acd1..dfb79b4363d4 100644 --- a/src/Servers/Kestrel/Core/src/Internal/Http/PathNormalizer.cs +++ b/src/Servers/Kestrel/Core/src/Internal/Http/PathNormalizer.cs @@ -54,13 +54,14 @@ public static int RemoveDotSegments(Span src) { Debug.Assert(src[0] == '/', "Path segment must always start with a '/'"); ReadOnlySpan dotSlash = "./"u8; + ReadOnlySpan slashDot = "/."u8; var writtenLength = 0; var dst = src; while (src.Length > 0) { - var nextDotSegmentIndex = FirstIndexOfDotSegment(src); + var nextDotSegmentIndex = src.IndexOf(slashDot); if (nextDotSegmentIndex < 0) { // Copy the remianing src to dst, and return. @@ -68,24 +69,28 @@ public static int RemoveDotSegments(Span src) writtenLength += src.Length; return writtenLength; } - else + else if (nextDotSegmentIndex > 0) { // Copy until the next segment excluding the trailer. Move the read pointer // beyond the initial /. section, because FirstIndexOfDotSegment return the // index of a complete dot segment. src.Slice(0, nextDotSegmentIndex).CopyTo(dst[writtenLength..]); writtenLength += nextDotSegmentIndex; - src = src[(nextDotSegmentIndex + 2)..]; + src = src[(nextDotSegmentIndex)..]; } switch (src.Length) { - case 0: // Ending with /. + case 0: + case 1: + Debug.Fail("This should be always larger than 1"); + break; + case 2: // Ending with /. dst[writtenLength++] = ByteSlash; return writtenLength; - case 1: // Ending with /.. or /./ - if (src[0] == ByteDot) + case 3: // Ending with /.. or /./ + if (src[2] == ByteDot) { // Remove the last segment and replace the path with / var lastSlashIndex = dst.Slice(0, writtenLength).LastIndexOf(ByteSlash); @@ -102,15 +107,20 @@ public static int RemoveDotSegments(Span src) } return writtenLength; } - else if (src[0] == ByteSlash) + else if (src[2] == ByteSlash) { // Replace the /./ segment with a closing / dst[writtenLength++] = ByteSlash; return writtenLength; } + else + { + dst[writtenLength++] = ByteSlash; + src = src.Slice(1); + } break; default: // Case of /../ or /./ - if (dotSlash.SequenceEqual(src.Slice(0, 2))) + if (dotSlash.SequenceEqual(src.Slice(2, 2))) { // Remove the last segment and replace the path with / var lastIndex = dst.Slice(0, writtenLength).LastIndexOf(ByteSlash); @@ -119,11 +129,19 @@ public static int RemoveDotSegments(Span src) writtenLength = Math.Max(0, lastIndex); // Move the read pointer to the next segments beginning including / - src = src.Slice(1); + src = src.Slice(3); + } + else if (src[2] == ByteSlash) + { + src = src.Slice(2); } - // Else if it was /./ segment, then move the read pointer to end of the segment. - // As the pointer is already there, do nothing for this case. Note, that this switches - // always handles a complete dot segment. + else + { + dst[writtenLength++] = ByteSlash; + dst[writtenLength++] = ByteDot; + src = src.Slice(2); + } + break; } } From 205eae8f413da9195e6da777a36006e3dd3ce63d Mon Sep 17 00:00:00 2001 From: ladeak Date: Fri, 12 Jul 2024 21:43:55 +0200 Subject: [PATCH 05/13] Back to contains from FirstIndexOf --- .../Core/src/Internal/Http/PathNormalizer.cs | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/src/Servers/Kestrel/Core/src/Internal/Http/PathNormalizer.cs b/src/Servers/Kestrel/Core/src/Internal/Http/PathNormalizer.cs index dfb79b4363d4..0f5f04cf2607 100644 --- a/src/Servers/Kestrel/Core/src/Internal/Http/PathNormalizer.cs +++ b/src/Servers/Kestrel/Core/src/Internal/Http/PathNormalizer.cs @@ -71,7 +71,7 @@ public static int RemoveDotSegments(Span src) } else if (nextDotSegmentIndex > 0) { - // Copy until the next segment excluding the trailer. Move the read pointer + // Copy until the next segment excluding the trailer. // beyond the initial /. section, because FirstIndexOfDotSegment return the // index of a complete dot segment. src.Slice(0, nextDotSegmentIndex).CopyTo(dst[writtenLength..]); @@ -149,46 +149,39 @@ public static int RemoveDotSegments(Span src) } public static bool ContainsDotSegments(Span src) - { - return FirstIndexOfDotSegment(src) > -1; - } - - private static int FirstIndexOfDotSegment(Span src) { Debug.Assert(src[0] == '/', "Path segment must always start with a '/'"); ReadOnlySpan slashDot = "/."u8; ReadOnlySpan dotSlash = "./"u8; - int totalLength = 0; while (src.Length > 0) { var nextSlashDotIndex = src.IndexOf(slashDot); if (nextSlashDotIndex < 0) { - return -1; + return false; } else { src = src[(nextSlashDotIndex + 2)..]; - totalLength += nextSlashDotIndex + 2; } switch (src.Length) { case 0: // Case of /. - return totalLength - 2; + return true; case 1: // Case of /.. or /./ if (src[0] == ByteDot || src[0] == ByteSlash) { - return totalLength - 2; + return true; } break; default: // Case of /../ or /./ if (dotSlash.SequenceEqual(src.Slice(0, 2)) || src[0] == ByteSlash) { - return totalLength - 2; + return true; } break; } } - return -1; + return false; } } From dfd631e4c57412d4df8c8710a2aaac2f51f5fc21 Mon Sep 17 00:00:00 2001 From: ladeak Date: Fri, 12 Jul 2024 22:14:51 +0200 Subject: [PATCH 06/13] Next iteration ++ --- .../Core/src/Internal/Http/PathNormalizer.cs | 121 +++++++++--------- .../Kestrel/Core/test/PathNormalizerTests.cs | 2 + 2 files changed, 62 insertions(+), 61 deletions(-) diff --git a/src/Servers/Kestrel/Core/src/Internal/Http/PathNormalizer.cs b/src/Servers/Kestrel/Core/src/Internal/Http/PathNormalizer.cs index 0f5f04cf2607..958b879ad750 100644 --- a/src/Servers/Kestrel/Core/src/Internal/Http/PathNormalizer.cs +++ b/src/Servers/Kestrel/Core/src/Internal/Http/PathNormalizer.cs @@ -72,77 +72,76 @@ public static int RemoveDotSegments(Span src) else if (nextDotSegmentIndex > 0) { // Copy until the next segment excluding the trailer. - // beyond the initial /. section, because FirstIndexOfDotSegment return the - // index of a complete dot segment. - src.Slice(0, nextDotSegmentIndex).CopyTo(dst[writtenLength..]); + src[..nextDotSegmentIndex].CopyTo(dst[writtenLength..]); writtenLength += nextDotSegmentIndex; - src = src[(nextDotSegmentIndex)..]; + src = src[nextDotSegmentIndex..]; } - switch (src.Length) + // Case of /../ or /./ + if (src.Length > 3) { - case 0: - case 1: - Debug.Fail("This should be always larger than 1"); - break; - case 2: // Ending with /. + if (src[2] == ByteSlash) + { + src = src[2..]; + } + else if (dotSlash.SequenceEqual(src.Slice(2, 2))) + { + // Remove the last segment and replace the path with / + var lastIndex = dst[..writtenLength].LastIndexOf(ByteSlash); + + // Move write pointer to the end of the previous segment without / or to start position + writtenLength = int.Max(0, lastIndex); + + // Move the read pointer to the next segments beginning including / + src = src[3..]; + } + else + { + // No dot segment, copy the matched /. and bump the read pointer + slashDot.CopyTo(dst[writtenLength..]); + writtenLength += 2; + src = src[2..]; + } + } + // Ending with /.. or /./ + else if (src.Length == 3) + { + if (src[2] == ByteSlash) + { + // Replace the /./ segment with a closing / dst[writtenLength++] = ByteSlash; return writtenLength; - - case 3: // Ending with /.. or /./ - if (src[2] == ByteDot) - { - // Remove the last segment and replace the path with / - var lastSlashIndex = dst.Slice(0, writtenLength).LastIndexOf(ByteSlash); - - // If this was the beginning of the string, then return / - if (lastSlashIndex < 0) - { - dst[0] = ByteSlash; - return 1; - } - else - { - writtenLength = lastSlashIndex + 1; - } - return writtenLength; - } - else if (src[2] == ByteSlash) + } + else if (src[2] == ByteDot) + { + // Remove the last segment and replace the path with / + var lastSlashIndex = dst[..writtenLength].LastIndexOf(ByteSlash); + + // If this was the beginning of the string, then return / + if (lastSlashIndex < 0) { - // Replace the /./ segment with a closing / - dst[writtenLength++] = ByteSlash; - return writtenLength; + dst[0] = ByteSlash; + return 1; } else { - dst[writtenLength++] = ByteSlash; - src = src.Slice(1); - } - break; - default: // Case of /../ or /./ - if (dotSlash.SequenceEqual(src.Slice(2, 2))) - { - // Remove the last segment and replace the path with / - var lastIndex = dst.Slice(0, writtenLength).LastIndexOf(ByteSlash); - - // Move write pointer to the end of the previous segment without / or to start position - writtenLength = Math.Max(0, lastIndex); - - // Move the read pointer to the next segments beginning including / - src = src.Slice(3); + writtenLength = lastSlashIndex + 1; } - else if (src[2] == ByteSlash) - { - src = src.Slice(2); - } - else - { - dst[writtenLength++] = ByteSlash; - dst[writtenLength++] = ByteDot; - src = src.Slice(2); - } - - break; + return writtenLength; + } + else + { + // No dot segment, copy the /. and bump the read pointer. + slashDot.CopyTo(dst[writtenLength..]); + writtenLength += 2; + src = src[2..]; + } + } + // Ending with /. + else if (src.Length == 2) + { + dst[writtenLength++] = ByteSlash; + return writtenLength; } } return writtenLength; @@ -175,7 +174,7 @@ public static bool ContainsDotSegments(Span src) } break; default: // Case of /../ or /./ - if (dotSlash.SequenceEqual(src.Slice(0, 2)) || src[0] == ByteSlash) + if (dotSlash.SequenceEqual(src[..2]) || src[0] == ByteSlash) { return true; } diff --git a/src/Servers/Kestrel/Core/test/PathNormalizerTests.cs b/src/Servers/Kestrel/Core/test/PathNormalizerTests.cs index da8c85687bfe..075d5e9fa07e 100644 --- a/src/Servers/Kestrel/Core/test/PathNormalizerTests.cs +++ b/src/Servers/Kestrel/Core/test/PathNormalizerTests.cs @@ -64,6 +64,8 @@ public class PathNormalizerTests [InlineData("/./../", "/")] [InlineData("/..", "/")] [InlineData("/.", "/")] + [InlineData("/a/abc/../abc/../b", "/a/b")] + [InlineData("/a/abc/.a", "/a/abc/.a")] public void RemovesDotSegments(string input, string expected) { var data = Encoding.ASCII.GetBytes(input); From d3eba1ccd1585a80ed175295f1dd4e9dda3c89c7 Mon Sep 17 00:00:00 2001 From: ladeak Date: Sat, 13 Jul 2024 22:59:47 +0200 Subject: [PATCH 07/13] READPOINTER --- .../Core/src/Internal/Http/PathNormalizer.cs | 63 +++++++++++-------- 1 file changed, 36 insertions(+), 27 deletions(-) diff --git a/src/Servers/Kestrel/Core/src/Internal/Http/PathNormalizer.cs b/src/Servers/Kestrel/Core/src/Internal/Http/PathNormalizer.cs index 958b879ad750..afb641bf0205 100644 --- a/src/Servers/Kestrel/Core/src/Internal/Http/PathNormalizer.cs +++ b/src/Servers/Kestrel/Core/src/Internal/Http/PathNormalizer.cs @@ -2,6 +2,8 @@ // The .NET Foundation licenses this file to you under the MIT license. using System.Diagnostics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; using System.Text; using Microsoft.AspNetCore.Internal; using Microsoft.AspNetCore.Server.Kestrel.Core.Internal.Infrastructure; @@ -57,70 +59,77 @@ public static int RemoveDotSegments(Span src) ReadOnlySpan slashDot = "/."u8; var writtenLength = 0; - var dst = src; + var readPointer = 0; - while (src.Length > 0) + while (src.Length > readPointer) { - var nextDotSegmentIndex = src.IndexOf(slashDot); + var nextDotSegmentIndex = src[readPointer..].IndexOf(slashDot); + if (nextDotSegmentIndex < 0 && readPointer == 0) + { + return src.Length; + } if (nextDotSegmentIndex < 0) { // Copy the remianing src to dst, and return. - src.CopyTo(dst[writtenLength..]); - writtenLength += src.Length; + src[readPointer..].CopyTo(src[writtenLength..]); + writtenLength += src.Length - readPointer; return writtenLength; } else if (nextDotSegmentIndex > 0) { // Copy until the next segment excluding the trailer. - src[..nextDotSegmentIndex].CopyTo(dst[writtenLength..]); + Unsafe.CopyBlock(ref src[writtenLength], ref src[readPointer], (uint)nextDotSegmentIndex); writtenLength += nextDotSegmentIndex; - src = src[nextDotSegmentIndex..]; + readPointer += nextDotSegmentIndex; } // Case of /../ or /./ - if (src.Length > 3) + var remainingLength = src.Length - readPointer; + if (remainingLength > 3) { - if (src[2] == ByteSlash) + var nextIndex = readPointer + 2; + if (src[nextIndex] == ByteSlash) { - src = src[2..]; + readPointer = nextIndex; } - else if (dotSlash.SequenceEqual(src.Slice(2, 2))) + else if (MemoryMarshal.CreateSpan(ref src[nextIndex], 2).StartsWith(dotSlash)) { // Remove the last segment and replace the path with / - var lastIndex = dst[..writtenLength].LastIndexOf(ByteSlash); + var lastIndex = MemoryMarshal.CreateSpan(ref src[0], writtenLength).LastIndexOf(ByteSlash); // Move write pointer to the end of the previous segment without / or to start position writtenLength = int.Max(0, lastIndex); // Move the read pointer to the next segments beginning including / - src = src[3..]; + readPointer += 3; } else { - // No dot segment, copy the matched /. and bump the read pointer - slashDot.CopyTo(dst[writtenLength..]); + // Not a dot segment, copy the matched /. and bump the read pointer + slashDot.CopyTo(src[writtenLength..]); writtenLength += 2; - src = src[2..]; + readPointer = nextIndex; } } // Ending with /.. or /./ - else if (src.Length == 3) + else if (remainingLength == 3) { - if (src[2] == ByteSlash) + var nextIndex = readPointer + 2; + if (src[nextIndex] == ByteSlash) { // Replace the /./ segment with a closing / - dst[writtenLength++] = ByteSlash; + src[writtenLength++] = ByteSlash; return writtenLength; } - else if (src[2] == ByteDot) + else if (src[nextIndex] == ByteDot) { // Remove the last segment and replace the path with / - var lastSlashIndex = dst[..writtenLength].LastIndexOf(ByteSlash); + var lastSlashIndex = MemoryMarshal.CreateSpan(ref src[0], writtenLength).LastIndexOf(ByteSlash); // If this was the beginning of the string, then return / if (lastSlashIndex < 0) { - dst[0] = ByteSlash; + src[0] = ByteSlash; return 1; } else @@ -131,16 +140,16 @@ public static int RemoveDotSegments(Span src) } else { - // No dot segment, copy the /. and bump the read pointer. - slashDot.CopyTo(dst[writtenLength..]); + // Not a dot segment, copy the /. and bump the read pointer. + slashDot.CopyTo(src[writtenLength..]); writtenLength += 2; - src = src[2..]; + readPointer = nextIndex; } } // Ending with /. - else if (src.Length == 2) + else if (remainingLength == 2) { - dst[writtenLength++] = ByteSlash; + src[writtenLength++] = ByteSlash; return writtenLength; } } From 2b58d1d08bda74a179b41be62cdf3b10bb2bc65a Mon Sep 17 00:00:00 2001 From: ladeak Date: Thu, 18 Jul 2024 18:59:43 +0200 Subject: [PATCH 08/13] Review feedback --- .../Core/src/Internal/Http/PathNormalizer.cs | 63 +++++-------------- .../Kestrel/Core/test/PathNormalizerTests.cs | 12 +++- 2 files changed, 27 insertions(+), 48 deletions(-) diff --git a/src/Servers/Kestrel/Core/src/Internal/Http/PathNormalizer.cs b/src/Servers/Kestrel/Core/src/Internal/Http/PathNormalizer.cs index afb641bf0205..3700a40a099d 100644 --- a/src/Servers/Kestrel/Core/src/Internal/Http/PathNormalizer.cs +++ b/src/Servers/Kestrel/Core/src/Internal/Http/PathNormalizer.cs @@ -63,7 +63,8 @@ public static int RemoveDotSegments(Span src) while (src.Length > readPointer) { - var nextDotSegmentIndex = src[readPointer..].IndexOf(slashDot); + var currentSrc = src[readPointer..]; + var nextDotSegmentIndex = currentSrc.IndexOf(slashDot); if (nextDotSegmentIndex < 0 && readPointer == 0) { return src.Length; @@ -71,29 +72,33 @@ public static int RemoveDotSegments(Span src) if (nextDotSegmentIndex < 0) { // Copy the remianing src to dst, and return. - src[readPointer..].CopyTo(src[writtenLength..]); + currentSrc.CopyTo(src[writtenLength..]); writtenLength += src.Length - readPointer; return writtenLength; } else if (nextDotSegmentIndex > 0) { // Copy until the next segment excluding the trailer. - Unsafe.CopyBlock(ref src[writtenLength], ref src[readPointer], (uint)nextDotSegmentIndex); + currentSrc[..nextDotSegmentIndex].CopyTo(src[writtenLength..]); writtenLength += nextDotSegmentIndex; readPointer += nextDotSegmentIndex; } - // Case of /../ or /./ var remainingLength = src.Length - readPointer; + + // Case of /../ or /./ or non-dot segments. if (remainingLength > 3) { var nextIndex = readPointer + 2; + if (src[nextIndex] == ByteSlash) { + // Case: /./ readPointer = nextIndex; } else if (MemoryMarshal.CreateSpan(ref src[nextIndex], 2).StartsWith(dotSlash)) { + // Case: /../ // Remove the last segment and replace the path with / var lastIndex = MemoryMarshal.CreateSpan(ref src[0], writtenLength).LastIndexOf(ByteSlash); @@ -105,31 +110,32 @@ public static int RemoveDotSegments(Span src) } else { - // Not a dot segment, copy the matched /. and bump the read pointer + // Not a dot segment e.g. /.a, copy the matched /. and bump the read pointer slashDot.CopyTo(src[writtenLength..]); writtenLength += 2; readPointer = nextIndex; } } - // Ending with /.. or /./ + + // Ending with /.. or /./ or non-dot segments. else if (remainingLength == 3) { var nextIndex = readPointer + 2; if (src[nextIndex] == ByteSlash) { - // Replace the /./ segment with a closing / + // Case: /./ Replace the /./ segment with a closing / src[writtenLength++] = ByteSlash; return writtenLength; } else if (src[nextIndex] == ByteDot) { - // Remove the last segment and replace the path with / + // Case: /.. Remove the last segment and replace the path with / var lastSlashIndex = MemoryMarshal.CreateSpan(ref src[0], writtenLength).LastIndexOf(ByteSlash); // If this was the beginning of the string, then return / if (lastSlashIndex < 0) { - src[0] = ByteSlash; + Debug.Assert(src[0] == '/'); return 1; } else @@ -140,7 +146,7 @@ public static int RemoveDotSegments(Span src) } else { - // Not a dot segment, copy the /. and bump the read pointer. + // Not a dot segment e.g. /.a, copy the /. and bump the read pointer. slashDot.CopyTo(src[writtenLength..]); writtenLength += 2; readPointer = nextIndex; @@ -155,41 +161,4 @@ public static int RemoveDotSegments(Span src) } return writtenLength; } - - public static bool ContainsDotSegments(Span src) - { - Debug.Assert(src[0] == '/', "Path segment must always start with a '/'"); - ReadOnlySpan slashDot = "/."u8; - ReadOnlySpan dotSlash = "./"u8; - while (src.Length > 0) - { - var nextSlashDotIndex = src.IndexOf(slashDot); - if (nextSlashDotIndex < 0) - { - return false; - } - else - { - src = src[(nextSlashDotIndex + 2)..]; - } - switch (src.Length) - { - case 0: // Case of /. - return true; - case 1: // Case of /.. or /./ - if (src[0] == ByteDot || src[0] == ByteSlash) - { - return true; - } - break; - default: // Case of /../ or /./ - if (dotSlash.SequenceEqual(src[..2]) || src[0] == ByteSlash) - { - return true; - } - break; - } - } - return false; - } } diff --git a/src/Servers/Kestrel/Core/test/PathNormalizerTests.cs b/src/Servers/Kestrel/Core/test/PathNormalizerTests.cs index 075d5e9fa07e..2c58a68b8db7 100644 --- a/src/Servers/Kestrel/Core/test/PathNormalizerTests.cs +++ b/src/Servers/Kestrel/Core/test/PathNormalizerTests.cs @@ -66,12 +66,22 @@ public class PathNormalizerTests [InlineData("/.", "/")] [InlineData("/a/abc/../abc/../b", "/a/b")] [InlineData("/a/abc/.a", "/a/abc/.a")] + [InlineData("/a/abc/..a", "/a/abc/..a")] + [InlineData("/a/.b/c", "/a/.b/c")] + [InlineData("/a/.b/../c", "/a/c")] + [InlineData("/a/../.b/./c", "/.b/c")] + [InlineData("/a/.b/./c", "/a/.b/c")] + [InlineData("/a/./.b/./c", "/a/.b/c")] + [InlineData("/a/..b/c", "/a/..b/c")] + [InlineData("/a/..b/../c", "/a/c")] + [InlineData("/a/../..b/./c", "/..b/c")] + [InlineData("/a/..b/./c", "/a/..b/c")] + [InlineData("/a/./..b/./c", "/a/..b/c")] public void RemovesDotSegments(string input, string expected) { var data = Encoding.ASCII.GetBytes(input); var length = PathNormalizer.RemoveDotSegments(new Span(data)); Assert.True(length >= 1); Assert.Equal(expected, Encoding.ASCII.GetString(data, 0, length)); - Assert.Equal(input != expected, PathNormalizer.ContainsDotSegments(Encoding.ASCII.GetBytes(input))); } } From ec418f09488cc56ff59d3805feb28e326db17007 Mon Sep 17 00:00:00 2001 From: ladeak Date: Thu, 18 Jul 2024 19:02:43 +0200 Subject: [PATCH 09/13] Remove using statement --- src/Servers/Kestrel/Core/src/Internal/Http/PathNormalizer.cs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Servers/Kestrel/Core/src/Internal/Http/PathNormalizer.cs b/src/Servers/Kestrel/Core/src/Internal/Http/PathNormalizer.cs index 3700a40a099d..89be5efa0a85 100644 --- a/src/Servers/Kestrel/Core/src/Internal/Http/PathNormalizer.cs +++ b/src/Servers/Kestrel/Core/src/Internal/Http/PathNormalizer.cs @@ -2,7 +2,6 @@ // The .NET Foundation licenses this file to you under the MIT license. using System.Diagnostics; -using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using System.Text; using Microsoft.AspNetCore.Internal; From 8590b1b67078457c0e33bf4a5f02f231fa6e7bc6 Mon Sep 17 00:00:00 2001 From: ladeak Date: Mon, 29 Jul 2024 08:34:57 +0200 Subject: [PATCH 10/13] PathNormalizer in Microsoft.AspNetCore.HttpSys.Internal --- .../RequestProcessing/PathNormalizer.cs | 274 ++++++++---------- 1 file changed, 115 insertions(+), 159 deletions(-) diff --git a/src/Shared/HttpSys/RequestProcessing/PathNormalizer.cs b/src/Shared/HttpSys/RequestProcessing/PathNormalizer.cs index d29d26624578..dbd47a03d668 100644 --- a/src/Shared/HttpSys/RequestProcessing/PathNormalizer.cs +++ b/src/Shared/HttpSys/RequestProcessing/PathNormalizer.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. using System.Diagnostics; +using System.Runtime.InteropServices; namespace Microsoft.AspNetCore.HttpSys.Internal; @@ -11,195 +12,150 @@ internal static class PathNormalizer private const byte ByteDot = (byte)'.'; // In-place implementation of the algorithm from https://tools.ietf.org/html/rfc3986#section-5.2.4 - public static unsafe int RemoveDotSegments(Span input) + public static int RemoveDotSegments(Span src) { - fixed (byte* start = input) - { - var end = start + input.Length; - return RemoveDotSegments(start, end); - } - } - - public static unsafe int RemoveDotSegments(byte* start, byte* end) - { - if (!ContainsDotSegments(start, end)) - { - return (int)(end - start); - } + Debug.Assert(src[0] == '/', "Path segment must always start with a '/'"); + ReadOnlySpan dotSlash = "./"u8; + ReadOnlySpan slashDot = "/."u8; - var src = start; - var dst = start; + var writtenLength = 0; + var readPointer = 0; - while (src < end) + while (src.Length > readPointer) { - var ch1 = *src; - Debug.Assert(ch1 == '/', "Path segment must always start with a '/'"); - - byte ch2, ch3, ch4; - - switch (end - src) + var currentSrc = src[readPointer..]; + var nextDotSegmentIndex = currentSrc.IndexOf(slashDot); + if (nextDotSegmentIndex < 0 && readPointer == 0) { - case 1: - break; - case 2: - ch2 = *(src + 1); - - if (ch2 == ByteDot) - { - // B. if the input buffer begins with a prefix of "/./" or "/.", - // where "." is a complete path segment, then replace that - // prefix with "/" in the input buffer; otherwise, - src += 1; - *src = ByteSlash; - continue; - } - - break; - case 3: - ch2 = *(src + 1); - ch3 = *(src + 2); + return src.Length; + } + if (nextDotSegmentIndex < 0) + { + // Copy the remianing src to dst, and return. + currentSrc.CopyTo(src[writtenLength..]); + writtenLength += src.Length - readPointer; + return writtenLength; + } + else if (nextDotSegmentIndex > 0) + { + // Copy until the next segment excluding the trailer. + currentSrc[..nextDotSegmentIndex].CopyTo(src[writtenLength..]); + writtenLength += nextDotSegmentIndex; + readPointer += nextDotSegmentIndex; + } - if (ch2 == ByteDot && ch3 == ByteDot) - { - // C. if the input buffer begins with a prefix of "/../" or "/..", - // where ".." is a complete path segment, then replace that - // prefix with "/" in the input buffer and remove the last - // segment and its preceding "/" (if any) from the output - // buffer; otherwise, - src += 2; - *src = ByteSlash; + var remainingLength = src.Length - readPointer; - if (dst > start) - { - do - { - dst--; - } while (dst > start && *dst != ByteSlash); - } + // Case of /../ or /./ or non-dot segments. + if (remainingLength > 3) + { + var nextIndex = readPointer + 2; + + if (src[nextIndex] == ByteSlash) + { + // Case: /./ + readPointer = nextIndex; + } + else if (MemoryMarshal.CreateSpan(ref src[nextIndex], 2).StartsWith(dotSlash)) + { + // Case: /../ + // Remove the last segment and replace the path with / + var lastIndex = MemoryMarshal.CreateSpan(ref src[0], writtenLength).LastIndexOf(ByteSlash); + + // Move write pointer to the end of the previous segment without / or to start position + writtenLength = int.Max(0, lastIndex); + + // Move the read pointer to the next segments beginning including / + readPointer += 3; + } + else + { + // Not a dot segment e.g. /.a, copy the matched /. and bump the read pointer + slashDot.CopyTo(src[writtenLength..]); + writtenLength += 2; + readPointer = nextIndex; + } + } - continue; - } - else if (ch2 == ByteDot && ch3 == ByteSlash) + // Ending with /.. or /./ or non-dot segments. + else if (remainingLength == 3) + { + var nextIndex = readPointer + 2; + if (src[nextIndex] == ByteSlash) + { + // Case: /./ Replace the /./ segment with a closing / + src[writtenLength++] = ByteSlash; + return writtenLength; + } + else if (src[nextIndex] == ByteDot) + { + // Case: /.. Remove the last segment and replace the path with / + var lastSlashIndex = MemoryMarshal.CreateSpan(ref src[0], writtenLength).LastIndexOf(ByteSlash); + + // If this was the beginning of the string, then return / + if (lastSlashIndex < 0) { - // B. if the input buffer begins with a prefix of "/./" or "/.", - // where "." is a complete path segment, then replace that - // prefix with "/" in the input buffer; otherwise, - src += 2; - continue; + Debug.Assert(src[0] == '/'); + return 1; } - - break; - default: - ch2 = *(src + 1); - ch3 = *(src + 2); - ch4 = *(src + 3); - - if (ch2 == ByteDot && ch3 == ByteDot && ch4 == ByteSlash) + else { - // C. if the input buffer begins with a prefix of "/../" or "/..", - // where ".." is a complete path segment, then replace that - // prefix with "/" in the input buffer and remove the last - // segment and its preceding "/" (if any) from the output - // buffer; otherwise, - src += 3; - - if (dst > start) - { - do - { - dst--; - } while (dst > start && *dst != ByteSlash); - } - - continue; + writtenLength = lastSlashIndex + 1; } - else if (ch2 == ByteDot && ch3 == ByteSlash) - { - // B. if the input buffer begins with a prefix of "/./" or "/.", - // where "." is a complete path segment, then replace that - // prefix with "/" in the input buffer; otherwise, - src += 2; - continue; - } - - break; + return writtenLength; + } + else + { + // Not a dot segment e.g. /.a, copy the /. and bump the read pointer. + slashDot.CopyTo(src[writtenLength..]); + writtenLength += 2; + readPointer = nextIndex; + } } - - // E. move the first path segment in the input buffer to the end of - // the output buffer, including the initial "/" character (if - // any) and any subsequent characters up to, but not including, - // the next "/" character or the end of the input buffer. - do + // Ending with /. + else if (remainingLength == 2) { - *dst++ = ch1; - ch1 = *++src; - } while (src < end && ch1 != ByteSlash); - } - - if (dst == start) - { - *dst++ = ByteSlash; + src[writtenLength++] = ByteSlash; + return writtenLength; + } } - - return (int)(dst - start); + return writtenLength; } - public static unsafe bool ContainsDotSegments(byte* start, byte* end) + public static bool ContainsDotSegments(Span src) { - var src = start; - var dst = start; - - while (src < end) + Debug.Assert(src[0] == '/', "Path segment must always start with a '/'"); + ReadOnlySpan slashDot = "/."u8; + ReadOnlySpan dotSlash = "./"u8; + while (src.Length > 0) { - var ch1 = *src; - Debug.Assert(ch1 == '/', "Path segment must always start with a '/'"); - - byte ch2, ch3, ch4; - - switch (end - src) + var nextSlashDotIndex = src.IndexOf(slashDot); + if (nextSlashDotIndex < 0) { - case 1: - break; - case 2: - ch2 = *(src + 1); - - if (ch2 == ByteDot) - { - return true; - } - - break; - case 3: - ch2 = *(src + 1); - ch3 = *(src + 2); - - if ((ch2 == ByteDot && ch3 == ByteDot) || - (ch2 == ByteDot && ch3 == ByteSlash)) + return false; + } + else + { + src = src[(nextSlashDotIndex + 2)..]; + } + switch (src.Length) + { + case 0: // Case of /. + return true; + case 1: // Case of /.. or /./ + if (src[0] == ByteDot || src[0] == ByteSlash) { return true; } - break; - default: - ch2 = *(src + 1); - ch3 = *(src + 2); - ch4 = *(src + 3); - - if ((ch2 == ByteDot && ch3 == ByteDot && ch4 == ByteSlash) || - (ch2 == ByteDot && ch3 == ByteSlash)) + default: // Case of /../ or /./ + if (dotSlash.SequenceEqual(src[..2]) || src[0] == ByteSlash) { return true; } - break; } - - do - { - ch1 = *++src; - } while (src < end && ch1 != ByteSlash); } - return false; } } From c95374b6e694514236ffcfbb80b524fbc04c5393 Mon Sep 17 00:00:00 2001 From: ladeak Date: Tue, 30 Jul 2024 07:57:54 +0200 Subject: [PATCH 11/13] Remove ContainsDotSegments in the shared PathNormalizer --- .../RequestProcessing/PathNormalizer.cs | 37 ------------------- 1 file changed, 37 deletions(-) diff --git a/src/Shared/HttpSys/RequestProcessing/PathNormalizer.cs b/src/Shared/HttpSys/RequestProcessing/PathNormalizer.cs index dbd47a03d668..a6d5018257c9 100644 --- a/src/Shared/HttpSys/RequestProcessing/PathNormalizer.cs +++ b/src/Shared/HttpSys/RequestProcessing/PathNormalizer.cs @@ -121,41 +121,4 @@ public static int RemoveDotSegments(Span src) } return writtenLength; } - - public static bool ContainsDotSegments(Span src) - { - Debug.Assert(src[0] == '/', "Path segment must always start with a '/'"); - ReadOnlySpan slashDot = "/."u8; - ReadOnlySpan dotSlash = "./"u8; - while (src.Length > 0) - { - var nextSlashDotIndex = src.IndexOf(slashDot); - if (nextSlashDotIndex < 0) - { - return false; - } - else - { - src = src[(nextSlashDotIndex + 2)..]; - } - switch (src.Length) - { - case 0: // Case of /. - return true; - case 1: // Case of /.. or /./ - if (src[0] == ByteDot || src[0] == ByteSlash) - { - return true; - } - break; - default: // Case of /../ or /./ - if (dotSlash.SequenceEqual(src[..2]) || src[0] == ByteSlash) - { - return true; - } - break; - } - } - return false; - } } From 239f511218bdee6390b221e0079a1dffb72c5dc6 Mon Sep 17 00:00:00 2001 From: ladeak Date: Fri, 2 Aug 2024 22:53:23 +0200 Subject: [PATCH 12/13] Review feedback copy 3 chars when not matched a dot segment --- .../Core/src/Internal/Http/PathNormalizer.cs | 15 +++++++-------- .../HttpSys/RequestProcessing/PathNormalizer.cs | 15 +++++++-------- 2 files changed, 14 insertions(+), 16 deletions(-) diff --git a/src/Servers/Kestrel/Core/src/Internal/Http/PathNormalizer.cs b/src/Servers/Kestrel/Core/src/Internal/Http/PathNormalizer.cs index 89be5efa0a85..eef91f142abe 100644 --- a/src/Servers/Kestrel/Core/src/Internal/Http/PathNormalizer.cs +++ b/src/Servers/Kestrel/Core/src/Internal/Http/PathNormalizer.cs @@ -109,10 +109,10 @@ public static int RemoveDotSegments(Span src) } else { - // Not a dot segment e.g. /.a, copy the matched /. and bump the read pointer - slashDot.CopyTo(src[writtenLength..]); - writtenLength += 2; - readPointer = nextIndex; + // Not a dot segment e.g. /.a, copy the matched /. and the next character then bump the read pointer + src.Slice(readPointer, 3).CopyTo(src[writtenLength..]); + writtenLength += 3; + readPointer = nextIndex + 1; } } @@ -145,10 +145,9 @@ public static int RemoveDotSegments(Span src) } else { - // Not a dot segment e.g. /.a, copy the /. and bump the read pointer. - slashDot.CopyTo(src[writtenLength..]); - writtenLength += 2; - readPointer = nextIndex; + // Not a dot segment e.g. /.a, copy the remaining part. + src[readPointer..].CopyTo(src[writtenLength..]); + return writtenLength + 3; } } // Ending with /. diff --git a/src/Shared/HttpSys/RequestProcessing/PathNormalizer.cs b/src/Shared/HttpSys/RequestProcessing/PathNormalizer.cs index a6d5018257c9..bc26d3bd4608 100644 --- a/src/Shared/HttpSys/RequestProcessing/PathNormalizer.cs +++ b/src/Shared/HttpSys/RequestProcessing/PathNormalizer.cs @@ -70,10 +70,10 @@ public static int RemoveDotSegments(Span src) } else { - // Not a dot segment e.g. /.a, copy the matched /. and bump the read pointer - slashDot.CopyTo(src[writtenLength..]); - writtenLength += 2; - readPointer = nextIndex; + // Not a dot segment e.g. /.a, copy the matched /. and the next character then bump the read pointer + src.Slice(readPointer, 3).CopyTo(src[writtenLength..]); + writtenLength += 3; + readPointer = nextIndex + 1; } } @@ -106,10 +106,9 @@ public static int RemoveDotSegments(Span src) } else { - // Not a dot segment e.g. /.a, copy the /. and bump the read pointer. - slashDot.CopyTo(src[writtenLength..]); - writtenLength += 2; - readPointer = nextIndex; + // Not a dot segment e.g. /.a, copy the remaining part. + src[readPointer..].CopyTo(src[writtenLength..]); + return writtenLength + 3; } } // Ending with /. From 9d1960d9704234997031b29998e5d42a15a52f2c Mon Sep 17 00:00:00 2001 From: ladeak Date: Sat, 3 Aug 2024 10:38:47 +0200 Subject: [PATCH 13/13] Quarantined Caching_SendFileWithFullContentLength_Cached due to cache size limit --- src/Servers/HttpSys/test/FunctionalTests/ResponseCachingTests.cs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Servers/HttpSys/test/FunctionalTests/ResponseCachingTests.cs b/src/Servers/HttpSys/test/FunctionalTests/ResponseCachingTests.cs index 7d043d273fec..e590ac7dda31 100644 --- a/src/Servers/HttpSys/test/FunctionalTests/ResponseCachingTests.cs +++ b/src/Servers/HttpSys/test/FunctionalTests/ResponseCachingTests.cs @@ -383,6 +383,7 @@ public async Task Caching_SendFileNoContentLength_NotCached() } } + [QuarantinedTest("new issue")] [ConditionalFact] public async Task Caching_SendFileWithFullContentLength_Cached() {