diff --git a/src/libraries/System.IO.Compression/src/System/IO/Compression/Zstandard/ZstandardDecoder.cs b/src/libraries/System.IO.Compression/src/System/IO/Compression/Zstandard/ZstandardDecoder.cs
index c5a89178bcd6be..cbcba6392ad822 100644
--- a/src/libraries/System.IO.Compression/src/System/IO/Compression/Zstandard/ZstandardDecoder.cs
+++ b/src/libraries/System.IO.Compression/src/System/IO/Compression/Zstandard/ZstandardDecoder.cs
@@ -323,6 +323,22 @@ public void Reset()
_finished = false;
}
+ ///
+ /// Clears the end-of-frame state so the decoder can continue decoding the next frame
+ /// in a stream that contains multiple concatenated Zstandard frames (valid per RFC 8878 §3).
+ ///
+ ///
+ /// returns 0 at the end of each frame, not at the
+ /// end of the stream, and the native context is automatically ready to begin the next frame on the
+ /// following call. Only the managed end-of-frame latch is cleared here; the native context is left
+ /// intact so window-size and dictionary settings carry over to the next frame. Must only be called
+ /// after reported .
+ ///
+ internal void PrepareForNextFrame()
+ {
+ _finished = false;
+ }
+
/// References a prefix for the next decompression operation.
/// The prefix will be used only for the next decompression frame and will be removed when is called. The referenced data must remain valid and unmodified for the duration of the decompression operation.
/// The decoder has been disposed.
diff --git a/src/libraries/System.IO.Compression/src/System/IO/Compression/Zstandard/ZstandardStream.Decompress.cs b/src/libraries/System.IO.Compression/src/System/IO/Compression/Zstandard/ZstandardStream.Decompress.cs
index 1e11905d9b69f4..238d24821c4227 100644
--- a/src/libraries/System.IO.Compression/src/System/IO/Compression/Zstandard/ZstandardStream.Decompress.cs
+++ b/src/libraries/System.IO.Compression/src/System/IO/Compression/Zstandard/ZstandardStream.Decompress.cs
@@ -2,6 +2,7 @@
// The .NET Foundation licenses this file to you under the MIT license.
using System.Buffers;
+using System.Buffers.Binary;
using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;
using System.Threading;
@@ -14,6 +15,15 @@ public sealed partial class ZstandardStream
private ZstandardDecoder? _decoder;
private bool _nonEmptyInput;
+ // Set when the decoder reports the end of a frame (OperationStatus.Done). A zstd stream may
+ // contain multiple frames concatenated back-to-back (RFC 8878 §3), so reaching the end of a
+ // frame is not necessarily the end of the stream. While at a frame boundary, a subsequent
+ // end-of-input from the underlying stream is a clean end rather than truncated data.
+ private bool _atFrameBoundary;
+
+ // Length of a Zstandard frame magic number, in bytes.
+ private const int ZstdFrameMagicLength = sizeof(uint);
+
/// Initializes a new instance of the class by using the specified stream and decoder instance.
/// The stream from which data to decompress is read.
/// The decoder instance to use for decompression. The stream will not dispose this decoder; instead, it will reset it when the stream is disposed.
@@ -36,44 +46,123 @@ private bool TryDecompress(Span destination, out int bytesWritten, out Ope
{
Debug.Assert(_decoder != null);
- // Decompress any data we may have in our buffer.
- lastResult = _decoder.Decompress(_buffer.ActiveSpan, destination, out int bytesConsumed, out bytesWritten);
- _buffer.Discard(bytesConsumed);
+ bytesWritten = 0;
- if (lastResult == OperationStatus.InvalidData)
+ while (true)
{
- throw new InvalidDataException(SR.ZstandardStream_Decompress_InvalidData);
- }
+ // Decompress any data we may have in our buffer into the remaining destination.
+ OperationStatus status = _decoder.Decompress(_buffer.ActiveSpan, destination, out int bytesConsumed, out int written);
+ _buffer.Discard(bytesConsumed);
+ bytesWritten += written;
+ destination = destination.Slice(written);
+ lastResult = status;
+
+ if (status == OperationStatus.InvalidData)
+ {
+ throw new InvalidDataException(SR.ZstandardStream_Decompress_InvalidData);
+ }
- // If we successfully decompressed any bytes, or if we've reached the end of the decompression, we're done.
- if (bytesWritten != 0 || lastResult == OperationStatus.Done)
- {
- return true;
- }
+ if (status == OperationStatus.Done)
+ {
+ // The decoder finished a frame. A zstd stream may be a sequence of frames
+ // concatenated back-to-back (RFC 8878 §3) — produced by many encoders/CDNs that
+ // flush a frame per buffer — so the end of a frame is not necessarily the end of
+ // the stream. We're now at a frame boundary; end-of-input here is a clean end.
+ _atFrameBoundary = true;
+
+ // If the next frame is already buffered, keep decoding it on the same native
+ // context (no reset needed: ZSTD_decompressStream automatically begins the next
+ // frame on the following call) into whatever destination space remains.
+ if (_buffer.ActiveLength >= ZstdFrameMagicLength && StartsWithZstdFrame(_buffer.ActiveSpan))
+ {
+ _decoder.PrepareForNextFrame();
+ _atFrameBoundary = false;
+
+ if (destination.IsEmpty)
+ {
+ // No room left to decode the next frame in this call. Hand back what we
+ // have; the stream is not finished, so this must not be reported as Done
+ // (which would trigger end-of-stream handling such as rewinding a seekable
+ // base stream).
+ lastResult = OperationStatus.DestinationTooSmall;
+ return true;
+ }
+
+ continue;
+ }
- if (destination.IsEmpty)
- {
- // The caller provided a zero-byte buffer. This is typically done in order to avoid allocating/renting
- // a buffer until data is known to be available. We don't have perfect knowledge here, as _decoder.Decompress
- // will return DestinationTooSmall whether or not more data is required. As such, we assume that if there's
- // any data in our input buffer, it would have been decompressible into at least one byte of output, and
- // otherwise we need to do a read on the underlying stream. This isn't perfect, because having input data
- // doesn't necessarily mean it'll decompress into at least one byte of output, but it's a reasonable approximation
- // for the 99% case. If it's wrong, it just means that a caller using zero-byte reads as a way to delay
- // getting a buffer to use for a subsequent call may end up getting one earlier than otherwise preferred.
- Debug.Assert(lastResult == OperationStatus.DestinationTooSmall);
- if (_buffer.ActiveLength != 0)
+ // Enough leftover input to rule out another frame: this is trailing data after the
+ // final frame. The stream is complete; leave the trailing bytes untouched (a seekable
+ // base stream is rewound to the end of the compressed data by the caller), mirroring
+ // how DeflateStream handles data after the last gzip member.
+ if (_buffer.ActiveLength >= ZstdFrameMagicLength)
+ {
+ // lastResult is already Done; the stream is complete.
+ return true;
+ }
+
+ // Fewer than ZstdFrameMagicLength bytes remain: not enough to tell whether another
+ // frame follows (its magic number may be split across reads) or this was the last
+ // frame. Hand back any output now and resolve on the next call / underlying read.
+ // Because we're at a frame boundary, end-of-input is treated as a clean end rather
+ // than truncation (see _atFrameBoundary checks in Read/ReadAsync).
+ lastResult = OperationStatus.NeedMoreData;
+ return bytesWritten != 0;
+ }
+
+ // If we successfully decompressed any bytes, we're done for this call.
+ if (bytesWritten != 0)
{
- Debug.Assert(bytesWritten == 0);
+ _atFrameBoundary = false;
return true;
}
+
+ if (destination.IsEmpty)
+ {
+ // The caller provided a zero-byte buffer. This is typically done in order to avoid allocating/renting
+ // a buffer until data is known to be available. We don't have perfect knowledge here, as _decoder.Decompress
+ // will return DestinationTooSmall whether or not more data is required. As such, we assume that if there's
+ // any data in our input buffer, it would have been decompressible into at least one byte of output, and
+ // otherwise we need to do a read on the underlying stream. This isn't perfect, because having input data
+ // doesn't necessarily mean it'll decompress into at least one byte of output, but it's a reasonable approximation
+ // for the 99% case. If it's wrong, it just means that a caller using zero-byte reads as a way to delay
+ // getting a buffer to use for a subsequent call may end up getting one earlier than otherwise preferred.
+ Debug.Assert(status == OperationStatus.DestinationTooSmall);
+ if (_buffer.ActiveLength != 0)
+ {
+ Debug.Assert(bytesWritten == 0);
+ return true;
+ }
+ }
+
+ Debug.Assert(
+ status == OperationStatus.NeedMoreData ||
+ (status == OperationStatus.DestinationTooSmall && destination.IsEmpty && _buffer.ActiveLength == 0), $"{nameof(status)} == {status}, {nameof(destination.Length)} == {destination.Length}");
+
+ _atFrameBoundary = false;
+ return false;
+ }
+ }
+
+ ///
+ /// Returns whether begins with a Zstandard frame magic number — a standard
+ /// frame (0xFD2FB528) or a skippable frame (0x184D2A50–0x184D2A5F) — which indicates that another
+ /// concatenated frame follows. Used to distinguish a subsequent frame from trailing data after the
+ /// final frame. Requires at least bytes.
+ ///
+ private static bool StartsWithZstdFrame(ReadOnlySpan data)
+ {
+ if (data.Length < ZstdFrameMagicLength)
+ {
+ return false;
}
- Debug.Assert(
- lastResult == OperationStatus.NeedMoreData ||
- (lastResult == OperationStatus.DestinationTooSmall && destination.IsEmpty && _buffer.ActiveLength == 0), $"{nameof(lastResult)} == {lastResult}, {nameof(destination.Length)} == {destination.Length}");
+ const uint ZstdFrameMagic = 0xFD2FB528;
+ const uint SkippableFrameMagicMin = 0x184D2A50;
+ const uint SkippableFrameMagicMax = 0x184D2A5F;
- return false;
+ uint magic = BinaryPrimitives.ReadUInt32LittleEndian(data);
+ return magic == ZstdFrameMagic || (magic >= SkippableFrameMagicMin && magic <= SkippableFrameMagicMax);
}
/// Reads decompressed bytes from the underlying stream and places them in the specified array.
@@ -122,8 +211,14 @@ public override int Read(Span buffer)
int bytesRead = _stream.Read(_buffer.AvailableSpan);
if (bytesRead <= 0)
{
- if (_nonEmptyInput && !buffer.IsEmpty)
+ // Only treat end-of-input as truncation if we're in the middle of a frame.
+ // If we're at a frame boundary (_atFrameBoundary), the stream ended cleanly
+ // after the last of one or more concatenated frames; report Done so that any
+ // unconsumed trailing bytes are rewound on a seekable base stream.
+ if (_nonEmptyInput && !buffer.IsEmpty && !_atFrameBoundary)
ThrowTruncatedInvalidData();
+ if (_atFrameBoundary)
+ lastResult = OperationStatus.Done;
break;
}
@@ -199,8 +294,14 @@ public override async ValueTask ReadAsync(Memory buffer, Cancellation
int bytesRead = await _stream.ReadAsync(_buffer.AvailableMemory, cancellationToken).ConfigureAwait(false);
if (bytesRead <= 0)
{
- if (_nonEmptyInput && !buffer.IsEmpty)
+ // Only treat end-of-input as truncation if we're in the middle of a frame.
+ // If we're at a frame boundary (_atFrameBoundary), the stream ended cleanly
+ // after the last of one or more concatenated frames; report Done so that any
+ // unconsumed trailing bytes are rewound on a seekable base stream.
+ if (_nonEmptyInput && !buffer.IsEmpty && !_atFrameBoundary)
ThrowTruncatedInvalidData();
+ if (_atFrameBoundary)
+ lastResult = OperationStatus.Done;
break;
}
diff --git a/src/libraries/System.IO.Compression/tests/Zstandard/CompressionStreamUnitTests.Zstandard.cs b/src/libraries/System.IO.Compression/tests/Zstandard/CompressionStreamUnitTests.Zstandard.cs
index aa6f961de3d77b..128150d1bad143 100644
--- a/src/libraries/System.IO.Compression/tests/Zstandard/CompressionStreamUnitTests.Zstandard.cs
+++ b/src/libraries/System.IO.Compression/tests/Zstandard/CompressionStreamUnitTests.Zstandard.cs
@@ -3,6 +3,7 @@
using System.Buffers;
using System.Linq;
+using System.Threading;
using System.Threading.Tasks;
using Microsoft.DotNet.RemoteExecutor;
using Xunit.Sdk;
@@ -282,5 +283,208 @@ public void StreamTruncation_IsDetected(TestScenario testScenario)
}, testScenario.ToString()).Dispose();
}
+ // Compresses data into a single Zstandard frame.
+ private static byte[] CompressToSingleFrame(byte[] data)
+ {
+ byte[] buffer = new byte[ZstandardEncoder.GetMaxCompressedLength(data.Length)];
+ Assert.True(ZstandardEncoder.TryCompress(data, buffer, out int compressedLength));
+ Array.Resize(ref buffer, compressedLength);
+ return buffer;
+ }
+
+ // Regression test for https://github.com/dotnet/runtime/issues/129038.
+ // A Zstandard stream can be a sequence of frames concatenated back-to-back (RFC 8878 §3),
+ // which is exactly what HTTP responses with Content-Encoding: zstd produce for large bodies.
+ // Previously ZstandardStream stopped after the first frame, silently truncating the output;
+ // it must now decode every frame.
+ [Theory]
+ [InlineData(false)]
+ [InlineData(true)]
+ public async Task ZstandardStream_ConcatenatedFrames_DecompressesAllFrames(bool async)
+ {
+ // Use payloads large enough to span the 64 KB internal buffer so the bug (truncation at
+ // the first frame boundary) would manifest, and different sizes so the total length alone
+ // proves that both frames were decoded.
+ byte[] first = ZstandardTestUtils.CreateTestData(120_000);
+ byte[] second = ZstandardTestUtils.CreateTestData(90_000);
+ byte[] expected = [.. first, .. second];
+
+ byte[] body = [.. CompressToSingleFrame(first), .. CompressToSingleFrame(second)];
+
+ using MemoryStream input = new(body);
+ using MemoryStream output = new();
+ using (ZstandardStream decompressor = new(input, CompressionMode.Decompress, leaveOpen: true))
+ {
+ if (async)
+ {
+ await decompressor.CopyToAsync(output);
+ }
+ else
+ {
+ decompressor.CopyTo(output);
+ }
+ }
+
+ Assert.Equal(expected.Length, output.Length);
+ Assert.Equal(expected, output.ToArray());
+ }
+
+ // The next frame's magic number can be split across underlying reads. A stream that yields a
+ // single byte per read forces every frame boundary to be discovered across multiple reads, and
+ // also exercises the non-seekable path (no rewind), as used by HttpClient automatic decompression.
+ [Theory]
+ [InlineData(false)]
+ [InlineData(true)]
+ public async Task ZstandardStream_ConcatenatedFrames_AcrossReads_DecompressesAllFrames(bool async)
+ {
+ byte[] first = ZstandardTestUtils.CreateTestData(8_000);
+ byte[] second = ZstandardTestUtils.CreateTestData(5_000);
+ byte[] expected = [.. first, .. second];
+
+ byte[] body = [.. CompressToSingleFrame(first), .. CompressToSingleFrame(second)];
+
+ using Stream input = new SingleByteReadStream(body);
+ using MemoryStream output = new();
+ using (ZstandardStream decompressor = new(input, CompressionMode.Decompress, leaveOpen: true))
+ {
+ if (async)
+ {
+ await decompressor.CopyToAsync(output);
+ }
+ else
+ {
+ decompressor.CopyTo(output);
+ }
+ }
+
+ Assert.Equal(expected, output.ToArray());
+ }
+
+ // Trailing data that is not a Zstandard frame after the final frame must be left untouched (the
+ // stream is complete), mirroring how DeflateStream handles data after the last gzip member.
+ [Theory]
+ [InlineData(false)]
+ [InlineData(true)]
+ public async Task ZstandardStream_FrameFollowedByTrailingData_StopsAtEndOfFrame(bool async)
+ {
+ byte[] payload = ZstandardTestUtils.CreateTestData(10_000);
+ byte[] frame = CompressToSingleFrame(payload);
+ byte[] trailing = Enumerable.Range(0, 64).Select(i => (byte)(i + 1)).ToArray();
+
+ using MemoryStream input = new([.. frame, .. trailing]);
+ using MemoryStream output = new();
+ using (ZstandardStream decompressor = new(input, CompressionMode.Decompress, leaveOpen: true))
+ {
+ if (async)
+ {
+ await decompressor.CopyToAsync(output);
+ }
+ else
+ {
+ decompressor.CopyTo(output);
+ }
+ }
+
+ Assert.Equal(payload, output.ToArray());
+
+ // The base stream (seekable) is rewound to the exact end of the compressed frame, so the
+ // trailing bytes remain available to the caller.
+ byte[] remainder = new byte[trailing.Length];
+ int read = input.Read(remainder, 0, remainder.Length);
+ Assert.Equal(trailing.Length, read);
+ Assert.Equal(trailing, remainder);
+ }
+
+ // Same as above, but with trailing data shorter than a frame magic number (1-3 bytes). This is
+ // the boundary case where the decoder cannot immediately tell a split next-frame magic from
+ // trailing data; the stream must still end cleanly and leave the trailing bytes on the (seekable)
+ // base stream, consistent with the >= 4 byte case.
+ [Theory]
+ [InlineData(false, 1)]
+ [InlineData(true, 1)]
+ [InlineData(false, 2)]
+ [InlineData(true, 2)]
+ [InlineData(false, 3)]
+ [InlineData(true, 3)]
+ public async Task ZstandardStream_FrameFollowedByShortTrailingData_StopsAtEndOfFrame(bool async, int trailingLength)
+ {
+ byte[] payload = ZstandardTestUtils.CreateTestData(10_000);
+ byte[] frame = CompressToSingleFrame(payload);
+ byte[] trailing = Enumerable.Range(1, trailingLength).Select(i => (byte)i).ToArray();
+
+ using MemoryStream input = new([.. frame, .. trailing]);
+ using MemoryStream output = new();
+ using (ZstandardStream decompressor = new(input, CompressionMode.Decompress, leaveOpen: true))
+ {
+ if (async)
+ {
+ await decompressor.CopyToAsync(output);
+ }
+ else
+ {
+ decompressor.CopyTo(output);
+ }
+ }
+
+ Assert.Equal(payload, output.ToArray());
+
+ byte[] remainder = new byte[trailing.Length];
+ int read = input.Read(remainder, 0, remainder.Length);
+ Assert.Equal(trailing.Length, read);
+ Assert.Equal(trailing, remainder);
+ }
+
+ // A non-seekable, read-only stream that returns at most one byte per read.
+ private sealed class SingleByteReadStream : Stream
+ {
+ private readonly byte[] _data;
+ private int _position;
+
+ public SingleByteReadStream(byte[] data) => _data = data;
+
+ public override bool CanRead => true;
+ public override bool CanSeek => false;
+ public override bool CanWrite => false;
+ public override long Length => throw new NotSupportedException();
+ public override long Position { get => throw new NotSupportedException(); set => throw new NotSupportedException(); }
+
+ public override int Read(byte[] buffer, int offset, int count) => Read(buffer.AsSpan(offset, count));
+
+ public override int Read(Span buffer)
+ {
+ if (buffer.IsEmpty || _position >= _data.Length)
+ {
+ return 0;
+ }
+
+ buffer[0] = _data[_position++];
+ return 1;
+ }
+
+ public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken)
+ {
+ if (cancellationToken.IsCancellationRequested)
+ {
+ return Task.FromCanceled(cancellationToken);
+ }
+
+ return Task.FromResult(Read(buffer.AsSpan(offset, count)));
+ }
+
+ public override ValueTask ReadAsync(Memory buffer, CancellationToken cancellationToken = default)
+ {
+ if (cancellationToken.IsCancellationRequested)
+ {
+ return ValueTask.FromCanceled(cancellationToken);
+ }
+
+ return new ValueTask(Read(buffer.Span));
+ }
+
+ public override void Flush() { }
+ public override long Seek(long offset, SeekOrigin origin) => throw new NotSupportedException();
+ public override void SetLength(long value) => throw new NotSupportedException();
+ public override void Write(byte[] buffer, int offset, int count) => throw new NotSupportedException();
+ }
}
}