From 449a3b4472680b3a801747bd1ba076fe0cb4a1c4 Mon Sep 17 00:00:00 2001 From: Nathan Date: Sun, 10 Apr 2022 12:34:53 +0100 Subject: [PATCH 1/4] add benchmarks for tar input and output streams --- .../Tar/TarInputStream.cs | 56 +++++++++++++++++++ .../Tar/TarOutputStream.cs | 39 +++++++++++++ 2 files changed, 95 insertions(+) create mode 100644 benchmark/ICSharpCode.SharpZipLib.Benchmark/Tar/TarInputStream.cs create mode 100644 benchmark/ICSharpCode.SharpZipLib.Benchmark/Tar/TarOutputStream.cs diff --git a/benchmark/ICSharpCode.SharpZipLib.Benchmark/Tar/TarInputStream.cs b/benchmark/ICSharpCode.SharpZipLib.Benchmark/Tar/TarInputStream.cs new file mode 100644 index 000000000..27ecdb82f --- /dev/null +++ b/benchmark/ICSharpCode.SharpZipLib.Benchmark/Tar/TarInputStream.cs @@ -0,0 +1,56 @@ +using System.IO; +using System.Security.Cryptography; +using System.Text; +using BenchmarkDotNet.Attributes; +using ICSharpCode.SharpZipLib.Tar; + +namespace ICSharpCode.SharpZipLib.Benchmark.Tar +{ + [MemoryDiagnoser] + [Config(typeof(MultipleRuntimes))] + public class TarInputStream + { + private readonly byte[] archivedData; + private readonly byte[] readBuffer = new byte[1024]; + + public TarInputStream() + { + using (var outputMemoryStream = new MemoryStream()) + { + using (var zipOutputStream = new ICSharpCode.SharpZipLib.Tar.TarOutputStream(outputMemoryStream, Encoding.UTF8)) + { + var tarEntry = TarEntry.CreateTarEntry("some file"); + tarEntry.Size = 1024 * 1024; + zipOutputStream.PutNextEntry(tarEntry); + + var rng = RandomNumberGenerator.Create(); + var inputBuffer = new byte[1024]; + rng.GetBytes(inputBuffer); + + for (int i = 0; i < 1024; i++) + { + zipOutputStream.Write(inputBuffer, 0, inputBuffer.Length); + } + } + + archivedData = outputMemoryStream.ToArray(); + } + } + + [Benchmark] + public long ReadTarInputStream() + { + using (var memoryStream = new MemoryStream(archivedData)) + using(var zipInputStream = new ICSharpCode.SharpZipLib.Tar.TarInputStream(memoryStream, Encoding.UTF8)) + { + var entry = zipInputStream.GetNextEntry(); + + while (zipInputStream.Read(readBuffer, 0, readBuffer.Length) > 0) + { + } + + return entry.Size; + } + } + } +} diff --git a/benchmark/ICSharpCode.SharpZipLib.Benchmark/Tar/TarOutputStream.cs b/benchmark/ICSharpCode.SharpZipLib.Benchmark/Tar/TarOutputStream.cs new file mode 100644 index 000000000..272583876 --- /dev/null +++ b/benchmark/ICSharpCode.SharpZipLib.Benchmark/Tar/TarOutputStream.cs @@ -0,0 +1,39 @@ +using System.IO; +using System.Security.Cryptography; +using System.Text; +using BenchmarkDotNet.Attributes; +using ICSharpCode.SharpZipLib.Tar; + +namespace ICSharpCode.SharpZipLib.Benchmark.Tar +{ + [MemoryDiagnoser] + [Config(typeof(MultipleRuntimes))] + public class TarOutputStream + { + private readonly byte[] _backingArray = new byte[1024 * 1024 + (6 * 1024)]; + + [Benchmark] + public void WriteTarOutputStream() + { + using (var outputMemoryStream = new MemoryStream(_backingArray)) + { + using (var tarOutputStream = + new ICSharpCode.SharpZipLib.Tar.TarOutputStream(outputMemoryStream, Encoding.UTF8)) + { + var tarEntry = TarEntry.CreateTarEntry("some file"); + tarEntry.Size = 1024 * 1024; + tarOutputStream.PutNextEntry(tarEntry); + + var rng = RandomNumberGenerator.Create(); + var inputBuffer = new byte[1024]; + rng.GetBytes(inputBuffer); + + for (int i = 0; i < 1024; i++) + { + tarOutputStream.Write(inputBuffer, 0, inputBuffer.Length); + } + } + } + } + } +} From 0520ca1836983b4ac26ae715785fe5990402474b Mon Sep 17 00:00:00 2001 From: Nathan Date: Sun, 10 Apr 2022 13:04:58 +0100 Subject: [PATCH 2/4] reduce allocations in tar reader and writer --- .../Program.cs | 11 +- .../Tar/TarInputStream.cs | 29 +++- .../Tar/TarOutputStream.cs | 32 +++- .../Core/StringBuilderPool.cs | 22 +++ .../ICSharpCode.SharpZipLib.csproj | 12 +- src/ICSharpCode.SharpZipLib/Tar/TarBuffer.cs | 28 +++- src/ICSharpCode.SharpZipLib/Tar/TarEntry.cs | 93 +++--------- src/ICSharpCode.SharpZipLib/Tar/TarHeader.cs | 119 ++++++++------- .../Tar/TarInputStream.cs | 137 ++++++++++-------- .../Tar/TarOutputStream.cs | 14 +- 10 files changed, 291 insertions(+), 206 deletions(-) create mode 100644 src/ICSharpCode.SharpZipLib/Core/StringBuilderPool.cs diff --git a/benchmark/ICSharpCode.SharpZipLib.Benchmark/Program.cs b/benchmark/ICSharpCode.SharpZipLib.Benchmark/Program.cs index 9c79e6551..da419a518 100644 --- a/benchmark/ICSharpCode.SharpZipLib.Benchmark/Program.cs +++ b/benchmark/ICSharpCode.SharpZipLib.Benchmark/Program.cs @@ -1,9 +1,8 @@ -using System; -using BenchmarkDotNet; -using BenchmarkDotNet.Configs; +using BenchmarkDotNet.Configs; using BenchmarkDotNet.Jobs; using BenchmarkDotNet.Running; using BenchmarkDotNet.Toolchains.CsProj; +using ICSharpCode.SharpZipLib.Benchmark.Tar; namespace ICSharpCode.SharpZipLib.Benchmark { @@ -22,6 +21,12 @@ class Program static void Main(string[] args) { BenchmarkSwitcher.FromAssembly(typeof(Program).Assembly).Run(args); + + // var output = new TarOutputStream(); + // for (int i = 0; i < 1_000_000; i++) + // { + // output.WriteTarOutputStream(); + // } } } } diff --git a/benchmark/ICSharpCode.SharpZipLib.Benchmark/Tar/TarInputStream.cs b/benchmark/ICSharpCode.SharpZipLib.Benchmark/Tar/TarInputStream.cs index 27ecdb82f..71abff11b 100644 --- a/benchmark/ICSharpCode.SharpZipLib.Benchmark/Tar/TarInputStream.cs +++ b/benchmark/ICSharpCode.SharpZipLib.Benchmark/Tar/TarInputStream.cs @@ -1,6 +1,9 @@ +using System; using System.IO; using System.Security.Cryptography; using System.Text; +using System.Threading; +using System.Threading.Tasks; using BenchmarkDotNet.Attributes; using ICSharpCode.SharpZipLib.Tar; @@ -17,7 +20,8 @@ public TarInputStream() { using (var outputMemoryStream = new MemoryStream()) { - using (var zipOutputStream = new ICSharpCode.SharpZipLib.Tar.TarOutputStream(outputMemoryStream, Encoding.UTF8)) + using (var zipOutputStream = + new ICSharpCode.SharpZipLib.Tar.TarOutputStream(outputMemoryStream, Encoding.UTF8)) { var tarEntry = TarEntry.CreateTarEntry("some file"); tarEntry.Size = 1024 * 1024; @@ -41,7 +45,7 @@ public TarInputStream() public long ReadTarInputStream() { using (var memoryStream = new MemoryStream(archivedData)) - using(var zipInputStream = new ICSharpCode.SharpZipLib.Tar.TarInputStream(memoryStream, Encoding.UTF8)) + using (var zipInputStream = new ICSharpCode.SharpZipLib.Tar.TarInputStream(memoryStream, Encoding.UTF8)) { var entry = zipInputStream.GetNextEntry(); @@ -52,5 +56,26 @@ public long ReadTarInputStream() return entry.Size; } } + + [Benchmark] + public async Task ReadTarInputStreamAsync() + { + using (var memoryStream = new MemoryStream(archivedData)) + using (var zipInputStream = new ICSharpCode.SharpZipLib.Tar.TarInputStream(memoryStream, Encoding.UTF8)) + { + var entry = await zipInputStream.GetNextEntryAsync(CancellationToken.None); +#if NETCOREAPP2_1_OR_GREATER + while (await zipInputStream.ReadAsync(readBuffer.AsMemory()) > 0) + { + } +#else + while (await zipInputStream.ReadAsync(readBuffer, 0, readBuffer.Length) > 0) + { + } +#endif + + return entry.Size; + } + } } } diff --git a/benchmark/ICSharpCode.SharpZipLib.Benchmark/Tar/TarOutputStream.cs b/benchmark/ICSharpCode.SharpZipLib.Benchmark/Tar/TarOutputStream.cs index 272583876..83b9fc9ee 100644 --- a/benchmark/ICSharpCode.SharpZipLib.Benchmark/Tar/TarOutputStream.cs +++ b/benchmark/ICSharpCode.SharpZipLib.Benchmark/Tar/TarOutputStream.cs @@ -1,6 +1,8 @@ using System.IO; using System.Security.Cryptography; using System.Text; +using System.Threading; +using System.Threading.Tasks; using BenchmarkDotNet.Attributes; using ICSharpCode.SharpZipLib.Tar; @@ -11,6 +13,8 @@ namespace ICSharpCode.SharpZipLib.Benchmark.Tar public class TarOutputStream { private readonly byte[] _backingArray = new byte[1024 * 1024 + (6 * 1024)]; + private readonly byte[] _inputBuffer = new byte[1024]; + private static readonly RandomNumberGenerator _rng = RandomNumberGenerator.Create(); [Benchmark] public void WriteTarOutputStream() @@ -24,13 +28,33 @@ public void WriteTarOutputStream() tarEntry.Size = 1024 * 1024; tarOutputStream.PutNextEntry(tarEntry); - var rng = RandomNumberGenerator.Create(); - var inputBuffer = new byte[1024]; - rng.GetBytes(inputBuffer); + _rng.GetBytes(_inputBuffer); for (int i = 0; i < 1024; i++) { - tarOutputStream.Write(inputBuffer, 0, inputBuffer.Length); + tarOutputStream.Write(_inputBuffer, 0, _inputBuffer.Length); + } + } + } + } + + [Benchmark] + public async Task WriteTarOutputStreamAsync() + { + using (var outputMemoryStream = new MemoryStream(_backingArray)) + { + using (var tarOutputStream = + new ICSharpCode.SharpZipLib.Tar.TarOutputStream(outputMemoryStream, Encoding.UTF8)) + { + var tarEntry = TarEntry.CreateTarEntry("some file"); + tarEntry.Size = 1024 * 1024; + await tarOutputStream.PutNextEntryAsync(tarEntry, CancellationToken.None); + + _rng.GetBytes(_inputBuffer); + + for (int i = 0; i < 1024; i++) + { + await tarOutputStream.WriteAsync(_inputBuffer, 0, _inputBuffer.Length); } } } diff --git a/src/ICSharpCode.SharpZipLib/Core/StringBuilderPool.cs b/src/ICSharpCode.SharpZipLib/Core/StringBuilderPool.cs new file mode 100644 index 000000000..08e322a3e --- /dev/null +++ b/src/ICSharpCode.SharpZipLib/Core/StringBuilderPool.cs @@ -0,0 +1,22 @@ +using System.Collections.Generic; +using System.Text; + +namespace ICSharpCode.SharpZipLib.Core +{ + class StringBuilderPool + { + public static StringBuilderPool Instance { get; } = new StringBuilderPool(); + private readonly Queue pool = new Queue(); + + public StringBuilder Rent() + { + return pool.Count > 0 ? pool.Dequeue() : new StringBuilder(); + } + + public void Return(StringBuilder builder) + { + builder.Clear(); + pool.Enqueue(builder); + } + } +} diff --git a/src/ICSharpCode.SharpZipLib/ICSharpCode.SharpZipLib.csproj b/src/ICSharpCode.SharpZipLib/ICSharpCode.SharpZipLib.csproj index 066c4fb43..e3167501f 100644 --- a/src/ICSharpCode.SharpZipLib/ICSharpCode.SharpZipLib.csproj +++ b/src/ICSharpCode.SharpZipLib/ICSharpCode.SharpZipLib.csproj @@ -33,8 +33,16 @@ Please see https://github.com/icsharpcode/SharpZipLib/wiki/Release-1.3.3 for mor - - + + + + + + + + + + True images diff --git a/src/ICSharpCode.SharpZipLib/Tar/TarBuffer.cs b/src/ICSharpCode.SharpZipLib/Tar/TarBuffer.cs index 744c13189..0fe98faf4 100644 --- a/src/ICSharpCode.SharpZipLib/Tar/TarBuffer.cs +++ b/src/ICSharpCode.SharpZipLib/Tar/TarBuffer.cs @@ -1,4 +1,5 @@ using System; +using System.Buffers; using System.IO; namespace ICSharpCode.SharpZipLib.Tar @@ -207,7 +208,7 @@ private void Initialize(int archiveBlockFactor) { blockFactor = archiveBlockFactor; recordSize = archiveBlockFactor * BlockSize; - recordBuffer = new byte[RecordSize]; + recordBuffer = ArrayPool.Shared.Rent(RecordSize); if (inputStream != null) { @@ -334,6 +335,30 @@ public byte[] ReadBlock() currentBlockIndex++; return result; } + + internal void ReadBlockInt(Span buffer) + { + if (buffer.Length != BlockSize) + { + throw new ArgumentException("BUG: buffer must have length BlockSize"); + } + + if (inputStream == null) + { + throw new TarException("TarBuffer.ReadBlock - no input stream defined"); + } + + if (currentBlockIndex >= BlockFactor) + { + if (!ReadRecord()) + { + throw new TarException("Failed to read a record"); + } + } + + recordBuffer.AsSpan().Slice(currentBlockIndex* BlockSize, BlockSize).CopyTo(buffer); + currentBlockIndex++; + } /// /// Read a record from data stream. @@ -580,6 +605,7 @@ public void Close() } inputStream = null; } + ArrayPool.Shared.Return(recordBuffer); } #region Instance Fields diff --git a/src/ICSharpCode.SharpZipLib/Tar/TarEntry.cs b/src/ICSharpCode.SharpZipLib/Tar/TarEntry.cs index 262c12ad3..9a986de86 100644 --- a/src/ICSharpCode.SharpZipLib/Tar/TarEntry.cs +++ b/src/ICSharpCode.SharpZipLib/Tar/TarEntry.cs @@ -114,7 +114,8 @@ public object Clone() public static TarEntry CreateTarEntry(string name) { var entry = new TarEntry(); - TarEntry.NameTarHeader(entry.header, name); + + entry.NameTarHeader(name); return entry; } @@ -188,10 +189,7 @@ public bool IsDescendent(TarEntry toTest) /// public TarHeader TarHeader { - get - { - return header; - } + get { return header; } } /// @@ -199,14 +197,8 @@ public TarHeader TarHeader /// public string Name { - get - { - return header.Name; - } - set - { - header.Name = value; - } + get { return header.Name; } + set { header.Name = value; } } /// @@ -214,14 +206,8 @@ public string Name /// public int UserId { - get - { - return header.UserId; - } - set - { - header.UserId = value; - } + get { return header.UserId; } + set { header.UserId = value; } } /// @@ -229,14 +215,8 @@ public int UserId /// public int GroupId { - get - { - return header.GroupId; - } - set - { - header.GroupId = value; - } + get { return header.GroupId; } + set { header.GroupId = value; } } /// @@ -244,14 +224,8 @@ public int GroupId /// public string UserName { - get - { - return header.UserName; - } - set - { - header.UserName = value; - } + get { return header.UserName; } + set { header.UserName = value; } } /// @@ -259,14 +233,8 @@ public string UserName /// public string GroupName { - get - { - return header.GroupName; - } - set - { - header.GroupName = value; - } + get { return header.GroupName; } + set { header.GroupName = value; } } /// @@ -304,14 +272,8 @@ public void SetNames(string userName, string groupName) /// public DateTime ModTime { - get - { - return header.ModTime; - } - set - { - header.ModTime = value; - } + get { return header.ModTime; } + set { header.ModTime = value; } } /// @@ -322,10 +284,7 @@ public DateTime ModTime /// public string File { - get - { - return file; - } + get { return file; } } /// @@ -333,14 +292,8 @@ public string File /// public long Size { - get - { - return header.Size; - } - set - { - header.Size = value; - } + get { return header.Size; } + set { header.Size = value; } } /// @@ -450,7 +403,8 @@ public void GetFileTarHeader(TarHeader header, string file) header.Size = new FileInfo(file.Replace('/', Path.DirectorySeparatorChar)).Length; } - header.ModTime = System.IO.File.GetLastWriteTime(file.Replace('/', Path.DirectorySeparatorChar)).ToUniversalTime(); + header.ModTime = System.IO.File.GetLastWriteTime(file.Replace('/', Path.DirectorySeparatorChar)) + .ToUniversalTime(); header.DevMajor = 0; header.DevMinor = 0; } @@ -549,13 +503,8 @@ static public void AdjustEntryName(byte[] buffer, string newName, Encoding nameE /// /// The tar entry name. /// - static public void NameTarHeader(TarHeader header, string name) + public void NameTarHeader(string name) { - if (header == null) - { - throw new ArgumentNullException(nameof(header)); - } - if (name == null) { throw new ArgumentNullException(nameof(name)); diff --git a/src/ICSharpCode.SharpZipLib/Tar/TarHeader.cs b/src/ICSharpCode.SharpZipLib/Tar/TarHeader.cs index 3bd1bdffe..8a6d263a0 100644 --- a/src/ICSharpCode.SharpZipLib/Tar/TarHeader.cs +++ b/src/ICSharpCode.SharpZipLib/Tar/TarHeader.cs @@ -1,5 +1,7 @@ using System; +using System.Buffers; using System.Text; +using ICSharpCode.SharpZipLib.Core; namespace ICSharpCode.SharpZipLib.Tar { @@ -235,7 +237,7 @@ public class TarHeader /// public const string GNU_TMAGIC = "ustar "; - private const long timeConversionFactor = 10000000L; // 1 tick == 100 nanoseconds + private const long timeConversionFactor = 10000000L; // 1 tick == 100 nanoseconds private static readonly DateTime dateTime1970 = new DateTime(1970, 1, 1, 0, 0, 0, 0); #endregion Constants @@ -540,7 +542,7 @@ public void ParseBuffer(byte[] header, Encoding nameEncoding) int offset = 0; - name = ParseName(header, offset, NAMELEN, nameEncoding).ToString(); + name = ParseName(header, offset, NAMELEN, nameEncoding); offset += NAMELEN; mode = (int)ParseOctal(header, offset, MODELEN); @@ -563,21 +565,21 @@ public void ParseBuffer(byte[] header, Encoding nameEncoding) TypeFlag = header[offset++]; - LinkName = ParseName(header, offset, NAMELEN, nameEncoding).ToString(); + LinkName = ParseName(header, offset, NAMELEN, nameEncoding); offset += NAMELEN; - Magic = ParseName(header, offset, MAGICLEN, nameEncoding).ToString(); + Magic = ParseName(header, offset, MAGICLEN, nameEncoding); offset += MAGICLEN; if (Magic == "ustar") { - Version = ParseName(header, offset, VERSIONLEN, nameEncoding).ToString(); + Version = ParseName(header, offset, VERSIONLEN, nameEncoding); offset += VERSIONLEN; - UserName = ParseName(header, offset, UNAMELEN, nameEncoding).ToString(); + UserName = ParseName(header, offset, UNAMELEN, nameEncoding); offset += UNAMELEN; - GroupName = ParseName(header, offset, GNAMELEN, nameEncoding).ToString(); + GroupName = ParseName(header, offset, GNAMELEN, nameEncoding); offset += GNAMELEN; DevMajor = (int)ParseOctal(header, offset, DEVLEN); @@ -586,7 +588,7 @@ public void ParseBuffer(byte[] header, Encoding nameEncoding) DevMinor = (int)ParseOctal(header, offset, DEVLEN); offset += DEVLEN; - string prefix = ParseName(header, offset, PREFIXLEN, nameEncoding).ToString(); + string prefix = ParseName(header, offset, PREFIXLEN, nameEncoding); if (!string.IsNullOrEmpty(prefix)) Name = prefix + '/' + Name; } @@ -685,30 +687,31 @@ public override int GetHashCode() public override bool Equals(object obj) { var localHeader = obj as TarHeader; - + bool result; if (localHeader != null) { result = (name == localHeader.name) - && (mode == localHeader.mode) - && (UserId == localHeader.UserId) - && (GroupId == localHeader.GroupId) - && (Size == localHeader.Size) - && (ModTime == localHeader.ModTime) - && (Checksum == localHeader.Checksum) - && (TypeFlag == localHeader.TypeFlag) - && (LinkName == localHeader.LinkName) - && (Magic == localHeader.Magic) - && (Version == localHeader.Version) - && (UserName == localHeader.UserName) - && (GroupName == localHeader.GroupName) - && (DevMajor == localHeader.DevMajor) - && (DevMinor == localHeader.DevMinor); + && (mode == localHeader.mode) + && (UserId == localHeader.UserId) + && (GroupId == localHeader.GroupId) + && (Size == localHeader.Size) + && (ModTime == localHeader.ModTime) + && (Checksum == localHeader.Checksum) + && (TypeFlag == localHeader.TypeFlag) + && (LinkName == localHeader.LinkName) + && (Magic == localHeader.Magic) + && (Version == localHeader.Version) + && (UserName == localHeader.UserName) + && (GroupName == localHeader.GroupName) + && (DevMajor == localHeader.DevMajor) + && (DevMinor == localHeader.DevMinor); } else { result = false; } + return result; } @@ -719,7 +722,7 @@ public override bool Equals(object obj) /// Value to apply as a default for userName. /// Value to apply as a default for groupId. /// Value to apply as a default for groupName. - static internal void SetValueDefaults(int userId, string userName, int groupId, string groupName) + internal static void SetValueDefaults(int userId, string userName, int groupId, string groupName) { defaultUserId = userIdAsSet = userId; defaultUser = userNameAsSet = userName; @@ -727,7 +730,7 @@ static internal void SetValueDefaults(int userId, string userName, int groupId, defaultGroupName = groupNameAsSet = groupName; } - static internal void RestoreSetValues() + internal static void RestoreSetValues() { defaultUserId = userIdAsSet; defaultUser = userNameAsSet; @@ -737,7 +740,7 @@ static internal void RestoreSetValues() // Return value that may be stored in octal or binary. Length must exceed 8. // - static private long ParseBinaryOrOctal(byte[] header, int offset, int length) + private static long ParseBinaryOrOctal(byte[] header, int offset, int length) { if (header[offset] >= 0x80) { @@ -759,7 +762,7 @@ static private long ParseBinaryOrOctal(byte[] header, int offset, int length) /// The offset into the buffer from which to parse. /// The number of header bytes to parse. /// The long equivalent of the octal string. - static public long ParseOctal(byte[] header, int offset, int length) + public static long ParseOctal(byte[] header, int offset, int length) { if (header == null) { @@ -814,7 +817,7 @@ static public long ParseOctal(byte[] header, int offset, int length) /// The name parsed. /// [Obsolete("No Encoding for Name field is specified, any non-ASCII bytes will be discarded")] - static public StringBuilder ParseName(byte[] header, int offset, int length) + public static string ParseName(byte[] header, int offset, int length) { return ParseName(header, offset, length, null); } @@ -837,7 +840,7 @@ static public StringBuilder ParseName(byte[] header, int offset, int length) /// /// The name parsed. /// - static public StringBuilder ParseName(byte[] header, int offset, int length, Encoding encoding) + public static string ParseName(byte[] header, int offset, int length, Encoding encoding) { if (header == null) { @@ -859,10 +862,10 @@ static public StringBuilder ParseName(byte[] header, int offset, int length, Enc throw new ArgumentException("Exceeds header size", nameof(length)); } - var result = new StringBuilder(length); + var builder = StringBuilderPool.Instance.Rent(); int count = 0; - if(encoding == null) + if (encoding == null) { for (int i = offset; i < offset + length; ++i) { @@ -870,21 +873,25 @@ static public StringBuilder ParseName(byte[] header, int offset, int length, Enc { break; } - result.Append((char)header[i]); + + builder.Append((char)header[i]); } } else { - for(int i = offset; i < offset + length; ++i, ++count) + for (int i = offset; i < offset + length; ++i, ++count) { - if(header[i] == 0) + if (header[i] == 0) { break; } } - result.Append(encoding.GetString(header, offset, count)); + + builder.Append(encoding.GetString(header, offset, count)); } + var result = builder.ToString(); + StringBuilderPool.Instance.Return(builder); return result; } @@ -926,7 +933,8 @@ public static int GetNameBytes(string name, int nameOffset, byte[] buffer, int b /// The number of characters/bytes to add /// name encoding, or null for ASCII only /// The next free index in the - public static int GetNameBytes(string name, int nameOffset, byte[] buffer, int bufferOffset, int length, Encoding encoding) + public static int GetNameBytes(string name, int nameOffset, byte[] buffer, int bufferOffset, int length, + Encoding encoding) { if (name == null) { @@ -939,14 +947,17 @@ public static int GetNameBytes(string name, int nameOffset, byte[] buffer, int b } int i; - if(encoding != null) + if (encoding != null) { // it can be more sufficient if using Span or unsafe - var nameArray = name.ToCharArray(nameOffset, Math.Min(name.Length - nameOffset, length)); + ReadOnlySpan nameArray = name.AsSpan().Slice(nameOffset, Math.Min(name.Length - nameOffset, length)); + var charArray = ArrayPool.Shared.Rent(nameArray.Length); + nameArray.CopyTo(charArray); + // it can be more sufficient if using Span(or unsafe?) and ArrayPool for temporary buffer - var bytes = encoding.GetBytes(nameArray, 0, nameArray.Length); - i = Math.Min(bytes.Length, length); - Array.Copy(bytes, 0, buffer, bufferOffset, i); + var bytesLength = encoding.GetBytes(charArray, 0, nameArray.Length, buffer, bufferOffset); + ArrayPool.Shared.Return(charArray); + i = Math.Min(bytesLength, length); } else { @@ -1085,7 +1096,8 @@ public static int GetAsciiBytes(string toAdd, int nameOffset, byte[] buffer, int /// The number of ascii characters to add. /// String encoding, or null for ASCII only /// The next free index in the buffer. - public static int GetAsciiBytes(string toAdd, int nameOffset, byte[] buffer, int bufferOffset, int length, Encoding encoding) + public static int GetAsciiBytes(string toAdd, int nameOffset, byte[] buffer, int bufferOffset, int length, + Encoding encoding) { if (toAdd == null) { @@ -1098,7 +1110,7 @@ public static int GetAsciiBytes(string toAdd, int nameOffset, byte[] buffer, int } int i; - if(encoding == null) + if (encoding == null) { for (i = 0; i < length && nameOffset + i < toAdd.Length; ++i) { @@ -1179,8 +1191,9 @@ public static int GetOctalBytes(long value, byte[] buffer, int offset, int lengt private static int GetBinaryOrOctalBytes(long value, byte[] buffer, int offset, int length) { if (value > 0x1FFFFFFFF) - { // Octal 77777777777 (11 digits) - // Put value as binary, right-justified into the buffer. Set high order bit of left-most byte. + { + // Octal 77777777777 (11 digits) + // Put value as binary, right-justified into the buffer. Set high order bit of left-most byte. for (int pos = length - 1; pos > 0; pos--) { buffer[offset + pos] = (byte)value; @@ -1294,16 +1307,16 @@ private static DateTime GetDateTimeFromCTime(long ticks) #region Class Fields // Values used during recursive operations. - static internal int userIdAsSet; + internal static int userIdAsSet; - static internal int groupIdAsSet; - static internal string userNameAsSet; - static internal string groupNameAsSet = "None"; + internal static int groupIdAsSet; + internal static string userNameAsSet; + internal static string groupNameAsSet = "None"; - static internal int defaultUserId; - static internal int defaultGroupId; - static internal string defaultGroupName = "None"; - static internal string defaultUser; + internal static int defaultUserId; + internal static int defaultGroupId; + internal static string defaultGroupName = "None"; + internal static string defaultUser; #endregion Class Fields } diff --git a/src/ICSharpCode.SharpZipLib/Tar/TarInputStream.cs b/src/ICSharpCode.SharpZipLib/Tar/TarInputStream.cs index f1a3622de..c87b6ff68 100644 --- a/src/ICSharpCode.SharpZipLib/Tar/TarInputStream.cs +++ b/src/ICSharpCode.SharpZipLib/Tar/TarInputStream.cs @@ -1,4 +1,5 @@ using System; +using System.Buffers; using System.IO; using System.Text; @@ -23,6 +24,7 @@ public TarInputStream(Stream inputStream) : this(inputStream, TarBuffer.DefaultBlockFactor, null) { } + /// /// Construct a TarInputStream with default block factor /// @@ -79,10 +81,7 @@ public bool IsStreamOwner /// public override bool CanRead { - get - { - return inputStream.CanRead; - } + get { return inputStream.CanRead; } } /// @@ -91,10 +90,7 @@ public override bool CanRead /// public override bool CanSeek { - get - { - return false; - } + get { return false; } } /// @@ -103,10 +99,7 @@ public override bool CanSeek /// public override bool CanWrite { - get - { - return false; - } + get { return false; } } /// @@ -114,10 +107,7 @@ public override bool CanWrite /// public override long Length { - get - { - return inputStream.Length; - } + get { return inputStream.Length; } } /// @@ -127,14 +117,8 @@ public override long Length /// Any attempt to set position public override long Position { - get - { - return inputStream.Position; - } - set - { - throw new NotSupportedException("TarInputStream Seek not supported"); - } + get { return inputStream.Position; } + set { throw new NotSupportedException("TarInputStream Seek not supported"); } } /// @@ -205,6 +189,7 @@ public override int ReadByte() // return -1 to indicate that no byte was read. return -1; } + return oneByteBuffer[0]; } @@ -270,28 +255,26 @@ public override int Read(byte[] buffer, int offset, int count) offset += sz; } + var recLen = TarBuffer.BlockSize; + var recBuf = ArrayPool.Shared.Rent(recLen); + var recBufSpan = recBuf.AsSpan(); + while (numToRead > 0) { - byte[] rec = tarBuffer.ReadBlock(); - if (rec == null) - { - // Unexpected EOF! - throw new TarException("unexpected EOF with " + numToRead + " bytes unread"); - } + tarBuffer.ReadBlockInt(recBuf); var sz = (int)numToRead; - int recLen = rec.Length; if (recLen > sz) { - Array.Copy(rec, 0, buffer, offset, sz); - readBuffer = new byte[recLen - sz]; - Array.Copy(rec, sz, readBuffer, 0, recLen - sz); + recBufSpan.Slice(0, sz).CopyTo(buffer.AsSpan().Slice(offset, sz)); + readBuffer = ArrayPool.Shared.Rent(recLen - sz); + recBufSpan.Slice(sz, recLen - sz).CopyTo(readBuffer); } else { sz = recLen; - Array.Copy(rec, 0, buffer, offset, recLen); + recBufSpan.CopyTo(buffer.AsSpan().Slice(offset, recLen)); } totalRead += sz; @@ -299,6 +282,8 @@ public override int Read(byte[] buffer, int offset, int count) offset += sz; } + ArrayPool.Shared.Return(recBuf); + entryOffset += totalRead; return totalRead; @@ -359,10 +344,7 @@ public int GetRecordSize() /// public long Available { - get - { - return entrySize - entryOffset; - } + get { return entrySize - entryOffset; } } /// @@ -402,10 +384,7 @@ public void Skip(long skipCount) /// Currently marking is not supported, the return value is always false. public bool IsMarkSupported { - get - { - return false; - } + get { return false; } } /// @@ -450,18 +429,15 @@ public TarEntry GetNextEntry() SkipToNextEntry(); } - byte[] headerBuf = tarBuffer.ReadBlock(); + byte[] headerBuf = ArrayPool.Shared.Rent(TarBuffer.BlockSize); + tarBuffer.ReadBlockInt(headerBuf); - if (headerBuf == null) - { - hasHitEOF = true; - } - else if (TarBuffer.IsEndOfArchiveBlock(headerBuf)) + if (TarBuffer.IsEndOfArchiveBlock(headerBuf)) { hasHitEOF = true; // Read the second zero-filled block - tarBuffer.ReadBlock(); + tarBuffer.ReadBlockInt(headerBuf); } else { @@ -471,6 +447,10 @@ public TarEntry GetNextEntry() if (hasHitEOF) { currentEntry = null; + if (readBuffer != null) + { + ArrayPool.Shared.Return(readBuffer); + } } else { @@ -482,6 +462,7 @@ public TarEntry GetNextEntry() { throw new TarException("Header checksum is invalid"); } + this.entryOffset = 0; this.entrySize = header.Size; @@ -496,7 +477,8 @@ public TarEntry GetNextEntry() while (numToRead > 0) { - int numRead = this.Read(nameBuffer, 0, (numToRead > nameBuffer.Length ? nameBuffer.Length : (int)numToRead)); + int numRead = this.Read(nameBuffer, 0, + (numToRead > nameBuffer.Length ? nameBuffer.Length : (int)numToRead)); if (numRead == -1) { @@ -508,16 +490,18 @@ public TarEntry GetNextEntry() } SkipToNextEntry(); - headerBuf = this.tarBuffer.ReadBlock(); + this.tarBuffer.ReadBlockInt(headerBuf); } else if (header.TypeFlag == TarHeader.LF_GHDR) - { // POSIX global extended header - // Ignore things we dont understand completely for now + { + // POSIX global extended header + // Ignore things we dont understand completely for now SkipToNextEntry(); - headerBuf = this.tarBuffer.ReadBlock(); + this.tarBuffer.ReadBlockInt(headerBuf); } else if (header.TypeFlag == TarHeader.LF_XHDR) - { // POSIX extended header + { + // POSIX extended header byte[] nameBuffer = new byte[TarBuffer.BlockSize]; long numToRead = this.entrySize; @@ -525,7 +509,8 @@ public TarEntry GetNextEntry() while (numToRead > 0) { - int numRead = this.Read(nameBuffer, 0, (numToRead > nameBuffer.Length ? nameBuffer.Length : (int)numToRead)); + int numRead = this.Read(nameBuffer, 0, + (numToRead > nameBuffer.Length ? nameBuffer.Length : (int)numToRead)); if (numRead == -1) { @@ -542,28 +527,32 @@ public TarEntry GetNextEntry() } SkipToNextEntry(); - headerBuf = this.tarBuffer.ReadBlock(); + this.tarBuffer.ReadBlockInt(headerBuf); } else if (header.TypeFlag == TarHeader.LF_GNU_VOLHDR) { // TODO: could show volume name when verbose SkipToNextEntry(); - headerBuf = this.tarBuffer.ReadBlock(); + this.tarBuffer.ReadBlockInt(headerBuf); } else if (header.TypeFlag != TarHeader.LF_NORMAL && - header.TypeFlag != TarHeader.LF_OLDNORM && - header.TypeFlag != TarHeader.LF_LINK && - header.TypeFlag != TarHeader.LF_SYMLINK && - header.TypeFlag != TarHeader.LF_DIR) + header.TypeFlag != TarHeader.LF_OLDNORM && + header.TypeFlag != TarHeader.LF_LINK && + header.TypeFlag != TarHeader.LF_SYMLINK && + header.TypeFlag != TarHeader.LF_DIR) { // Ignore things we dont understand completely for now SkipToNextEntry(); - headerBuf = tarBuffer.ReadBlock(); + tarBuffer.ReadBlockInt(headerBuf); } if (entryFactory == null) { currentEntry = new TarEntry(headerBuf, encoding); + if (readBuffer != null) + { + ArrayPool.Shared.Return(readBuffer); + } if (longName != null) { currentEntry.Name = longName.ToString(); @@ -572,6 +561,10 @@ public TarEntry GetNextEntry() else { currentEntry = entryFactory.CreateEntry(headerBuf); + if (readBuffer != null) + { + ArrayPool.Shared.Return(readBuffer); + } } // Magic was checked here for 'ustar' but there are multiple valid possibilities @@ -587,11 +580,18 @@ public TarEntry GetNextEntry() entrySize = 0; entryOffset = 0; currentEntry = null; + if (readBuffer != null) + { + ArrayPool.Shared.Return(readBuffer); + } string errorText = string.Format("Bad header in record {0} block {1} {2}", tarBuffer.CurrentRecord, tarBuffer.CurrentBlock, ex.Message); throw new InvalidHeaderException(errorText); } } + + ArrayPool.Shared.Return(headerBuf); + return currentEntry; } @@ -613,6 +613,7 @@ public void CopyEntryContents(Stream outputStream) { break; } + outputStream.Write(tempBuffer, 0, numRead); } } @@ -626,6 +627,11 @@ private void SkipToNextEntry() Skip(numToSkip); } + if (readBuffer != null) + { + ArrayPool.Shared.Return(readBuffer); + } + readBuffer = null; } @@ -676,6 +682,7 @@ public interface IEntryFactory public class EntryFactoryAdapter : IEntryFactory { Encoding nameEncoding; + /// /// Construct standard entry factory class with ASCII name encoding /// @@ -683,6 +690,7 @@ public class EntryFactoryAdapter : IEntryFactory public EntryFactoryAdapter() { } + /// /// Construct standard entry factory with name encoding /// @@ -691,6 +699,7 @@ public EntryFactoryAdapter(Encoding nameEncoding) { this.nameEncoding = nameEncoding; } + /// /// Create a based on named /// diff --git a/src/ICSharpCode.SharpZipLib/Tar/TarOutputStream.cs b/src/ICSharpCode.SharpZipLib/Tar/TarOutputStream.cs index 7c52e6c7c..8b9e4f6c7 100644 --- a/src/ICSharpCode.SharpZipLib/Tar/TarOutputStream.cs +++ b/src/ICSharpCode.SharpZipLib/Tar/TarOutputStream.cs @@ -1,4 +1,5 @@ using System; +using System.Buffers; using System.IO; using System.Text; @@ -50,8 +51,8 @@ public TarOutputStream(Stream outputStream, int blockFactor) this.outputStream = outputStream; buffer = TarBuffer.CreateOutputTarBuffer(outputStream, blockFactor); - assemblyBuffer = new byte[TarBuffer.BlockSize]; - blockBuffer = new byte[TarBuffer.BlockSize]; + assemblyBuffer = ArrayPool.Shared.Rent(TarBuffer.BlockSize); + blockBuffer = ArrayPool.Shared.Rent(TarBuffer.BlockSize); } /// @@ -70,8 +71,8 @@ public TarOutputStream(Stream outputStream, int blockFactor, Encoding nameEncodi this.outputStream = outputStream; buffer = TarBuffer.CreateOutputTarBuffer(outputStream, blockFactor); - assemblyBuffer = new byte[TarBuffer.BlockSize]; - blockBuffer = new byte[TarBuffer.BlockSize]; + assemblyBuffer = ArrayPool.Shared.Rent(TarBuffer.BlockSize); + blockBuffer = ArrayPool.Shared.Rent(TarBuffer.BlockSize); this.nameEncoding = nameEncoding; } @@ -226,6 +227,9 @@ protected override void Dispose(bool disposing) isClosed = true; Finish(); buffer.Close(); + + ArrayPool.Shared.Return(assemblyBuffer); + ArrayPool.Shared.Return(blockBuffer); } } @@ -354,7 +358,7 @@ public override void WriteByte(byte value) { Write(new byte[] { value }, 0, 1); } - + /// /// Writes bytes to the current tar archive entry. This method /// is aware of the current entry and will throw an exception if From 0fd120d00edaa9b9fe5fb9d9c124aa34ac37e870 Mon Sep 17 00:00:00 2001 From: Nathan Date: Sun, 10 Apr 2022 19:15:15 +0100 Subject: [PATCH 3/4] add async-await support to tar --- .../Program.cs | 7 - .../Tar/TarInputStream.cs | 1 + .../Tar/TarOutputStream.cs | 19 +- .../ICSharpCode.SharpZipLib.csproj | 2 + src/ICSharpCode.SharpZipLib/Tar/TarBuffer.cs | 177 +++++++---- src/ICSharpCode.SharpZipLib/Tar/TarEntry.cs | 3 - src/ICSharpCode.SharpZipLib/Tar/TarHeader.cs | 163 +++++----- .../Tar/TarInputStream.cs | 278 ++++++++++++++---- .../Tar/TarOutputStream.cs | 179 ++++++++--- .../Tar/TarTests.cs | 13 +- 10 files changed, 573 insertions(+), 269 deletions(-) diff --git a/benchmark/ICSharpCode.SharpZipLib.Benchmark/Program.cs b/benchmark/ICSharpCode.SharpZipLib.Benchmark/Program.cs index da419a518..697e2923a 100644 --- a/benchmark/ICSharpCode.SharpZipLib.Benchmark/Program.cs +++ b/benchmark/ICSharpCode.SharpZipLib.Benchmark/Program.cs @@ -2,7 +2,6 @@ using BenchmarkDotNet.Jobs; using BenchmarkDotNet.Running; using BenchmarkDotNet.Toolchains.CsProj; -using ICSharpCode.SharpZipLib.Benchmark.Tar; namespace ICSharpCode.SharpZipLib.Benchmark { @@ -21,12 +20,6 @@ class Program static void Main(string[] args) { BenchmarkSwitcher.FromAssembly(typeof(Program).Assembly).Run(args); - - // var output = new TarOutputStream(); - // for (int i = 0; i < 1_000_000; i++) - // { - // output.WriteTarOutputStream(); - // } } } } diff --git a/benchmark/ICSharpCode.SharpZipLib.Benchmark/Tar/TarInputStream.cs b/benchmark/ICSharpCode.SharpZipLib.Benchmark/Tar/TarInputStream.cs index 71abff11b..b59a217ab 100644 --- a/benchmark/ICSharpCode.SharpZipLib.Benchmark/Tar/TarInputStream.cs +++ b/benchmark/ICSharpCode.SharpZipLib.Benchmark/Tar/TarInputStream.cs @@ -64,6 +64,7 @@ public async Task ReadTarInputStreamAsync() using (var zipInputStream = new ICSharpCode.SharpZipLib.Tar.TarInputStream(memoryStream, Encoding.UTF8)) { var entry = await zipInputStream.GetNextEntryAsync(CancellationToken.None); + #if NETCOREAPP2_1_OR_GREATER while (await zipInputStream.ReadAsync(readBuffer.AsMemory()) > 0) { diff --git a/benchmark/ICSharpCode.SharpZipLib.Benchmark/Tar/TarOutputStream.cs b/benchmark/ICSharpCode.SharpZipLib.Benchmark/Tar/TarOutputStream.cs index 83b9fc9ee..f24e83e35 100644 --- a/benchmark/ICSharpCode.SharpZipLib.Benchmark/Tar/TarOutputStream.cs +++ b/benchmark/ICSharpCode.SharpZipLib.Benchmark/Tar/TarOutputStream.cs @@ -12,14 +12,14 @@ namespace ICSharpCode.SharpZipLib.Benchmark.Tar [Config(typeof(MultipleRuntimes))] public class TarOutputStream { - private readonly byte[] _backingArray = new byte[1024 * 1024 + (6 * 1024)]; - private readonly byte[] _inputBuffer = new byte[1024]; + private readonly byte[] backingArray = new byte[1024 * 1024 + (6 * 1024)]; + private readonly byte[] inputBuffer = new byte[1024]; private static readonly RandomNumberGenerator _rng = RandomNumberGenerator.Create(); [Benchmark] public void WriteTarOutputStream() { - using (var outputMemoryStream = new MemoryStream(_backingArray)) + using (var outputMemoryStream = new MemoryStream(backingArray)) { using (var tarOutputStream = new ICSharpCode.SharpZipLib.Tar.TarOutputStream(outputMemoryStream, Encoding.UTF8)) @@ -28,33 +28,34 @@ public void WriteTarOutputStream() tarEntry.Size = 1024 * 1024; tarOutputStream.PutNextEntry(tarEntry); - _rng.GetBytes(_inputBuffer); + _rng.GetBytes(inputBuffer); for (int i = 0; i < 1024; i++) { - tarOutputStream.Write(_inputBuffer, 0, _inputBuffer.Length); + tarOutputStream.Write(inputBuffer, 0, inputBuffer.Length); } } } } - + [Benchmark] public async Task WriteTarOutputStreamAsync() { - using (var outputMemoryStream = new MemoryStream(_backingArray)) + using (var outputMemoryStream = new MemoryStream(backingArray)) { using (var tarOutputStream = new ICSharpCode.SharpZipLib.Tar.TarOutputStream(outputMemoryStream, Encoding.UTF8)) { var tarEntry = TarEntry.CreateTarEntry("some file"); tarEntry.Size = 1024 * 1024; + await tarOutputStream.PutNextEntryAsync(tarEntry, CancellationToken.None); - _rng.GetBytes(_inputBuffer); + _rng.GetBytes(inputBuffer); for (int i = 0; i < 1024; i++) { - await tarOutputStream.WriteAsync(_inputBuffer, 0, _inputBuffer.Length); + await tarOutputStream.WriteAsync(inputBuffer, 0, inputBuffer.Length); } } } diff --git a/src/ICSharpCode.SharpZipLib/ICSharpCode.SharpZipLib.csproj b/src/ICSharpCode.SharpZipLib/ICSharpCode.SharpZipLib.csproj index e3167501f..e736ad1cc 100644 --- a/src/ICSharpCode.SharpZipLib/ICSharpCode.SharpZipLib.csproj +++ b/src/ICSharpCode.SharpZipLib/ICSharpCode.SharpZipLib.csproj @@ -36,10 +36,12 @@ Please see https://github.com/icsharpcode/SharpZipLib/wiki/Release-1.3.3 for mor + + diff --git a/src/ICSharpCode.SharpZipLib/Tar/TarBuffer.cs b/src/ICSharpCode.SharpZipLib/Tar/TarBuffer.cs index 0fe98faf4..cd37e7af4 100644 --- a/src/ICSharpCode.SharpZipLib/Tar/TarBuffer.cs +++ b/src/ICSharpCode.SharpZipLib/Tar/TarBuffer.cs @@ -1,6 +1,8 @@ using System; using System.Buffers; using System.IO; +using System.Threading; +using System.Threading.Tasks; namespace ICSharpCode.SharpZipLib.Tar { @@ -73,10 +75,7 @@ record of N blocks is written with a single 'write ()' /// This is equal to the multiplied by the public int RecordSize { - get - { - return recordSize; - } + get { return recordSize; } } /// @@ -96,10 +95,7 @@ public int GetRecordSize() /// This is the number of blocks in each record. public int BlockFactor { - get - { - return blockFactor; - } + get { return blockFactor; } } /// @@ -291,6 +287,19 @@ public static bool IsEndOfArchiveBlock(byte[] block) /// Skip over a block on the input stream. /// public void SkipBlock() + { + SkipBlockAsync(CancellationToken.None, false).GetAwaiter().GetResult(); + } + + /// + /// Skip over a block on the input stream. + /// + public Task SkipBlockAsync(CancellationToken ct) + { + return SkipBlockAsync(ct, true).AsTask(); + } + + private async ValueTask SkipBlockAsync(CancellationToken ct, bool isAsync) { if (inputStream == null) { @@ -299,7 +308,7 @@ public void SkipBlock() if (currentBlockIndex >= BlockFactor) { - if (!ReadRecord()) + if (!await ReadRecordAsync(ct, isAsync)) { throw new TarException("Failed to read a record"); } @@ -323,7 +332,7 @@ public byte[] ReadBlock() if (currentBlockIndex >= BlockFactor) { - if (!ReadRecord()) + if (!ReadRecordAsync(CancellationToken.None, false).GetAwaiter().GetResult()) { throw new TarException("Failed to read a record"); } @@ -335,14 +344,14 @@ public byte[] ReadBlock() currentBlockIndex++; return result; } - - internal void ReadBlockInt(Span buffer) + + internal async ValueTask ReadBlockIntAsync(byte[] buffer, CancellationToken ct, bool isAsync) { if (buffer.Length != BlockSize) { throw new ArgumentException("BUG: buffer must have length BlockSize"); } - + if (inputStream == null) { throw new TarException("TarBuffer.ReadBlock - no input stream defined"); @@ -350,13 +359,13 @@ internal void ReadBlockInt(Span buffer) if (currentBlockIndex >= BlockFactor) { - if (!ReadRecord()) + if (!await ReadRecordAsync(ct, isAsync)) { throw new TarException("Failed to read a record"); } } - - recordBuffer.AsSpan().Slice(currentBlockIndex* BlockSize, BlockSize).CopyTo(buffer); + + recordBuffer.AsSpan().Slice(currentBlockIndex * BlockSize, BlockSize).CopyTo(buffer); currentBlockIndex++; } @@ -366,7 +375,7 @@ internal void ReadBlockInt(Span buffer) /// /// false if End-Of-File, else true. /// - private bool ReadRecord() + private async ValueTask ReadRecordAsync(CancellationToken ct, bool isAsync) { if (inputStream == null) { @@ -380,7 +389,9 @@ private bool ReadRecord() while (bytesNeeded > 0) { - long numBytes = inputStream.Read(recordBuffer, offset, bytesNeeded); + long numBytes = isAsync + ? await inputStream.ReadAsync(recordBuffer, offset, bytesNeeded, ct) + : inputStream.Read(recordBuffer, offset, bytesNeeded); // // NOTE @@ -463,6 +474,18 @@ public int GetCurrentRecordNum() return currentRecordIndex; } + /// + /// Write a block of data to the archive. + /// + /// + /// The data to write to the archive. + /// + /// + public ValueTask WriteBlockAsync(byte[] block, CancellationToken ct) + { + return WriteBlockAsync(block, 0, ct); + } + /// /// Write a block of data to the archive. /// @@ -471,30 +494,24 @@ public int GetCurrentRecordNum() /// public void WriteBlock(byte[] block) { - if (block == null) - { - throw new ArgumentNullException(nameof(block)); - } - - if (outputStream == null) - { - throw new TarException("TarBuffer.WriteBlock - no output stream defined"); - } - - if (block.Length != BlockSize) - { - string errorText = string.Format("TarBuffer.WriteBlock - block to write has length '{0}' which is not the block size of '{1}'", - block.Length, BlockSize); - throw new TarException(errorText); - } - - if (currentBlockIndex >= BlockFactor) - { - WriteRecord(); - } + WriteBlock(block, 0); + } - Array.Copy(block, 0, recordBuffer, (currentBlockIndex * BlockSize), BlockSize); - currentBlockIndex++; + /// + /// Write an archive record to the archive, where the record may be + /// inside of a larger array buffer. The buffer must be "offset plus + /// record size" long. + /// + /// + /// The buffer containing the record data to write. + /// + /// + /// The offset of the record data within buffer. + /// + /// + public ValueTask WriteBlockAsync(byte[] buffer, int offset, CancellationToken ct) + { + return WriteBlockAsync(buffer, offset, ct, true); } /// @@ -509,6 +526,11 @@ public void WriteBlock(byte[] block) /// The offset of the record data within buffer. /// public void WriteBlock(byte[] buffer, int offset) + { + WriteBlockAsync(buffer, offset, CancellationToken.None, false).GetAwaiter().GetResult(); + } + + internal async ValueTask WriteBlockAsync(byte[] buffer, int offset, CancellationToken ct, bool isAsync) { if (buffer == null) { @@ -527,14 +549,15 @@ public void WriteBlock(byte[] buffer, int offset) if ((offset + BlockSize) > buffer.Length) { - string errorText = string.Format("TarBuffer.WriteBlock - record has length '{0}' with offset '{1}' which is less than the record size of '{2}'", + string errorText = string.Format( + "TarBuffer.WriteBlock - record has length '{0}' with offset '{1}' which is less than the record size of '{2}'", buffer.Length, offset, recordSize); throw new TarException(errorText); } if (currentBlockIndex >= BlockFactor) { - WriteRecord(); + await WriteRecordAsync(CancellationToken.None, isAsync); } Array.Copy(buffer, offset, recordBuffer, (currentBlockIndex * BlockSize), BlockSize); @@ -545,15 +568,23 @@ public void WriteBlock(byte[] buffer, int offset) /// /// Write a TarBuffer record to the archive. /// - private void WriteRecord() + private async ValueTask WriteRecordAsync(CancellationToken ct, bool isAsync) { if (outputStream == null) { throw new TarException("TarBuffer.WriteRecord no output stream defined"); } - outputStream.Write(recordBuffer, 0, RecordSize); - outputStream.Flush(); + if (isAsync) + { + await outputStream.WriteAsync(recordBuffer, 0, RecordSize, ct); + await outputStream.FlushAsync(ct); + } + else + { + outputStream.Write(recordBuffer, 0, RecordSize); + outputStream.Flush(); + } currentBlockIndex = 0; currentRecordIndex++; @@ -564,7 +595,7 @@ private void WriteRecord() /// /// Any trailing bytes are set to zero which is by definition correct behaviour /// for the end of a tar stream. - private void WriteFinalRecord() + private async ValueTask WriteFinalRecordAsync(CancellationToken ct, bool isAsync) { if (outputStream == null) { @@ -575,36 +606,76 @@ private void WriteFinalRecord() { int dataBytes = currentBlockIndex * BlockSize; Array.Clear(recordBuffer, dataBytes, RecordSize - dataBytes); - WriteRecord(); + await WriteRecordAsync(ct, isAsync); } - outputStream.Flush(); + if (isAsync) + { + await outputStream.FlushAsync(ct); + } + else + { + outputStream.Flush(); + } } /// /// Close the TarBuffer. If this is an output buffer, also flush the /// current block before closing. /// - public void Close() + public void Close() => CloseAsync(CancellationToken.None, false).GetAwaiter().GetResult(); + + /// + /// Close the TarBuffer. If this is an output buffer, also flush the + /// current block before closing. + /// + public Task CloseAsync(CancellationToken ct) => CloseAsync(ct, true).AsTask(); + + private async ValueTask CloseAsync(CancellationToken ct, bool isAsync) { if (outputStream != null) { - WriteFinalRecord(); + await WriteFinalRecordAsync(ct, isAsync); if (IsStreamOwner) { - outputStream.Dispose(); + if (isAsync) + { +#if NETSTANDARD2_1_OR_GREATER + await outputStream.DisposeAsync(); +#else + outputStream.Dispose(); +#endif + } + else + { + outputStream.Dispose(); + } } + outputStream = null; } else if (inputStream != null) { if (IsStreamOwner) { - inputStream.Dispose(); + if (isAsync) + { +#if NETSTANDARD2_1_OR_GREATER + await inputStream.DisposeAsync(); +#else + inputStream.Dispose(); +#endif + } + else + { + inputStream.Dispose(); + } } + inputStream = null; } + ArrayPool.Shared.Return(recordBuffer); } diff --git a/src/ICSharpCode.SharpZipLib/Tar/TarEntry.cs b/src/ICSharpCode.SharpZipLib/Tar/TarEntry.cs index 9a986de86..2f3cf7862 100644 --- a/src/ICSharpCode.SharpZipLib/Tar/TarEntry.cs +++ b/src/ICSharpCode.SharpZipLib/Tar/TarEntry.cs @@ -497,9 +497,6 @@ static public void AdjustEntryName(byte[] buffer, string newName, Encoding nameE /// /// Fill in a TarHeader given only the entry's name. /// - /// - /// The TarHeader to fill in. - /// /// /// The tar entry name. /// diff --git a/src/ICSharpCode.SharpZipLib/Tar/TarHeader.cs b/src/ICSharpCode.SharpZipLib/Tar/TarHeader.cs index 8a6d263a0..36d6eca44 100644 --- a/src/ICSharpCode.SharpZipLib/Tar/TarHeader.cs +++ b/src/ICSharpCode.SharpZipLib/Tar/TarHeader.cs @@ -126,106 +126,106 @@ public class TarHeader /// /// Normal file type. /// - public const byte LF_NORMAL = (byte)'0'; + public const byte LF_NORMAL = (byte) '0'; /// /// Link file type. /// - public const byte LF_LINK = (byte)'1'; + public const byte LF_LINK = (byte) '1'; /// /// Symbolic link file type. /// - public const byte LF_SYMLINK = (byte)'2'; + public const byte LF_SYMLINK = (byte) '2'; /// /// Character device file type. /// - public const byte LF_CHR = (byte)'3'; + public const byte LF_CHR = (byte) '3'; /// /// Block device file type. /// - public const byte LF_BLK = (byte)'4'; + public const byte LF_BLK = (byte) '4'; /// /// Directory file type. /// - public const byte LF_DIR = (byte)'5'; + public const byte LF_DIR = (byte) '5'; /// /// FIFO (pipe) file type. /// - public const byte LF_FIFO = (byte)'6'; + public const byte LF_FIFO = (byte) '6'; /// /// Contiguous file type. /// - public const byte LF_CONTIG = (byte)'7'; + public const byte LF_CONTIG = (byte) '7'; /// /// Posix.1 2001 global extended header /// - public const byte LF_GHDR = (byte)'g'; + public const byte LF_GHDR = (byte) 'g'; /// /// Posix.1 2001 extended header /// - public const byte LF_XHDR = (byte)'x'; + public const byte LF_XHDR = (byte) 'x'; // POSIX allows for upper case ascii type as extensions /// /// Solaris access control list file type /// - public const byte LF_ACL = (byte)'A'; + public const byte LF_ACL = (byte) 'A'; /// /// GNU dir dump file type /// This is a dir entry that contains the names of files that were in the /// dir at the time the dump was made /// - public const byte LF_GNU_DUMPDIR = (byte)'D'; + public const byte LF_GNU_DUMPDIR = (byte) 'D'; /// /// Solaris Extended Attribute File /// - public const byte LF_EXTATTR = (byte)'E'; + public const byte LF_EXTATTR = (byte) 'E'; /// /// Inode (metadata only) no file content /// - public const byte LF_META = (byte)'I'; + public const byte LF_META = (byte) 'I'; /// /// Identifies the next file on the tape as having a long link name /// - public const byte LF_GNU_LONGLINK = (byte)'K'; + public const byte LF_GNU_LONGLINK = (byte) 'K'; /// /// Identifies the next file on the tape as having a long name /// - public const byte LF_GNU_LONGNAME = (byte)'L'; + public const byte LF_GNU_LONGNAME = (byte) 'L'; /// /// Continuation of a file that began on another volume /// - public const byte LF_GNU_MULTIVOL = (byte)'M'; + public const byte LF_GNU_MULTIVOL = (byte) 'M'; /// /// For storing filenames that dont fit in the main header (old GNU) /// - public const byte LF_GNU_NAMES = (byte)'N'; + public const byte LF_GNU_NAMES = (byte) 'N'; /// /// GNU Sparse file /// - public const byte LF_GNU_SPARSE = (byte)'S'; + public const byte LF_GNU_SPARSE = (byte) 'S'; /// /// GNU Tape/volume header ignore on extraction /// - public const byte LF_GNU_VOLHDR = (byte)'V'; + public const byte LF_GNU_VOLHDR = (byte) 'V'; /// /// The magic tag representing a POSIX tar archive. (would be written with a trailing NULL) @@ -279,6 +279,7 @@ public string Name { throw new ArgumentNullException(nameof(value)); } + name = value; } } @@ -341,6 +342,7 @@ public long Size { throw new ArgumentOutOfRangeException(nameof(value), "Cannot be less than zero"); } + size = value; } } @@ -361,6 +363,7 @@ public DateTime ModTime { throw new ArgumentOutOfRangeException(nameof(value), "ModTime cannot be before Jan 1st 1970"); } + modTime = new DateTime(value.Year, value.Month, value.Day, value.Hour, value.Minute, value.Second); } } @@ -403,6 +406,7 @@ public string LinkName { throw new ArgumentNullException(nameof(value)); } + linkName = value; } } @@ -420,6 +424,7 @@ public string Magic { throw new ArgumentNullException(nameof(value)); } + magic = value; } } @@ -430,10 +435,7 @@ public string Magic /// Thrown when attempting to set Version to null. public string Version { - get - { - return version; - } + get { return version; } set { @@ -441,6 +443,7 @@ public string Version { throw new ArgumentNullException(nameof(value)); } + version = value; } } @@ -464,6 +467,7 @@ public string UserName { currentUser = currentUser.Substring(0, UNAMELEN); } + userName = currentUser; } } @@ -541,17 +545,18 @@ public void ParseBuffer(byte[] header, Encoding nameEncoding) } int offset = 0; + var headerSpan = header.AsSpan(); - name = ParseName(header, offset, NAMELEN, nameEncoding); + name = ParseName(headerSpan.Slice(offset, NAMELEN), nameEncoding); offset += NAMELEN; - mode = (int)ParseOctal(header, offset, MODELEN); + mode = (int) ParseOctal(header, offset, MODELEN); offset += MODELEN; - UserId = (int)ParseOctal(header, offset, UIDLEN); + UserId = (int) ParseOctal(header, offset, UIDLEN); offset += UIDLEN; - GroupId = (int)ParseOctal(header, offset, GIDLEN); + GroupId = (int) ParseOctal(header, offset, GIDLEN); offset += GIDLEN; Size = ParseBinaryOrOctal(header, offset, SIZELEN); @@ -560,35 +565,35 @@ public void ParseBuffer(byte[] header, Encoding nameEncoding) ModTime = GetDateTimeFromCTime(ParseOctal(header, offset, MODTIMELEN)); offset += MODTIMELEN; - checksum = (int)ParseOctal(header, offset, CHKSUMLEN); + checksum = (int) ParseOctal(header, offset, CHKSUMLEN); offset += CHKSUMLEN; TypeFlag = header[offset++]; - LinkName = ParseName(header, offset, NAMELEN, nameEncoding); + LinkName = ParseName(headerSpan.Slice(offset, NAMELEN), nameEncoding); offset += NAMELEN; - Magic = ParseName(header, offset, MAGICLEN, nameEncoding); + Magic = ParseName(headerSpan.Slice(offset, MAGICLEN), nameEncoding); offset += MAGICLEN; if (Magic == "ustar") { - Version = ParseName(header, offset, VERSIONLEN, nameEncoding); + Version = ParseName(headerSpan.Slice(offset, VERSIONLEN), nameEncoding); offset += VERSIONLEN; - UserName = ParseName(header, offset, UNAMELEN, nameEncoding); + UserName = ParseName(headerSpan.Slice(offset, UNAMELEN), nameEncoding); offset += UNAMELEN; - GroupName = ParseName(header, offset, GNAMELEN, nameEncoding); + GroupName = ParseName(headerSpan.Slice(offset, GNAMELEN), nameEncoding); offset += GNAMELEN; - DevMajor = (int)ParseOctal(header, offset, DEVLEN); + DevMajor = (int) ParseOctal(header, offset, DEVLEN); offset += DEVLEN; - DevMinor = (int)ParseOctal(header, offset, DEVLEN); + DevMinor = (int) ParseOctal(header, offset, DEVLEN); offset += DEVLEN; - string prefix = ParseName(header, offset, PREFIXLEN, nameEncoding); + string prefix = ParseName(headerSpan.Slice(offset, PREFIXLEN), nameEncoding); if (!string.IsNullOrEmpty(prefix)) Name = prefix + '/' + Name; } @@ -642,7 +647,7 @@ public void WriteHeader(byte[] outBuffer, Encoding nameEncoding) int csOffset = offset; for (int c = 0; c < CHKSUMLEN; ++c) { - outBuffer[offset++] = (byte)' '; + outBuffer[offset++] = (byte) ' '; } outBuffer[offset++] = TypeFlag; @@ -687,7 +692,7 @@ public override int GetHashCode() public override bool Equals(object obj) { var localHeader = obj as TarHeader; - + bool result; if (localHeader != null) { @@ -711,7 +716,7 @@ public override bool Equals(object obj) { result = false; } - + return result; } @@ -750,8 +755,10 @@ private static long ParseBinaryOrOctal(byte[] header, int offset, int length) { result = result << 8 | header[offset + pos]; } + return result; } + return ParseOctal(header, offset, length); } @@ -780,14 +787,14 @@ public static long ParseOctal(byte[] header, int offset, int length) break; } - if (header[i] == (byte)' ' || header[i] == '0') + if (header[i] == (byte) ' ' || header[i] == '0') { if (stillPadding) { continue; } - if (header[i] == (byte)' ') + if (header[i] == (byte) ' ') { break; } @@ -819,7 +826,7 @@ public static long ParseOctal(byte[] header, int offset, int length) [Obsolete("No Encoding for Name field is specified, any non-ASCII bytes will be discarded")] public static string ParseName(byte[] header, int offset, int length) { - return ParseName(header, offset, length, null); + return ParseName(header.AsSpan().Slice(offset, length), null); } /// @@ -828,58 +835,33 @@ public static string ParseName(byte[] header, int offset, int length) /// /// The header buffer from which to parse. /// - /// - /// The offset into the buffer from which to parse. - /// - /// - /// The number of header bytes to parse. - /// /// /// name encoding, or null for ASCII only /// /// /// The name parsed. /// - public static string ParseName(byte[] header, int offset, int length, Encoding encoding) + public static string ParseName(ReadOnlySpan header, Encoding encoding) { - if (header == null) - { - throw new ArgumentNullException(nameof(header)); - } - - if (offset < 0) - { - throw new ArgumentOutOfRangeException(nameof(offset), "Cannot be less than zero"); - } - - if (length < 0) - { - throw new ArgumentOutOfRangeException(nameof(length), "Cannot be less than zero"); - } - - if (offset + length > header.Length) - { - throw new ArgumentException("Exceeds header size", nameof(length)); - } - var builder = StringBuilderPool.Instance.Rent(); int count = 0; if (encoding == null) { - for (int i = offset; i < offset + length; ++i) + for (int i = 0; i < header.Length; ++i) { - if (header[i] == 0) + var b = header[i]; + if (b == 0) { break; } - builder.Append((char)header[i]); + builder.Append((char) b); } } else { - for (int i = offset; i < offset + length; ++i, ++count) + for (int i = 0; i < header.Length; ++i, ++count) { if (header[i] == 0) { @@ -887,7 +869,12 @@ public static string ParseName(byte[] header, int offset, int length, Encoding e } } - builder.Append(encoding.GetString(header, offset, count)); +#if NETSTANDARD2_1_OR_GREATER + var value = encoding.GetString(header.Slice(0, count)); +#else + var value = encoding.GetString(header.ToArray(), 0, count); +#endif + builder.Append(value); } var result = builder.ToString(); @@ -950,7 +937,8 @@ public static int GetNameBytes(string name, int nameOffset, byte[] buffer, int b if (encoding != null) { // it can be more sufficient if using Span or unsafe - ReadOnlySpan nameArray = name.AsSpan().Slice(nameOffset, Math.Min(name.Length - nameOffset, length)); + ReadOnlySpan nameArray = + name.AsSpan().Slice(nameOffset, Math.Min(name.Length - nameOffset, length)); var charArray = ArrayPool.Shared.Rent(nameArray.Length); nameArray.CopyTo(charArray); @@ -963,7 +951,7 @@ public static int GetNameBytes(string name, int nameOffset, byte[] buffer, int b { for (i = 0; i < length && nameOffset + i < name.Length; ++i) { - buffer[bufferOffset + i] = (byte)name[nameOffset + i]; + buffer[bufferOffset + i] = (byte) name[nameOffset + i]; } } @@ -971,8 +959,10 @@ public static int GetNameBytes(string name, int nameOffset, byte[] buffer, int b { buffer[bufferOffset + i] = 0; } + return bufferOffset + length; } + /// /// Add an entry name to the buffer /// @@ -1071,6 +1061,7 @@ public static int GetNameBytes(string name, byte[] buffer, int offset, int lengt return GetNameBytes(name, 0, buffer, offset, length, encoding); } + /// /// Add a string to a buffer as a collection of ascii bytes. /// @@ -1114,7 +1105,7 @@ public static int GetAsciiBytes(string toAdd, int nameOffset, byte[] buffer, int { for (i = 0; i < length && nameOffset + i < toAdd.Length; ++i) { - buffer[bufferOffset + i] = (byte)toAdd[nameOffset + i]; + buffer[bufferOffset + i] = (byte) toAdd[nameOffset + i]; } } else @@ -1126,6 +1117,7 @@ public static int GetAsciiBytes(string toAdd, int nameOffset, byte[] buffer, int i = Math.Min(bytes.Length, length); Array.Copy(bytes, 0, buffer, bufferOffset, i); } + // If length is beyond the toAdd string length (which is OK by the prev loop condition), eg if a field has fixed length and the string is shorter, make sure all of the extra chars are written as NULLs, so that the reader func would ignore them and get back the original string for (; i < length; ++i) buffer[bufferOffset + i] = 0; @@ -1167,14 +1159,14 @@ public static int GetOctalBytes(long value, byte[] buffer, int offset, int lengt { for (long v = value; (localIndex >= 0) && (v > 0); --localIndex) { - buffer[offset + localIndex] = (byte)((byte)'0' + (byte)(v & 7)); + buffer[offset + localIndex] = (byte) ((byte) '0' + (byte) (v & 7)); v >>= 3; } } for (; localIndex >= 0; --localIndex) { - buffer[offset + localIndex] = (byte)'0'; + buffer[offset + localIndex] = (byte) '0'; } return offset + length; @@ -1196,12 +1188,14 @@ private static int GetBinaryOrOctalBytes(long value, byte[] buffer, int offset, // Put value as binary, right-justified into the buffer. Set high order bit of left-most byte. for (int pos = length - 1; pos > 0; pos--) { - buffer[offset + pos] = (byte)value; + buffer[offset + pos] = (byte) value; value = value >> 8; } + buffer[offset] = 0x80; return offset + length; } + return GetOctalBytes(value, buffer, offset, length); } @@ -1235,6 +1229,7 @@ private static int ComputeCheckSum(byte[] buffer) { sum += buffer[i]; } + return sum; } @@ -1253,19 +1248,20 @@ private static int MakeCheckSum(byte[] buffer) for (int i = 0; i < CHKSUMLEN; ++i) { - sum += (byte)' '; + sum += (byte) ' '; } for (int i = CHKSUMOFS + CHKSUMLEN; i < buffer.Length; ++i) { sum += buffer[i]; } + return sum; } private static int GetCTime(DateTime dateTime) { - return unchecked((int)((dateTime.Ticks - dateTime1970.Ticks) / timeConversionFactor)); + return unchecked((int) ((dateTime.Ticks - dateTime1970.Ticks) / timeConversionFactor)); } private static DateTime GetDateTimeFromCTime(long ticks) @@ -1280,6 +1276,7 @@ private static DateTime GetDateTimeFromCTime(long ticks) { result = dateTime1970; } + return result; } diff --git a/src/ICSharpCode.SharpZipLib/Tar/TarInputStream.cs b/src/ICSharpCode.SharpZipLib/Tar/TarInputStream.cs index c87b6ff68..cdd84a3d5 100644 --- a/src/ICSharpCode.SharpZipLib/Tar/TarInputStream.cs +++ b/src/ICSharpCode.SharpZipLib/Tar/TarInputStream.cs @@ -2,6 +2,9 @@ using System.Buffers; using System.IO; using System.Text; +using System.Threading; +using System.Threading.Tasks; +using ICSharpCode.SharpZipLib.Core; namespace ICSharpCode.SharpZipLib.Tar { @@ -129,6 +132,15 @@ public override void Flush() inputStream.Flush(); } + /// + /// Flushes the baseInputStream + /// + /// + public override async Task FlushAsync(CancellationToken cancellationToken) + { + await inputStream.FlushAsync(cancellationToken); + } + /// /// Set the streams position. This operation is not supported and will throw a NotSupportedException /// @@ -182,17 +194,65 @@ public override void WriteByte(byte value) /// A byte cast to an int; -1 if the at the end of the stream. public override int ReadByte() { - byte[] oneByteBuffer = new byte[1]; - int num = Read(oneByteBuffer, 0, 1); + var oneByteBuffer = ArrayPool.Shared.Rent(1); + var num = Read(oneByteBuffer, 0, 1); if (num <= 0) { // return -1 to indicate that no byte was read. return -1; } - return oneByteBuffer[0]; + var result = oneByteBuffer[0]; + ArrayPool.Shared.Return(oneByteBuffer); + return result; + } + + + /// + /// Reads bytes from the current tar archive entry. + /// + /// This method is aware of the boundaries of the current + /// entry in the archive and will deal with them appropriately + /// + /// + /// The buffer into which to place bytes read. + /// + /// + /// The offset at which to place bytes read. + /// + /// + /// The number of bytes to read. + /// + /// + /// + /// The number of bytes read, or 0 at end of stream/EOF. + /// + public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) + { + return ReadAsync(buffer.AsMemory().Slice(offset, count), cancellationToken, true).AsTask(); } +#if NETSTANDARD2_1_OR_GREATER + /// + /// Reads bytes from the current tar archive entry. + /// + /// This method is aware of the boundaries of the current + /// entry in the archive and will deal with them appropriately + /// + /// + /// The buffer into which to place bytes read. + /// + /// + /// + /// The number of bytes read, or 0 at end of stream/EOF. + /// + public override ValueTask ReadAsync(Memory buffer, CancellationToken cancellationToken = + new CancellationToken()) + { + return ReadAsync(buffer, cancellationToken, true); + } +#endif + /// /// Reads bytes from the current tar archive entry. /// @@ -218,6 +278,13 @@ public override int Read(byte[] buffer, int offset, int count) throw new ArgumentNullException(nameof(buffer)); } + return ReadAsync(buffer.AsMemory().Slice(offset, count), CancellationToken.None, false).GetAwaiter() + .GetResult(); + } + + private async ValueTask ReadAsync(Memory buffer, CancellationToken ct, bool isAsync) + { + int offset = 0; int totalRead = 0; if (entryOffset >= entrySize) @@ -225,7 +292,7 @@ public override int Read(byte[] buffer, int offset, int count) return 0; } - long numToRead = count; + long numToRead = buffer.Length; if ((numToRead + entryOffset) > entrySize) { @@ -236,7 +303,7 @@ public override int Read(byte[] buffer, int offset, int count) { int sz = (numToRead > readBuffer.Length) ? readBuffer.Length : (int)numToRead; - Array.Copy(readBuffer, 0, buffer, offset, sz); + readBuffer.AsSpan().Slice(0, sz).CopyTo(buffer.Slice(offset, sz).Span); if (sz >= readBuffer.Length) { @@ -245,8 +312,13 @@ public override int Read(byte[] buffer, int offset, int count) else { int newLen = readBuffer.Length - sz; - byte[] newBuf = new byte[newLen]; - Array.Copy(readBuffer, sz, newBuf, 0, newLen); + byte[] newBuf = ArrayPool.Shared.Rent(newLen); + readBuffer.AsSpan().Slice(sz, newLen).CopyTo(newBuf.AsSpan().Slice(0, newLen)); + if (readBuffer != null) + { + ArrayPool.Shared.Return(readBuffer); + } + readBuffer = newBuf; } @@ -257,24 +329,28 @@ public override int Read(byte[] buffer, int offset, int count) var recLen = TarBuffer.BlockSize; var recBuf = ArrayPool.Shared.Rent(recLen); - var recBufSpan = recBuf.AsSpan(); while (numToRead > 0) { - tarBuffer.ReadBlockInt(recBuf); + await tarBuffer.ReadBlockIntAsync(recBuf, ct, isAsync); var sz = (int)numToRead; if (recLen > sz) { - recBufSpan.Slice(0, sz).CopyTo(buffer.AsSpan().Slice(offset, sz)); + recBuf.AsSpan().Slice(0, sz).CopyTo(buffer.Slice(offset, sz).Span); + if (readBuffer != null) + { + ArrayPool.Shared.Return(readBuffer); + } + readBuffer = ArrayPool.Shared.Rent(recLen - sz); - recBufSpan.Slice(sz, recLen - sz).CopyTo(readBuffer); + recBuf.AsSpan().Slice(sz, recLen - sz).CopyTo(readBuffer.AsSpan()); } else { sz = recLen; - recBufSpan.CopyTo(buffer.AsSpan().Slice(offset, recLen)); + recBuf.AsSpan().CopyTo(buffer.Slice(offset, recLen).Span); } totalRead += sz; @@ -301,6 +377,17 @@ protected override void Dispose(bool disposing) } } +#if NETSTANDARD2_1_OR_GREATER + /// + /// Closes this stream. Calls the TarBuffer's close() method. + /// The underlying stream is closed by the TarBuffer. + /// + public override async ValueTask DisposeAsync() + { + await tarBuffer.CloseAsync(CancellationToken.None); + } +#endif + #endregion Stream Overrides /// @@ -356,25 +443,42 @@ public long Available /// /// The number of bytes to skip. /// - public void Skip(long skipCount) + /// + private Task SkipAsync(long skipCount, CancellationToken ct) => SkipAsync(skipCount, ct, true).AsTask(); + + /// + /// Skip bytes in the input buffer. This skips bytes in the + /// current entry's data, not the entire archive, and will + /// stop at the end of the current entry's data if the number + /// to skip extends beyond that point. + /// + /// + /// The number of bytes to skip. + /// + private void Skip(long skipCount) => + SkipAsync(skipCount, CancellationToken.None, false).GetAwaiter().GetResult(); + + private async ValueTask SkipAsync(long skipCount, CancellationToken ct, bool isAsync) { // TODO: REVIEW efficiency of TarInputStream.Skip // This is horribly inefficient, but it ensures that we // properly skip over bytes via the TarBuffer... // - byte[] skipBuf = new byte[8 * 1024]; - - for (long num = skipCount; num > 0;) + var length = 8 * 1024; + using (var skipBuf = MemoryPool.Shared.Rent(length)) { - int toRead = num > skipBuf.Length ? skipBuf.Length : (int)num; - int numRead = Read(skipBuf, 0, toRead); - - if (numRead == -1) + for (long num = skipCount; num > 0;) { - break; - } + int toRead = num > length ? length : (int)num; + int numRead = await ReadAsync(skipBuf.Memory.Slice(0, toRead), ct, isAsync); + + if (numRead == -1) + { + break; + } - num -= numRead; + num -= numRead; + } } } @@ -417,7 +521,24 @@ public void Reset() /// /// The next TarEntry in the archive, or null. /// - public TarEntry GetNextEntry() + public Task GetNextEntryAsync(CancellationToken ct) => GetNextEntryAsync(ct, true).AsTask(); + + /// + /// Get the next entry in this tar archive. This will skip + /// over any remaining data in the current entry, if there + /// is one, and place the input stream at the header of the + /// next entry, and read the header and instantiate a new + /// TarEntry from the header bytes and return that entry. + /// If there are no more entries in the archive, null will + /// be returned to indicate that the end of the archive has + /// been reached. + /// + /// + /// The next TarEntry in the archive, or null. + /// + public TarEntry GetNextEntry() => GetNextEntryAsync(CancellationToken.None, true).GetAwaiter().GetResult(); + + private async ValueTask GetNextEntryAsync(CancellationToken ct, bool isAsync) { if (hasHitEOF) { @@ -426,18 +547,18 @@ public TarEntry GetNextEntry() if (currentEntry != null) { - SkipToNextEntry(); + await SkipToNextEntryAsync(ct, isAsync); } byte[] headerBuf = ArrayPool.Shared.Rent(TarBuffer.BlockSize); - tarBuffer.ReadBlockInt(headerBuf); + await tarBuffer.ReadBlockIntAsync(headerBuf, ct, isAsync); if (TarBuffer.IsEndOfArchiveBlock(headerBuf)) { hasHitEOF = true; // Read the second zero-filled block - tarBuffer.ReadBlockInt(headerBuf); + await tarBuffer.ReadBlockIntAsync(headerBuf, ct, isAsync); } else { @@ -466,51 +587,57 @@ public TarEntry GetNextEntry() this.entryOffset = 0; this.entrySize = header.Size; - StringBuilder longName = null; + string longName = null; if (header.TypeFlag == TarHeader.LF_GNU_LONGNAME) { - byte[] nameBuffer = new byte[TarBuffer.BlockSize]; - long numToRead = this.entrySize; - - longName = new StringBuilder(); - - while (numToRead > 0) + using (var nameBuffer = MemoryPool.Shared.Rent(TarBuffer.BlockSize)) { - int numRead = this.Read(nameBuffer, 0, - (numToRead > nameBuffer.Length ? nameBuffer.Length : (int)numToRead)); + long numToRead = this.entrySize; - if (numRead == -1) + var longNameBuilder = StringBuilderPool.Instance.Rent(); + + while (numToRead > 0) { - throw new InvalidHeaderException("Failed to read long name entry"); + var length = (numToRead > TarBuffer.BlockSize ? TarBuffer.BlockSize : (int)numToRead); + int numRead = await ReadAsync(nameBuffer.Memory.Slice(0, length), ct, isAsync); + + if (numRead == -1) + { + throw new InvalidHeaderException("Failed to read long name entry"); + } + + longNameBuilder.Append(TarHeader.ParseName(nameBuffer.Memory.Slice(0, numRead).Span, + encoding)); + numToRead -= numRead; } - longName.Append(TarHeader.ParseName(nameBuffer, 0, numRead, encoding).ToString()); - numToRead -= numRead; - } + longName = longNameBuilder.ToString(); + StringBuilderPool.Instance.Return(longNameBuilder); - SkipToNextEntry(); - this.tarBuffer.ReadBlockInt(headerBuf); + await SkipToNextEntryAsync(ct, isAsync); + await this.tarBuffer.ReadBlockIntAsync(headerBuf, ct, isAsync); + } } else if (header.TypeFlag == TarHeader.LF_GHDR) { // POSIX global extended header // Ignore things we dont understand completely for now - SkipToNextEntry(); - this.tarBuffer.ReadBlockInt(headerBuf); + await SkipToNextEntryAsync(ct, isAsync); + await this.tarBuffer.ReadBlockIntAsync(headerBuf, ct, isAsync); } else if (header.TypeFlag == TarHeader.LF_XHDR) { // POSIX extended header - byte[] nameBuffer = new byte[TarBuffer.BlockSize]; + byte[] nameBuffer = ArrayPool.Shared.Rent(TarBuffer.BlockSize); long numToRead = this.entrySize; var xhr = new TarExtendedHeaderReader(); while (numToRead > 0) { - int numRead = this.Read(nameBuffer, 0, - (numToRead > nameBuffer.Length ? nameBuffer.Length : (int)numToRead)); + var length = (numToRead > nameBuffer.Length ? nameBuffer.Length : (int)numToRead); + int numRead = await ReadAsync(nameBuffer.AsMemory().Slice(0, length), ct, isAsync); if (numRead == -1) { @@ -521,19 +648,21 @@ public TarEntry GetNextEntry() numToRead -= numRead; } + ArrayPool.Shared.Return(nameBuffer); + if (xhr.Headers.TryGetValue("path", out string name)) { - longName = new StringBuilder(name); + longName = name; } - SkipToNextEntry(); - this.tarBuffer.ReadBlockInt(headerBuf); + await SkipToNextEntryAsync(ct, isAsync); + await this.tarBuffer.ReadBlockIntAsync(headerBuf, ct, isAsync); } else if (header.TypeFlag == TarHeader.LF_GNU_VOLHDR) { // TODO: could show volume name when verbose - SkipToNextEntry(); - this.tarBuffer.ReadBlockInt(headerBuf); + await SkipToNextEntryAsync(ct, isAsync); + await this.tarBuffer.ReadBlockIntAsync(headerBuf, ct, isAsync); } else if (header.TypeFlag != TarHeader.LF_NORMAL && header.TypeFlag != TarHeader.LF_OLDNORM && @@ -542,8 +671,8 @@ public TarEntry GetNextEntry() header.TypeFlag != TarHeader.LF_DIR) { // Ignore things we dont understand completely for now - SkipToNextEntry(); - tarBuffer.ReadBlockInt(headerBuf); + await SkipToNextEntryAsync(ct, isAsync); + await tarBuffer.ReadBlockIntAsync(headerBuf, ct, isAsync); } if (entryFactory == null) @@ -553,9 +682,10 @@ public TarEntry GetNextEntry() { ArrayPool.Shared.Return(readBuffer); } + if (longName != null) { - currentEntry.Name = longName.ToString(); + currentEntry.Name = longName; } } else @@ -584,6 +714,7 @@ public TarEntry GetNextEntry() { ArrayPool.Shared.Return(readBuffer); } + string errorText = string.Format("Bad header in record {0} block {1} {2}", tarBuffer.CurrentRecord, tarBuffer.CurrentBlock, ex.Message); throw new InvalidHeaderException(errorText); @@ -602,29 +733,52 @@ public TarEntry GetNextEntry() /// /// The OutputStream into which to write the entry's data. /// - public void CopyEntryContents(Stream outputStream) + /// + public Task CopyEntryContentsAsync(Stream outputStream, CancellationToken ct) => + CopyEntryContentsAsync(outputStream, ct, true).AsTask(); + + /// + /// Copies the contents of the current tar archive entry directly into + /// an output stream. + /// + /// + /// The OutputStream into which to write the entry's data. + /// + public void CopyEntryContents(Stream outputStream) => + CopyEntryContentsAsync(outputStream, CancellationToken.None, false).GetAwaiter().GetResult(); + + private async ValueTask CopyEntryContentsAsync(Stream outputStream, CancellationToken ct, bool isAsync) { - byte[] tempBuffer = new byte[32 * 1024]; + byte[] tempBuffer = ArrayPool.Shared.Rent(32 * 1024); while (true) { - int numRead = Read(tempBuffer, 0, tempBuffer.Length); + int numRead = await ReadAsync(tempBuffer, ct, isAsync); if (numRead <= 0) { break; } - outputStream.Write(tempBuffer, 0, numRead); + if (isAsync) + { + await outputStream.WriteAsync(tempBuffer, 0, numRead, ct); + } + else + { + outputStream.Write(tempBuffer, 0, numRead); + } } + + ArrayPool.Shared.Return(tempBuffer); } - private void SkipToNextEntry() + private async ValueTask SkipToNextEntryAsync(CancellationToken ct, bool isAsync) { long numToSkip = entrySize - entryOffset; if (numToSkip > 0) { - Skip(numToSkip); + await SkipAsync(numToSkip, ct, isAsync); } if (readBuffer != null) diff --git a/src/ICSharpCode.SharpZipLib/Tar/TarOutputStream.cs b/src/ICSharpCode.SharpZipLib/Tar/TarOutputStream.cs index 8b9e4f6c7..9ce13f15d 100644 --- a/src/ICSharpCode.SharpZipLib/Tar/TarOutputStream.cs +++ b/src/ICSharpCode.SharpZipLib/Tar/TarOutputStream.cs @@ -2,6 +2,8 @@ using System.Buffers; using System.IO; using System.Text; +using System.Threading; +using System.Threading.Tasks; namespace ICSharpCode.SharpZipLib.Tar { @@ -95,10 +97,7 @@ public bool IsStreamOwner /// public override bool CanRead { - get - { - return outputStream.CanRead; - } + get { return outputStream.CanRead; } } /// @@ -106,10 +105,7 @@ public override bool CanRead /// public override bool CanSeek { - get - { - return outputStream.CanSeek; - } + get { return outputStream.CanSeek; } } /// @@ -117,10 +113,7 @@ public override bool CanSeek /// public override bool CanWrite { - get - { - return outputStream.CanWrite; - } + get { return outputStream.CanWrite; } } /// @@ -128,10 +121,7 @@ public override bool CanWrite /// public override long Length { - get - { - return outputStream.Length; - } + get { return outputStream.Length; } } /// @@ -139,14 +129,8 @@ public override long Length /// public override long Position { - get - { - return outputStream.Position; - } - set - { - outputStream.Position = value; - } + get { return outputStream.Position; } + set { outputStream.Position = value; } } /// @@ -194,6 +178,23 @@ public override int Read(byte[] buffer, int offset, int count) return outputStream.Read(buffer, offset, count); } + /// + /// read bytes from the current stream and advance the position within the + /// stream by the number of bytes read. + /// + /// The buffer to store read bytes in. + /// The index into the buffer to being storing bytes at. + /// The desired number of bytes to read. + /// + /// The total number of bytes read, or zero if at the end of the stream. + /// The number of bytes may be less than the count + /// requested if data is not available. + public override async Task ReadAsync(byte[] buffer, int offset, int count, + CancellationToken cancellationToken) + { + return await outputStream.ReadAsync(buffer, offset, count, cancellationToken); + } + /// /// All buffered data is written to destination /// @@ -202,17 +203,34 @@ public override void Flush() outputStream.Flush(); } + /// + /// All buffered data is written to destination + /// + public override async Task FlushAsync(CancellationToken cancellationToken) + { + await outputStream.FlushAsync(cancellationToken); + } + + /// + /// Ends the TAR archive without closing the underlying OutputStream. + /// The result is that the EOF block of nulls is written. + /// + public void Finish() => FinishAsync(CancellationToken.None, false).GetAwaiter().GetResult(); + /// /// Ends the TAR archive without closing the underlying OutputStream. /// The result is that the EOF block of nulls is written. /// - public void Finish() + public Task FinishAsync(CancellationToken cancellationToken) => FinishAsync(cancellationToken, true); + + private async Task FinishAsync(CancellationToken cancellationToken, bool isAsync) { if (IsEntryOpen) { - CloseEntry(); + await CloseEntryAsync(cancellationToken, isAsync); } - WriteEofBlock(); + + await WriteEofBlockAsync(cancellationToken, isAsync); } /// @@ -227,7 +245,7 @@ protected override void Dispose(bool disposing) isClosed = true; Finish(); buffer.Close(); - + ArrayPool.Shared.Return(assemblyBuffer); ArrayPool.Shared.Return(blockBuffer); } @@ -273,44 +291,70 @@ private bool IsEntryOpen /// /// The TarEntry to be written to the archive. /// - public void PutNextEntry(TarEntry entry) + /// + public Task PutNextEntryAsync(TarEntry entry, CancellationToken cancellationToken) => + PutNextEntryAsync(entry, cancellationToken, true); + + /// + /// Put an entry on the output stream. This writes the entry's + /// header and positions the output stream for writing + /// the contents of the entry. Once this method is called, the + /// stream is ready for calls to write() to write the entry's + /// contents. Once the contents are written, closeEntry() + /// MUST be called to ensure that all buffered data + /// is completely written to the output stream. + /// + /// + /// The TarEntry to be written to the archive. + /// + public void PutNextEntry(TarEntry entry) => + PutNextEntryAsync(entry, CancellationToken.None, false).GetAwaiter().GetResult(); + + private async Task PutNextEntryAsync(TarEntry entry, CancellationToken cancellationToken, bool isAsync) { if (entry == null) { throw new ArgumentNullException(nameof(entry)); } - var namelen = nameEncoding != null ? nameEncoding.GetByteCount(entry.TarHeader.Name) : entry.TarHeader.Name.Length; + var namelen = nameEncoding != null + ? nameEncoding.GetByteCount(entry.TarHeader.Name) + : entry.TarHeader.Name.Length; if (namelen > TarHeader.NAMELEN) { var longHeader = new TarHeader(); longHeader.TypeFlag = TarHeader.LF_GNU_LONGNAME; longHeader.Name = longHeader.Name + "././@LongLink"; - longHeader.Mode = 420;//644 by default + longHeader.Mode = 420; //644 by default longHeader.UserId = entry.UserId; longHeader.GroupId = entry.GroupId; longHeader.GroupName = entry.GroupName; longHeader.UserName = entry.UserName; longHeader.LinkName = ""; - longHeader.Size = namelen + 1; // Plus one to avoid dropping last char + longHeader.Size = namelen + 1; // Plus one to avoid dropping last char longHeader.WriteHeader(blockBuffer, nameEncoding); - buffer.WriteBlock(blockBuffer); // Add special long filename header block + // Add special long filename header block + await buffer.WriteBlockAsync(blockBuffer, 0, cancellationToken, isAsync); int nameCharIndex = 0; - while (nameCharIndex < namelen + 1 /* we've allocated one for the null char, now we must make sure it gets written out */) + while + (nameCharIndex < + namelen + 1 /* we've allocated one for the null char, now we must make sure it gets written out */) { Array.Clear(blockBuffer, 0, blockBuffer.Length); - TarHeader.GetAsciiBytes(entry.TarHeader.Name, nameCharIndex, this.blockBuffer, 0, TarBuffer.BlockSize, nameEncoding); // This func handles OK the extra char out of string length + TarHeader.GetAsciiBytes(entry.TarHeader.Name, nameCharIndex, this.blockBuffer, 0, + TarBuffer.BlockSize, nameEncoding); // This func handles OK the extra char out of string length nameCharIndex += TarBuffer.BlockSize; - buffer.WriteBlock(blockBuffer); + + await buffer.WriteBlockAsync(blockBuffer, 0, cancellationToken, isAsync); } } entry.WriteEntryHeader(blockBuffer, nameEncoding); - buffer.WriteBlock(blockBuffer); + await buffer.WriteBlockAsync(blockBuffer, 0, cancellationToken, isAsync); currBytes = 0; @@ -326,13 +370,26 @@ public void PutNextEntry(TarEntry entry) /// to the output stream before this entry is closed and the /// next entry written. /// - public void CloseEntry() + public Task CloseEntryAsync(CancellationToken cancellationToken) => CloseEntryAsync(cancellationToken, true); + + /// + /// Close an entry. This method MUST be called for all file + /// entries that contain data. The reason is that we must + /// buffer data written to the stream in order to satisfy + /// the buffer's block based writes. Thus, there may be + /// data fragments still being assembled that must be written + /// to the output stream before this entry is closed and the + /// next entry written. + /// + public void CloseEntry() => CloseEntryAsync(CancellationToken.None, true).GetAwaiter().GetResult(); + + private async Task CloseEntryAsync(CancellationToken cancellationToken, bool isAsync) { if (assemblyBufferLength > 0) { Array.Clear(assemblyBuffer, assemblyBufferLength, assemblyBuffer.Length - assemblyBufferLength); - buffer.WriteBlock(assemblyBuffer); + await buffer.WriteBlockAsync(assemblyBuffer, 0, cancellationToken, isAsync); currBytes += assemblyBufferLength; assemblyBufferLength = 0; @@ -356,9 +413,12 @@ public void CloseEntry() /// public override void WriteByte(byte value) { - Write(new byte[] { value }, 0, 1); + var oneByteArray = ArrayPool.Shared.Rent(1); + oneByteArray[0] = value; + Write(oneByteArray, 0, 1); + ArrayPool.Shared.Return(oneByteArray); } - + /// /// Writes bytes to the current tar archive entry. This method /// is aware of the current entry and will throw an exception if @@ -377,7 +437,32 @@ public override void WriteByte(byte value) /// /// The number of bytes to write. /// - public override void Write(byte[] buffer, int offset, int count) + public override void Write(byte[] buffer, int offset, int count) => + WriteAsync(buffer, offset, count, CancellationToken.None, false).GetAwaiter().GetResult(); + + /// + /// Writes bytes to the current tar archive entry. This method + /// is aware of the current entry and will throw an exception if + /// you attempt to write bytes past the length specified for the + /// current entry. The method is also (painfully) aware of the + /// record buffering required by TarBuffer, and manages buffers + /// that are not a multiple of recordsize in length, including + /// assembling records from small buffers. + /// + /// + /// The buffer to write to the archive. + /// + /// + /// The offset in the buffer from which to get bytes. + /// + /// + /// The number of bytes to write. + /// + /// + public override Task WriteAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) => + WriteAsync(buffer, offset, count, cancellationToken, true); + + private async Task WriteAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken, bool isAsync) { if (buffer == null) { @@ -422,7 +507,7 @@ public override void Write(byte[] buffer, int offset, int count) Array.Copy(assemblyBuffer, 0, blockBuffer, 0, assemblyBufferLength); Array.Copy(buffer, offset, blockBuffer, assemblyBufferLength, aLen); - this.buffer.WriteBlock(blockBuffer); + await this.buffer.WriteBlockAsync(blockBuffer, 0, cancellationToken, isAsync); currBytes += blockBuffer.Length; @@ -454,7 +539,7 @@ public override void Write(byte[] buffer, int offset, int count) break; } - this.buffer.WriteBlock(buffer, offset); + await this.buffer.WriteBlockAsync(buffer, offset, cancellationToken, isAsync); int bufferLength = blockBuffer.Length; currBytes += bufferLength; @@ -467,11 +552,11 @@ public override void Write(byte[] buffer, int offset, int count) /// Write an EOF (end of archive) block to the tar archive. /// The end of the archive is indicated by two blocks consisting entirely of zero bytes. /// - private void WriteEofBlock() + private async Task WriteEofBlockAsync(CancellationToken cancellationToken, bool isAsync) { Array.Clear(blockBuffer, 0, blockBuffer.Length); - buffer.WriteBlock(blockBuffer); - buffer.WriteBlock(blockBuffer); + await buffer.WriteBlockAsync(blockBuffer, 0, cancellationToken, isAsync); + await buffer.WriteBlockAsync(blockBuffer, 0, cancellationToken, isAsync); } #region Instance Fields diff --git a/test/ICSharpCode.SharpZipLib.Tests/Tar/TarTests.cs b/test/ICSharpCode.SharpZipLib.Tests/Tar/TarTests.cs index fae87a736..d1f1f89bc 100644 --- a/test/ICSharpCode.SharpZipLib.Tests/Tar/TarTests.cs +++ b/test/ICSharpCode.SharpZipLib.Tests/Tar/TarTests.cs @@ -5,6 +5,8 @@ using System; using System.IO; using System.Text; +using System.Threading; +using System.Threading.Tasks; using NUnit.Framework.Internal; namespace ICSharpCode.SharpZipLib.Tests.Tar @@ -110,12 +112,12 @@ public void BlockFactorHandling() var offset = blockNumber % TarBuffer.BlockSize; Assert.AreEqual(0, tarData[byteIndex], "Trailing block data should be null iteration {0} block {1} offset {2} index {3}", - factor, blockNumber, offset, byteIndex); + factor, blockNumber, offset, byteIndex); byteIndex += 1; } } } - + /// /// Check that the tar trailer only contains nulls. /// @@ -843,7 +845,7 @@ public void ParseHeaderWithEncoding(int length, string encodingName) [TestCase(100, "shift-jis")] [TestCase(128, "shift-jis")] [Category("Tar")] - public void StreamWithJapaneseName(int length, string encodingName) + public async Task StreamWithJapaneseNameAsync(int length, string encodingName) { // U+3042 is Japanese Hiragana // https://unicode.org/charts/PDF/U3040.pdf @@ -859,13 +861,14 @@ public void StreamWithJapaneseName(int length, string encodingName) tarOutput.PutNextEntry(entry); tarOutput.Write(data, 0, data.Length); } + using(var memInput = new MemoryStream(memoryStream.ToArray())) using(var inputStream = new TarInputStream(memInput, encoding)) { var buf = new byte[64]; - var entry = inputStream.GetNextEntry(); + var entry = await inputStream.GetNextEntryAsync(CancellationToken.None); Assert.AreEqual(entryName, entry.Name); - var bytesread = inputStream.Read(buf, 0, buf.Length); + var bytesread = await inputStream.ReadAsync(buf, 0, buf.Length, CancellationToken.None); Assert.AreEqual(data.Length, bytesread); } File.WriteAllBytes(Path.Combine(Path.GetTempPath(), $"jpnametest_{length}_{encodingName}.tar"), memoryStream.ToArray()); From 23e5f274cc4efbba7733a9857c2982523abdf2be Mon Sep 17 00:00:00 2001 From: Nathan Date: Mon, 11 Apr 2022 22:14:35 +0100 Subject: [PATCH 4/4] add some tests for tar reading / writing --- .../Core/ExactMemoryPool.cs | 71 ++++++++++ .../Core/StringBuilderPool.cs | 8 +- src/ICSharpCode.SharpZipLib/Tar/TarBuffer.cs | 10 +- .../Tar/TarInputStream.cs | 61 +++------ .../Core/StringBuilderPoolTests.cs | 77 +++++++++++ .../Tar/TarBufferTests.cs | 125 ++++++++++++++++++ .../Tar/TarInputStreamTests.cs | 91 +++++++++++++ 7 files changed, 390 insertions(+), 53 deletions(-) create mode 100644 src/ICSharpCode.SharpZipLib/Core/ExactMemoryPool.cs create mode 100644 test/ICSharpCode.SharpZipLib.Tests/Core/StringBuilderPoolTests.cs create mode 100644 test/ICSharpCode.SharpZipLib.Tests/Tar/TarBufferTests.cs create mode 100644 test/ICSharpCode.SharpZipLib.Tests/Tar/TarInputStreamTests.cs diff --git a/src/ICSharpCode.SharpZipLib/Core/ExactMemoryPool.cs b/src/ICSharpCode.SharpZipLib/Core/ExactMemoryPool.cs new file mode 100644 index 000000000..d03ca2ecf --- /dev/null +++ b/src/ICSharpCode.SharpZipLib/Core/ExactMemoryPool.cs @@ -0,0 +1,71 @@ +using System; +using System.Buffers; + +namespace ICSharpCode.SharpZipLib.Core +{ + /// + /// A MemoryPool that will return a Memory which is exactly the length asked for using the bufferSize parameter. + /// This is in contrast to the default ArrayMemoryPool which will return a Memory of equal size to the underlying + /// array which at least as long as the minBufferSize parameter. + /// Note: The underlying array may be larger than the slice of Memory + /// + /// + internal sealed class ExactMemoryPool : MemoryPool + { + public new static readonly MemoryPool Shared = new ExactMemoryPool(); + + public override IMemoryOwner Rent(int bufferSize = -1) + { + if ((uint)bufferSize > int.MaxValue || bufferSize < 0) + { + throw new ArgumentOutOfRangeException(nameof(bufferSize)); + } + + return new ExactMemoryPoolBuffer(bufferSize); + } + + protected override void Dispose(bool disposing) + { + } + + public override int MaxBufferSize => int.MaxValue; + + private sealed class ExactMemoryPoolBuffer : IMemoryOwner, IDisposable + { + private T[] array; + private readonly int size; + + public ExactMemoryPoolBuffer(int size) + { + this.size = size; + this.array = ArrayPool.Shared.Rent(size); + } + + public Memory Memory + { + get + { + T[] array = this.array; + if (array == null) + { + throw new ObjectDisposedException(nameof(ExactMemoryPoolBuffer)); + } + + return new Memory(array).Slice(0, size); + } + } + + public void Dispose() + { + T[] array = this.array; + if (array == null) + { + return; + } + + this.array = null; + ArrayPool.Shared.Return(array); + } + } + } +} diff --git a/src/ICSharpCode.SharpZipLib/Core/StringBuilderPool.cs b/src/ICSharpCode.SharpZipLib/Core/StringBuilderPool.cs index 08e322a3e..a1121f0cc 100644 --- a/src/ICSharpCode.SharpZipLib/Core/StringBuilderPool.cs +++ b/src/ICSharpCode.SharpZipLib/Core/StringBuilderPool.cs @@ -1,16 +1,16 @@ -using System.Collections.Generic; +using System.Collections.Concurrent; using System.Text; namespace ICSharpCode.SharpZipLib.Core { - class StringBuilderPool + internal class StringBuilderPool { public static StringBuilderPool Instance { get; } = new StringBuilderPool(); - private readonly Queue pool = new Queue(); + private readonly ConcurrentQueue pool = new ConcurrentQueue(); public StringBuilder Rent() { - return pool.Count > 0 ? pool.Dequeue() : new StringBuilder(); + return pool.TryDequeue(out var builder) ? builder : new StringBuilder(); } public void Return(StringBuilder builder) diff --git a/src/ICSharpCode.SharpZipLib/Tar/TarBuffer.cs b/src/ICSharpCode.SharpZipLib/Tar/TarBuffer.cs index cd37e7af4..b190ed1f3 100644 --- a/src/ICSharpCode.SharpZipLib/Tar/TarBuffer.cs +++ b/src/ICSharpCode.SharpZipLib/Tar/TarBuffer.cs @@ -286,18 +286,12 @@ public static bool IsEndOfArchiveBlock(byte[] block) /// /// Skip over a block on the input stream. /// - public void SkipBlock() - { - SkipBlockAsync(CancellationToken.None, false).GetAwaiter().GetResult(); - } + public void SkipBlock() => SkipBlockAsync(CancellationToken.None, false).GetAwaiter().GetResult(); /// /// Skip over a block on the input stream. /// - public Task SkipBlockAsync(CancellationToken ct) - { - return SkipBlockAsync(ct, true).AsTask(); - } + public Task SkipBlockAsync(CancellationToken ct) => SkipBlockAsync(ct, true).AsTask(); private async ValueTask SkipBlockAsync(CancellationToken ct, bool isAsync) { diff --git a/src/ICSharpCode.SharpZipLib/Tar/TarInputStream.cs b/src/ICSharpCode.SharpZipLib/Tar/TarInputStream.cs index cdd84a3d5..36e294628 100644 --- a/src/ICSharpCode.SharpZipLib/Tar/TarInputStream.cs +++ b/src/ICSharpCode.SharpZipLib/Tar/TarInputStream.cs @@ -301,24 +301,22 @@ private async ValueTask ReadAsync(Memory buffer, CancellationToken ct if (readBuffer != null) { - int sz = (numToRead > readBuffer.Length) ? readBuffer.Length : (int)numToRead; + int sz = (numToRead > readBuffer.Memory.Length) ? readBuffer.Memory.Length : (int)numToRead; - readBuffer.AsSpan().Slice(0, sz).CopyTo(buffer.Slice(offset, sz).Span); + readBuffer.Memory.Slice(0, sz).CopyTo(buffer.Slice(offset, sz)); - if (sz >= readBuffer.Length) + if (sz >= readBuffer.Memory.Length) { + readBuffer.Dispose(); readBuffer = null; } else { - int newLen = readBuffer.Length - sz; - byte[] newBuf = ArrayPool.Shared.Rent(newLen); - readBuffer.AsSpan().Slice(sz, newLen).CopyTo(newBuf.AsSpan().Slice(0, newLen)); - if (readBuffer != null) - { - ArrayPool.Shared.Return(readBuffer); - } - + int newLen = readBuffer.Memory.Length - sz; + var newBuf = ExactMemoryPool.Shared.Rent(newLen); + readBuffer.Memory.Slice(sz, newLen).CopyTo(newBuf.Memory); + readBuffer.Dispose(); + readBuffer = newBuf; } @@ -339,13 +337,10 @@ private async ValueTask ReadAsync(Memory buffer, CancellationToken ct if (recLen > sz) { recBuf.AsSpan().Slice(0, sz).CopyTo(buffer.Slice(offset, sz).Span); - if (readBuffer != null) - { - ArrayPool.Shared.Return(readBuffer); - } + readBuffer?.Dispose(); - readBuffer = ArrayPool.Shared.Rent(recLen - sz); - recBuf.AsSpan().Slice(sz, recLen - sz).CopyTo(readBuffer.AsSpan()); + readBuffer = ExactMemoryPool.Shared.Rent(recLen - sz); + recBuf.AsSpan().Slice(sz, recLen - sz).CopyTo(readBuffer.Memory.Span); } else { @@ -465,7 +460,7 @@ private async ValueTask SkipAsync(long skipCount, CancellationToken ct, bool isA // properly skip over bytes via the TarBuffer... // var length = 8 * 1024; - using (var skipBuf = MemoryPool.Shared.Rent(length)) + using (var skipBuf = ExactMemoryPool.Shared.Rent(length)) { for (long num = skipCount; num > 0;) { @@ -568,10 +563,7 @@ private async ValueTask GetNextEntryAsync(CancellationToken ct, bool i if (hasHitEOF) { currentEntry = null; - if (readBuffer != null) - { - ArrayPool.Shared.Return(readBuffer); - } + readBuffer?.Dispose(); } else { @@ -591,7 +583,7 @@ private async ValueTask GetNextEntryAsync(CancellationToken ct, bool i if (header.TypeFlag == TarHeader.LF_GNU_LONGNAME) { - using (var nameBuffer = MemoryPool.Shared.Rent(TarBuffer.BlockSize)) + using (var nameBuffer = ExactMemoryPool.Shared.Rent(TarBuffer.BlockSize)) { long numToRead = this.entrySize; @@ -678,10 +670,7 @@ private async ValueTask GetNextEntryAsync(CancellationToken ct, bool i if (entryFactory == null) { currentEntry = new TarEntry(headerBuf, encoding); - if (readBuffer != null) - { - ArrayPool.Shared.Return(readBuffer); - } + readBuffer?.Dispose(); if (longName != null) { @@ -691,10 +680,7 @@ private async ValueTask GetNextEntryAsync(CancellationToken ct, bool i else { currentEntry = entryFactory.CreateEntry(headerBuf); - if (readBuffer != null) - { - ArrayPool.Shared.Return(readBuffer); - } + readBuffer?.Dispose(); } // Magic was checked here for 'ustar' but there are multiple valid possibilities @@ -710,10 +696,7 @@ private async ValueTask GetNextEntryAsync(CancellationToken ct, bool i entrySize = 0; entryOffset = 0; currentEntry = null; - if (readBuffer != null) - { - ArrayPool.Shared.Return(readBuffer); - } + readBuffer?.Dispose(); string errorText = string.Format("Bad header in record {0} block {1} {2}", tarBuffer.CurrentRecord, tarBuffer.CurrentBlock, ex.Message); @@ -781,11 +764,7 @@ private async ValueTask SkipToNextEntryAsync(CancellationToken ct, bool isAsync) await SkipAsync(numToSkip, ct, isAsync); } - if (readBuffer != null) - { - ArrayPool.Shared.Return(readBuffer); - } - + readBuffer?.Dispose(); readBuffer = null; } @@ -905,7 +884,7 @@ public TarEntry CreateEntry(byte[] headerBuffer) /// /// Buffer used with calls to Read() /// - protected byte[] readBuffer; + protected IMemoryOwner readBuffer; /// /// Working buffer diff --git a/test/ICSharpCode.SharpZipLib.Tests/Core/StringBuilderPoolTests.cs b/test/ICSharpCode.SharpZipLib.Tests/Core/StringBuilderPoolTests.cs new file mode 100644 index 000000000..85d8c65a9 --- /dev/null +++ b/test/ICSharpCode.SharpZipLib.Tests/Core/StringBuilderPoolTests.cs @@ -0,0 +1,77 @@ +using System.Threading; +using System.Threading.Tasks; +using ICSharpCode.SharpZipLib.Core; +using NUnit.Framework; + +namespace ICSharpCode.SharpZipLib.Tests.Core +{ + [TestFixture] + public class StringBuilderPoolTests + { + [Test] + [Category("Core")] + public void RoundTrip() + { + var pool = new StringBuilderPool(); + var builder1 = pool.Rent(); + pool.Return(builder1); + var builder2 = pool.Rent(); + Assert.AreEqual(builder1, builder2); + } + + [Test] + [Category("Core")] + public void ReturnsClears() + { + var pool = new StringBuilderPool(); + var builder1 = pool.Rent(); + builder1.Append("Hello"); + pool.Return(builder1); + Assert.AreEqual(0, builder1.Length); + } + + [Test] + [Category("Core")] + public async Task ThreadSafeAsync() + { + // use a lot of threads to increase the likelihood of errors + var concurrency = 100; + + var pool = new StringBuilderPool(); + var gate = new TaskCompletionSource(); + var startedTasks = new Task[concurrency]; + var completedTasks = new Task[concurrency]; + for (int i = 0; i < concurrency; i++) + { + var started = new TaskCompletionSource(); + startedTasks[i] = started.Task; + var captured = i; + completedTasks[i] = Task.Run(async () => + { + started.SetResult(true); + await gate.Task; + var builder = pool.Rent(); + builder.Append("Hello "); + builder.Append(captured); + var str = builder.ToString(); + pool.Return(builder); + return str; + }); + } + + // make sure all the threads have started + await Task.WhenAll(startedTasks); + + // let them all loose at the same time + gate.SetResult(true); + + // make sure every thread produces the expected string and hence had its own StringBuilder + var results = await Task.WhenAll(completedTasks); + for (int i = 0; i < concurrency; i++) + { + var result = results[i]; + Assert.AreEqual($"Hello {i}", result); + } + } + } +} diff --git a/test/ICSharpCode.SharpZipLib.Tests/Tar/TarBufferTests.cs b/test/ICSharpCode.SharpZipLib.Tests/Tar/TarBufferTests.cs new file mode 100644 index 000000000..3974ffb5b --- /dev/null +++ b/test/ICSharpCode.SharpZipLib.Tests/Tar/TarBufferTests.cs @@ -0,0 +1,125 @@ +using System; +using System.IO; +using System.Threading; +using System.Threading.Tasks; +using ICSharpCode.SharpZipLib.Tar; +using NUnit.Framework; + +namespace ICSharpCode.SharpZipLib.Tests.Tar +{ + [TestFixture] + public class TarBufferTests + { + [Test] + public void TestSimpleReadWrite() + { + var ms = new MemoryStream(); + var reader = TarBuffer.CreateInputTarBuffer(ms, 1); + var writer = TarBuffer.CreateOutputTarBuffer(ms, 1); + writer.IsStreamOwner = false; + + var block = new byte[TarBuffer.BlockSize]; + var r = new Random(); + r.NextBytes(block); + + writer.WriteBlock(block); + writer.WriteBlock(block); + writer.WriteBlock(block); + writer.Close(); + + ms.Seek(0, SeekOrigin.Begin); + + var block0 = reader.ReadBlock(); + var block1 = reader.ReadBlock(); + var block2 = reader.ReadBlock(); + Assert.AreEqual(block, block0); + Assert.AreEqual(block, block1); + Assert.AreEqual(block, block2); + writer.Close(); + } + + [Test] + public void TestSkipBlock() + { + var ms = new MemoryStream(); + var reader = TarBuffer.CreateInputTarBuffer(ms, 1); + var writer = TarBuffer.CreateOutputTarBuffer(ms, 1); + writer.IsStreamOwner = false; + + var block0 = new byte[TarBuffer.BlockSize]; + var block1 = new byte[TarBuffer.BlockSize]; + var r = new Random(); + r.NextBytes(block0); + r.NextBytes(block1); + + writer.WriteBlock(block0); + writer.WriteBlock(block1); + writer.Close(); + + ms.Seek(0, SeekOrigin.Begin); + + reader.SkipBlock(); + var block = reader.ReadBlock(); + Assert.AreEqual(block, block1); + writer.Close(); + } + + [Test] + public async Task TestSimpleReadWriteAsync() + { + var ms = new MemoryStream(); + var reader = TarBuffer.CreateInputTarBuffer(ms, 1); + var writer = TarBuffer.CreateOutputTarBuffer(ms, 1); + writer.IsStreamOwner = false; + + var block = new byte[TarBuffer.BlockSize]; + var r = new Random(); + r.NextBytes(block); + + await writer.WriteBlockAsync(block, CancellationToken.None); + await writer.WriteBlockAsync(block, CancellationToken.None); + await writer.WriteBlockAsync(block, CancellationToken.None); + await writer.CloseAsync(CancellationToken.None); + + ms.Seek(0, SeekOrigin.Begin); + + var block0 = new byte[TarBuffer.BlockSize]; + await reader.ReadBlockIntAsync(block0, CancellationToken.None, true); + var block1 = new byte[TarBuffer.BlockSize]; + await reader.ReadBlockIntAsync(block1, CancellationToken.None, true); + var block2 = new byte[TarBuffer.BlockSize]; + await reader.ReadBlockIntAsync(block2, CancellationToken.None, true); + Assert.AreEqual(block, block0); + Assert.AreEqual(block, block1); + Assert.AreEqual(block, block2); + await writer.CloseAsync(CancellationToken.None); + } + + [Test] + public async Task TestSkipBlockAsync() + { + var ms = new MemoryStream(); + var reader = TarBuffer.CreateInputTarBuffer(ms, 1); + var writer = TarBuffer.CreateOutputTarBuffer(ms, 1); + writer.IsStreamOwner = false; + + var block0 = new byte[TarBuffer.BlockSize]; + var block1 = new byte[TarBuffer.BlockSize]; + var r = new Random(); + r.NextBytes(block0); + r.NextBytes(block1); + + await writer.WriteBlockAsync(block0, CancellationToken.None); + await writer.WriteBlockAsync(block1, CancellationToken.None); + await writer.CloseAsync(CancellationToken.None); + + ms.Seek(0, SeekOrigin.Begin); + + await reader.SkipBlockAsync(CancellationToken.None); + var block = new byte[TarBuffer.BlockSize]; + await reader.ReadBlockIntAsync(block, CancellationToken.None, true); + Assert.AreEqual(block, block1); + await writer.CloseAsync(CancellationToken.None); + } + } +} diff --git a/test/ICSharpCode.SharpZipLib.Tests/Tar/TarInputStreamTests.cs b/test/ICSharpCode.SharpZipLib.Tests/Tar/TarInputStreamTests.cs new file mode 100644 index 000000000..83457834f --- /dev/null +++ b/test/ICSharpCode.SharpZipLib.Tests/Tar/TarInputStreamTests.cs @@ -0,0 +1,91 @@ +using System; +using System.IO; +using System.Text; +using System.Threading; +using System.Threading.Tasks; +using ICSharpCode.SharpZipLib.Tar; +using NUnit.Framework; + +namespace ICSharpCode.SharpZipLib.Tests.Tar +{ + public class TarInputStreamTests + { + [Test] + public void TestRead() + { + var entryBytes = new byte[2000]; + var r = new Random(); + r.NextBytes(entryBytes); + using var ms = new MemoryStream(); + using (var tos = new TarOutputStream(ms, Encoding.UTF8) { IsStreamOwner = false }) + { + var e = TarEntry.CreateTarEntry("some entry"); + e.Size = entryBytes.Length; + tos.PutNextEntry(e); + tos.Write(entryBytes, 0, entryBytes.Length); + tos.CloseEntry(); + } + + ms.Seek(0, SeekOrigin.Begin); + + using var tis = new TarInputStream(ms, Encoding.UTF8); + var entry = tis.GetNextEntry(); + Assert.AreEqual("some entry", entry.Name); + var buffer = new byte[1000]; // smaller than 2 blocks + var read0 = tis.Read(buffer, 0, buffer.Length); + Assert.AreEqual(1000, read0); + Assert.AreEqual(entryBytes.AsSpan(0, 1000).ToArray(), buffer); + + var read1 = tis.Read(buffer, 0, 5); + Assert.AreEqual(5, read1); + Assert.AreEqual(entryBytes.AsSpan(1000, 5).ToArray(), buffer.AsSpan().Slice(0, 5).ToArray()); + + var read2 = tis.Read(buffer, 0, 20); + Assert.AreEqual(20, read2); + Assert.AreEqual(entryBytes.AsSpan(1005, 20).ToArray(), buffer.AsSpan().Slice(0, 20).ToArray()); + + var read3 = tis.Read(buffer, 0, 975); + Assert.AreEqual(975, read3); + Assert.AreEqual(entryBytes.AsSpan(1025, 975).ToArray(), buffer.AsSpan().Slice(0, 975).ToArray()); + } + + [Test] + public async Task TestReadAsync() + { + var entryBytes = new byte[2000]; + var r = new Random(); + r.NextBytes(entryBytes); + using var ms = new MemoryStream(); + using (var tos = new TarOutputStream(ms, Encoding.UTF8) { IsStreamOwner = false }) + { + var e = TarEntry.CreateTarEntry("some entry"); + e.Size = entryBytes.Length; + await tos.PutNextEntryAsync(e, CancellationToken.None); + await tos.WriteAsync(entryBytes, 0, entryBytes.Length); + await tos.CloseEntryAsync(CancellationToken.None); + } + + ms.Seek(0, SeekOrigin.Begin); + + using var tis = new TarInputStream(ms, Encoding.UTF8); + var entry = await tis.GetNextEntryAsync(CancellationToken.None); + Assert.AreEqual("some entry", entry.Name); + var buffer = new byte[1000]; // smaller than 2 blocks + var read0 = await tis.ReadAsync(buffer, 0, buffer.Length); + Assert.AreEqual(1000, read0); + Assert.AreEqual(entryBytes.AsSpan(0, 1000).ToArray(), buffer); + + var read1 = await tis.ReadAsync(buffer, 0, 5); + Assert.AreEqual(5, read1); + Assert.AreEqual(entryBytes.AsSpan(1000, 5).ToArray(), buffer.AsSpan().Slice(0, 5).ToArray()); + + var read2 = await tis.ReadAsync(buffer, 0, 20); + Assert.AreEqual(20, read2); + Assert.AreEqual(entryBytes.AsSpan(1005, 20).ToArray(), buffer.AsSpan().Slice(0, 20).ToArray()); + + var read3 = await tis.ReadAsync(buffer, 0, 975); + Assert.AreEqual(975, read3); + Assert.AreEqual(entryBytes.AsSpan(1025, 975).ToArray(), buffer.AsSpan().Slice(0, 975).ToArray()); + } + } +}