Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve zip file iteration performance #843

Merged
merged 3 commits into from
Aug 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,18 +1,16 @@
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFrameworks>net6.0;netcoreapp3.1;net462</TargetFrameworks>
</PropertyGroup>
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFrameworks>net462;net6.0</TargetFrameworks>
</PropertyGroup>

<ItemGroup>
<PackageReference Include="BenchmarkDotNet">
<Version>0.12.1</Version>
</PackageReference>
<PackageReference Include="BenchmarkDotNet" Version="0.13.7" />
</ItemGroup>

<ItemGroup>
<ProjectReference Include="..\..\src\ICSharpCode.SharpZipLib\ICSharpCode.SharpZipLib.csproj" />
<ProjectReference Include="..\..\src\ICSharpCode.SharpZipLib\ICSharpCode.SharpZipLib.csproj" />
</ItemGroup>

</Project>
5 changes: 2 additions & 3 deletions benchmark/ICSharpCode.SharpZipLib.Benchmark/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,8 @@ public class MultipleRuntimes : ManualConfig
{
public MultipleRuntimes()
{
AddJob(Job.Default.WithToolchain(CsProjClassicNetToolchain.Net461).AsBaseline()); // NET 4.6.1
AddJob(Job.Default.WithToolchain(CsProjCoreToolchain.NetCoreApp21)); // .NET Core 2.1
AddJob(Job.Default.WithToolchain(CsProjCoreToolchain.NetCoreApp31)); // .NET Core 3.1
AddJob(Job.Default.WithToolchain(CsProjClassicNetToolchain.Net462).AsBaseline()); // NET 4.6.2
AddJob(Job.Default.WithToolchain(CsProjCoreToolchain.NetCoreApp60)); // .NET 6.0
}
}

Expand Down
63 changes: 63 additions & 0 deletions benchmark/ICSharpCode.SharpZipLib.Benchmark/Zip/ZipFile.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
using System;
using System.IO;
using System.Net.Http;
using System.Threading.Tasks;
using BenchmarkDotNet.Attributes;
using ICSharpCode.SharpZipLib.Zip;

namespace ICSharpCode.SharpZipLib.Benchmark.Zip
{
/// <summary>
/// Benchmark that opens a large real-world archive with <c>SharpZipLib.Zip.ZipFile</c>
/// and reads every entry to the end.
/// </summary>
[MemoryDiagnoser]
[Config(typeof(MultipleRuntimes))]
public class ZipFile
{
    private readonly byte[] readBuffer = new byte[4096];
    private string zipFileWithLargeAmountOfEntriesPath;

    /// <summary>
    /// Sets the inflater pool size to 4 and makes sure the test archive is cached in the
    /// temp directory, downloading it when it is not there yet.
    /// </summary>
    /// <returns>A task that completes once the archive is available on disk.</returns>
    [GlobalSetup]
    public async Task GlobalSetup()
    {
        SharpZipLibOptions.InflaterPoolSize = 4;

        // large real-world test file from test262 repository
        string commitSha = "2e4e0e6b8ebe3348a207144204cb6d7a5571c863";
        zipFileWithLargeAmountOfEntriesPath = Path.Combine(Path.GetTempPath(), $"{commitSha}.zip");
        if (File.Exists(zipFileWithLargeAmountOfEntriesPath))
        {
            // Already cached by an earlier run.
            return;
        }

        var uri = $"https://github.com/tc39/test262/archive/{commitSha}.zip";

        Console.WriteLine("Loading test262 repository archive from {0}", uri);

        // Download into a side file and move it into place only after the copy succeeded.
        // Writing directly to the final path would leave a truncated archive behind when
        // the download is interrupted, and the File.Exists check above would then accept
        // that broken file on every later run.
        var partialPath = zipFileWithLargeAmountOfEntriesPath + ".partial";
        try
        {
            using (var client = new HttpClient())
            using (var downloadStream = await client.GetStreamAsync(uri))
            using (var writeStream = File.Create(partialPath))
            {
                await downloadStream.CopyToAsync(writeStream);
            }

            // All streams are closed at this point, so the file can be moved.
            File.Move(partialPath, zipFileWithLargeAmountOfEntriesPath);
        }
        catch
        {
            // Do not leave an incomplete download lying around; the original error still propagates.
            File.Delete(partialPath);
            throw;
        }

        Console.WriteLine("File downloaded and saved to {0}", zipFileWithLargeAmountOfEntriesPath);
    }

    /// <summary>
    /// Iterates over all entries of the cached archive and reads each entry's input stream
    /// until <c>Read</c> reports no more data, discarding the bytes.
    /// </summary>
    [Benchmark]
    public void ReadLargeZipFile()
    {
        using (var file = new SharpZipLib.Zip.ZipFile(zipFileWithLargeAmountOfEntriesPath))
        {
            foreach (ZipEntry entry in file)
            {
                using (var stream = file.GetInputStream(entry))
                {
                    // Drain the stream; only the cost of reading matters here.
                    while (stream.Read(readBuffer, 0, readBuffer.Length) > 0)
                    {
                    }
                }
            }
        }
    }
}
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
using System;
using System.IO;
using System.IO;
using BenchmarkDotNet.Attributes;

namespace ICSharpCode.SharpZipLib.Benchmark.Zip
Expand All @@ -15,7 +14,8 @@ public class ZipInputStream
byte[] zippedData;
byte[] readBuffer = new byte[4096];

public ZipInputStream()
[GlobalSetup]
public void GlobalSetup()
{
using (var memoryStream = new MemoryStream())
{
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
using System;
using System.IO;
using System.IO;
using System.Threading.Tasks;
using BenchmarkDotNet.Attributes;

Expand All @@ -16,7 +15,8 @@ public class ZipOutputStream
byte[] outputBuffer;
byte[] inputBuffer;

public ZipOutputStream()
[GlobalSetup]
public void GlobalSetup()
{
inputBuffer = new byte[ChunkSize];
outputBuffer = new byte[N];
Expand Down
66 changes: 66 additions & 0 deletions src/ICSharpCode.SharpZipLib/Core/InflaterPool.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
using System;
using System.Collections.Concurrent;
using ICSharpCode.SharpZipLib.Zip.Compression;

namespace ICSharpCode.SharpZipLib.Core
{
/// <summary>
/// Process-wide pool of <see cref="Inflater"/> instances. Reusing them avoids the cost of
/// creating new ones, which can be expensive because of their byte array allocations.
/// Pooling is only active while <see cref="SharpZipLibOptions.InflaterPoolSize"/> is positive.
/// </summary>
internal sealed class InflaterPool
{
    // One queue per inflater flavour so a rented instance always matches the requested mode.
    private readonly ConcurrentQueue<PooledInflater> noHeaderPool = new ConcurrentQueue<PooledInflater>();
    private readonly ConcurrentQueue<PooledInflater> headerPool = new ConcurrentQueue<PooledInflater>();

    /// <summary>
    /// The single shared pool instance.
    /// </summary>
    internal static InflaterPool Instance { get; } = new InflaterPool();

    private InflaterPool() { }

    /// <summary>
    /// Hands out an inflater for the requested mode.
    /// </summary>
    /// <param name="noHeader">Passed to the inflater constructor and used to pick the queue.</param>
    /// <returns>
    /// A plain <see cref="Inflater"/> when pooling is disabled; otherwise a reset pooled
    /// instance taken from the queue, or a new <see cref="PooledInflater"/> if the queue is empty.
    /// </returns>
    internal Inflater Rent(bool noHeader = false)
    {
        if (SharpZipLibOptions.InflaterPoolSize <= 0)
        {
            return new Inflater(noHeader);
        }

        if (!GetPool(noHeader).TryDequeue(out var pooled))
        {
            // Nothing cached for this mode yet.
            return new PooledInflater(noHeader);
        }

        pooled.Reset();
        return pooled;
    }

    /// <summary>
    /// Gives an inflater back to the pool. Does nothing while pooling is disabled, and drops
    /// the instance when the matching queue already holds the configured maximum.
    /// </summary>
    /// <param name="inflater">The inflater to return; must be a <see cref="PooledInflater"/>.</param>
    /// <exception cref="ArgumentException">
    /// Pooling is enabled and <paramref name="inflater"/> is not a <see cref="PooledInflater"/>.
    /// </exception>
    internal void Return(Inflater inflater)
    {
        if (SharpZipLibOptions.InflaterPoolSize <= 0)
        {
            return;
        }

        var pooled = inflater as PooledInflater;
        if (pooled is null)
        {
            throw new ArgumentException("Returned inflater was not a pooled one");
        }

        var queue = GetPool(inflater.noHeader);
        if (queue.Count >= SharpZipLibOptions.InflaterPoolSize)
        {
            // Queue is at capacity; let this instance be collected instead.
            return;
        }

        pooled.Reset();
        queue.Enqueue(pooled);
    }

    // Selects the queue that belongs to the given header mode.
    private ConcurrentQueue<PooledInflater> GetPool(bool noHeader)
    {
        if (noHeader)
        {
            return noHeaderPool;
        }

        return headerPool;
    }
}
}
3 changes: 2 additions & 1 deletion src/ICSharpCode.SharpZipLib/GZip/GzipInputStream.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
using System;
using System.IO;
using System.Text;
using ICSharpCode.SharpZipLib.Core;

namespace ICSharpCode.SharpZipLib.GZip
{
Expand Down Expand Up @@ -82,7 +83,7 @@ public GZipInputStream(Stream baseInputStream)
/// Size of the buffer to use
/// </param>
public GZipInputStream(Stream baseInputStream, int size)
: base(baseInputStream, new Inflater(true), size)
: base(baseInputStream, InflaterPool.Instance.Rent(true), size)
piksel marked this conversation as resolved.
Show resolved Hide resolved
{
}

Expand Down
15 changes: 15 additions & 0 deletions src/ICSharpCode.SharpZipLib/SharpZipLibOptions.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
using ICSharpCode.SharpZipLib.Zip.Compression;

namespace ICSharpCode.SharpZipLib
{
/// <summary>
/// Library-wide switches that change how SharpZipLib behaves.
/// </summary>
public static class SharpZipLibOptions
{
    /// <summary>
    /// Upper bound on the number of <see cref="Inflater"/> instances kept for reuse.
    /// The default is 0, which turns pooling off.
    /// </summary>
    public static int InflaterPoolSize { get; set; }
}
}
2 changes: 1 addition & 1 deletion src/ICSharpCode.SharpZipLib/Zip/Compression/Inflater.cs
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ public class Inflater
/// True means, that the inflated stream doesn't contain a Zlib header or
/// footer.
/// </summary>
private bool noHeader;
internal bool noHeader;

private readonly StreamManipulator input;
private OutputWindow outputWindow;
Expand Down
14 changes: 14 additions & 0 deletions src/ICSharpCode.SharpZipLib/Zip/Compression/PooledInflater.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
using ICSharpCode.SharpZipLib.Core;

namespace ICSharpCode.SharpZipLib.Zip.Compression
{
/// <summary>
/// Marker subclass of <see cref="Inflater"/> identifying instances that may be handed
/// back to <see cref="InflaterPool"/>. It adds no behavior of its own.
/// </summary>
internal sealed class PooledInflater : Inflater
{
    /// <summary>
    /// Creates a pooled inflater, forwarding <paramref name="noHeader"/> to the base constructor.
    /// </summary>
    /// <param name="noHeader">Header mode passed through to <see cref="Inflater"/>.</param>
    public PooledInflater(bool noHeader) : base(noHeader) { }
}
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
using System;
using System.IO;
using System.Security.Cryptography;
using ICSharpCode.SharpZipLib.Core;

namespace ICSharpCode.SharpZipLib.Zip.Compression.Streams
{
Expand Down Expand Up @@ -339,7 +340,7 @@ public class InflaterInputStream : Stream
/// The InputStream to read bytes from
/// </param>
public InflaterInputStream(Stream baseInputStream)
: this(baseInputStream, new Inflater(), 4096)
: this(baseInputStream, InflaterPool.Instance.Rent(), 4096)
piksel marked this conversation as resolved.
Show resolved Hide resolved
{
}

Expand Down Expand Up @@ -630,6 +631,12 @@ protected override void Dispose(bool disposing)
baseInputStream.Dispose();
}
}

if (inf is PooledInflater inflater)
{
InflaterPool.Instance.Return(inflater);
}
inf = null;
}

/// <summary>
Expand Down
Loading
Loading