From 1b9fcfc6103074bb05fe76116d1adc2c999bb340 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Konrad=20Kruczy=C5=84ski?=
Date: Tue, 4 May 2021 15:26:15 +0200
Subject: [PATCH] PR #611: Bzip input stream simple vectorization

* Added benchmark for BZip2 decompression.
* Updated benchmarks to be run also on .NET Core 3.1.
* Simple automatic vectorization of the rotation loop.
* Added comment describing vectorization.
---
 .../BZip2/BZip2InputStream.cs                 | 37 +++++++++++++++++++
 .../ICSharpCode.SharpZipLib.Benchmark.csproj  |  2 +-
 .../Program.cs                                |  2 +-
 .../BZip2/BZip2InputStream.cs                 | 27 ++++++++++++--
 4 files changed, 63 insertions(+), 5 deletions(-)
 create mode 100644 benchmark/ICSharpCode.SharpZipLib.Benchmark/BZip2/BZip2InputStream.cs

diff --git a/benchmark/ICSharpCode.SharpZipLib.Benchmark/BZip2/BZip2InputStream.cs b/benchmark/ICSharpCode.SharpZipLib.Benchmark/BZip2/BZip2InputStream.cs
new file mode 100644
index 000000000..8d5a7ccc2
--- /dev/null
+++ b/benchmark/ICSharpCode.SharpZipLib.Benchmark/BZip2/BZip2InputStream.cs
@@ -0,0 +1,37 @@
+using System;
+using System.IO;
+using BenchmarkDotNet.Attributes;
+
+namespace ICSharpCode.SharpZipLib.Benchmark.BZip2
+{
+	[Config(typeof(MultipleRuntimes))] // benchmark configuration comes from the MultipleRuntimes class
+	public class BZip2InputStream // benchmark: decompress an in-memory BZip2 payload with SharpZipLib's BZip2InputStream
+	{
+		private byte[] compressedData; // compressed payload; written once by the constructor, read by DecompressData
+
+		public BZip2InputStream() // builds the compressed payload up front, outside the [Benchmark] method
+		{
+			var outputMemoryStream = new MemoryStream(); // receives the compressed bytes
+			using (var outputStream = new SharpZipLib.BZip2.BZip2OutputStream(outputMemoryStream))
+			{
+				var random = new Random(1234); // fixed seed instead of a time-based one
+				var inputData = new byte[1024 * 1024 * 30]; // 30 MiB of uncompressed input
+				random.NextBytes(inputData);
+				var inputMemoryStream = new MemoryStream(inputData);
+				inputMemoryStream.CopyTo(outputStream); // compress the whole buffer
+			}
+
+			compressedData = outputMemoryStream.ToArray(); // taken only after the using block has disposed the compressor
+		}
+
+		[Benchmark]
+		public void DecompressData() // the measured operation: decompress compressedData from start to end
+		{
+			var memoryStream = new MemoryStream(compressedData); // fresh read-only view over the shared payload
+			using (var inputStream = new SharpZipLib.BZip2.BZip2InputStream(memoryStream))
+			{
+				inputStream.CopyTo(Stream.Null); // decompressed bytes are discarded
+			}
+		}
+	}
+}
diff --git 
a/benchmark/ICSharpCode.SharpZipLib.Benchmark/ICSharpCode.SharpZipLib.Benchmark.csproj b/benchmark/ICSharpCode.SharpZipLib.Benchmark/ICSharpCode.SharpZipLib.Benchmark.csproj
index 4991a9ad1..81a8ad598 100644
--- a/benchmark/ICSharpCode.SharpZipLib.Benchmark/ICSharpCode.SharpZipLib.Benchmark.csproj
+++ b/benchmark/ICSharpCode.SharpZipLib.Benchmark/ICSharpCode.SharpZipLib.Benchmark.csproj
@@ -2,7 +2,7 @@
     <OutputType>Exe</OutputType>
-    <TargetFrameworks>netcoreapp2.1;net461</TargetFrameworks>
+    <TargetFrameworks>netcoreapp2.1;netcoreapp3.1;net461</TargetFrameworks>
diff --git a/benchmark/ICSharpCode.SharpZipLib.Benchmark/Program.cs b/benchmark/ICSharpCode.SharpZipLib.Benchmark/Program.cs
index dca463c24..9c79e6551 100644
--- a/benchmark/ICSharpCode.SharpZipLib.Benchmark/Program.cs
+++ b/benchmark/ICSharpCode.SharpZipLib.Benchmark/Program.cs
@@ -13,7 +13,7 @@ public MultipleRuntimes()
 		{
 			AddJob(Job.Default.WithToolchain(CsProjClassicNetToolchain.Net461).AsBaseline()); // NET 4.6.1
 			AddJob(Job.Default.WithToolchain(CsProjCoreToolchain.NetCoreApp21)); // .NET Core 2.1
-			//Add(Job.Default.With(CsProjCoreToolchain.NetCoreApp30)); // .NET Core 3.0
+			AddJob(Job.Default.WithToolchain(CsProjCoreToolchain.NetCoreApp31)); // .NET Core 3.1
 		}
 	}
 
diff --git a/src/ICSharpCode.SharpZipLib/BZip2/BZip2InputStream.cs b/src/ICSharpCode.SharpZipLib/BZip2/BZip2InputStream.cs
index e639bc1f5..8a3d4b826 100644
--- a/src/ICSharpCode.SharpZipLib/BZip2/BZip2InputStream.cs
+++ b/src/ICSharpCode.SharpZipLib/BZip2/BZip2InputStream.cs
@@ -19,7 +19,11 @@ public class BZip2InputStream : Stream
 		private const int NO_RAND_PART_B_STATE = 6;
 		private const int NO_RAND_PART_C_STATE = 7;
 
-		#endregion Constants
+#if NETSTANDARD2_1
+		private static readonly int VectorSize = System.Numerics.Vector<byte>.Count;
+#endif
+
+#endregion Constants
 
 		#region Instance Fields
 
@@ -711,10 +715,27 @@ cache misses.
 					unzftab[seqToUnseq[tmp]]++;
 					ll8[last] = seqToUnseq[tmp];
 
-					for (int j = nextSym - 1; j > 0; --j)
+					int j = nextSym - 1;
+
+#if NETSTANDARD2_1
+					// Vectorized memory move (same condition as the VectorSize declaration). Walking from the
+					// back, each chunk of VectorSize elements is loaded in full and then stored one position
+					// further right. The fewer-than-VectorSize elements left at the front use the plain loop below.
+					// Where the System.Numerics.Vector API is not compiled in, the plain loop does the whole shift.
+
+					while (j >= VectorSize)
 					{
-						yy[j] = yy[j - 1];
+						var arrayPart = new System.Numerics.Vector<byte>(yy, j - VectorSize);
+						arrayPart.CopyTo(yy, j - VectorSize + 1);
+						j -= VectorSize;
 					}
+#endif
+
+					for (; j > 0; --j)
+					{
+						yy[j] = yy[j - 1];
+					}
+
 					yy[0] = tmp;
 
 					if (groupPos == 0)