Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add string optimizations #521

Merged
merged 10 commits into from
May 13, 2024
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
87 changes: 65 additions & 22 deletions QRCoder/QRCodeGenerator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -745,14 +745,22 @@ bool IsUtf8()
}
}

// Strict ISO-8859-1 encoding (code page 28591) whose encoder/decoder fallbacks throw instead of substituting.
// NOTE(review): IsValidISO below does not need it; it is kept in case members outside this view reference it — remove if unused.
private static readonly Encoding _iso88591ExceptionFallback = Encoding.GetEncoding(28591, new EncoderExceptionFallback(), new DecoderExceptionFallback()); // ISO-8859-1

/// <summary>
/// Checks if the given string can be accurately represented and retrieved in ISO-8859-1 encoding.
/// </summary>
/// <param name="input">The text to inspect; must not be <c>null</c>.</param>
/// <returns>
/// <c>true</c> when every UTF-16 code unit of <paramref name="input"/> lies in U+0000..U+00FF
/// (an empty string qualifies); otherwise <c>false</c>.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="input"/> is <c>null</c>.</exception>
private static bool IsValidISO(string input)
{
    // Same exception type a strict encoder's GetByteCount would raise for a null string.
    if (input == null)
    {
        throw new ArgumentNullException(nameof(input));
    }

    // ISO-8859-1 maps its 256 byte values one-to-one onto U+0000..U+00FF, so a plain range check
    // answers the question without allocating and without using an exception as control flow.
    // Surrogate code units (U+D800..U+DFFF) are above 0xFF and are therefore rejected as well.
    foreach (char c in input)
    {
        if (c > 0xFF)
        {
            return false;
        }
    }

    return true;
}

/// <summary>
Expand Down Expand Up @@ -866,18 +874,13 @@ private static BitArray PlainTextToBinaryAlphanumeric(string plainText)
return codeText;
}

/// <summary>
/// Round-trips <paramref name="value"/> through the named encoding, so that characters the encoding
/// cannot represent come back as whatever that encoding's fallback produces (typically a replacement character).
/// </summary>
/// <param name="value">The text to convert.</param>
/// <param name="Iso">Name of the target encoding; defaults to ISO-8859-2.</param>
/// <returns>The text decoded back from the target encoding's bytes.</returns>
private static string ConvertToIso8859(string value, string Iso = "ISO-8859-2")
{
    var targetEncoding = Encoding.GetEncoding(Iso);

    // UTF-16 string -> UTF-8 bytes -> target-encoding bytes -> string again.
    var targetBytes = Encoding.Convert(Encoding.UTF8, targetEncoding, Encoding.UTF8.GetBytes(value));

    return targetEncoding.GetString(targetBytes);
}
codebude marked this conversation as resolved.
Show resolved Hide resolved
// Shared ISO-8859-1 encoding instance, created once so callers do not look it up per call.
// Builds that define NET5_0_OR_GREATER use Encoding.Latin1; all others resolve code page 28591.
private static readonly Encoding _iso8859_1 =
#if NET5_0_OR_GREATER
Encoding.Latin1;
#else
Encoding.GetEncoding(28591); // ISO-8859-1
#endif
// ISO-8859-2 encoding instance. Starts out null and is assigned by PlainTextToBinaryByte the first time
// EciMode.Iso8859_2 is handled; per the note there, the encoding is not natively supported on .NET Core.
// NOTE(review): the null-check-then-assign there is not synchronized — presumably a benign race; confirm.
private static Encoding _iso8859_2;
codebude marked this conversation as resolved.
Show resolved Hide resolved

/// <summary>
/// Converts plain text into a binary format using byte mode encoding, which supports various character encodings through ECI (Extended Channel Interpretations).
Expand All @@ -894,35 +897,69 @@ private static string ConvertToIso8859(string value, string Iso = "ISO-8859-2")
/// </remarks>
private static BitArray PlainTextToBinaryByte(string plainText, EciMode eciMode, bool utf8BOM, bool forceUtf8)
{
    Encoding targetEncoding;

    // Prefer ISO-8859-1 whenever UTF-8 is not forced and the text fits that encoding.
    // forceUtf8 is tested first so the validity scan of the text is skipped when its result cannot matter.
    if (!forceUtf8 && IsValidISO(plainText))
    {
        targetEncoding = _iso8859_1;
        utf8BOM = false; // the UTF-8 preamble is only ever written for UTF-8 output
    }
    else
    {
        // Determine the encoding based on the specified ECI mode.
        switch (eciMode)
        {
            case EciMode.Iso8859_1:
                targetEncoding = _iso8859_1;
                utf8BOM = false;
                break;
            case EciMode.Iso8859_2:
                // Note: ISO-8859-2 is not natively supported on .NET Core
                //
                // Users must install the System.Text.Encoding.CodePages package and call Encoding.RegisterProvider(CodePagesEncodingProvider.Instance)
                // before using this encoding mode.
                if (_iso8859_2 == null)
                {
                    _iso8859_2 = Encoding.GetEncoding(28592); // ISO-8859-2
                }
                targetEncoding = _iso8859_2;
                utf8BOM = false;
                break;
            case EciMode.Default:
            case EciMode.Utf8:
            default:
                // UTF-8 output; utf8BOM keeps the caller's value so the preamble can be added below.
                targetEncoding = Encoding.UTF8;
                break;
        }
    }

#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1
    // We can use stackalloc for small arrays to prevent heap allocations.
    // Note that all QR codes should fit within 3000 bytes, so the heap branch should only be taken for
    // input that is going to be rejected with an exception anyway.
    int count = targetEncoding.GetByteCount(plainText);
    Span<byte> codeBytes = count < 3000 ? stackalloc byte[count] : new byte[count];
    targetEncoding.GetBytes(plainText, codeBytes);
#else
    byte[] codeBytes = targetEncoding.GetBytes(plainText);
#endif

    if (utf8BOM)
    {
        // Convert to a bit array, leaving 24 leading bits free for the UTF-8 preamble.
        var bitArray = ToBitArray(codeBytes, 24);

        // Write the UTF-8 preamble (EF BB BF) into the reserved leading bits.
        DecToBin(0xEF, 8, bitArray, 0);
        DecToBin(0xBB, 8, bitArray, 8);
        DecToBin(0xBF, 8, bitArray, 16);
        return bitArray;
    }

    // No preamble requested: the bits are exactly the encoded bytes.
    return ToBitArray(codeBytes);
}

/// <summary>
Expand All @@ -932,7 +969,13 @@ private static BitArray PlainTextToBinaryByte(string plainText, EciMode eciMode,
/// <param name="byteArray">The byte array to convert into a BitArray.</param>
/// <param name="prefixZeros">The number of leading zeros to prepend to the resulting BitArray.</param>
/// <returns>A BitArray representing the bits of the input byteArray, with optional leading zeros.</returns>
private static BitArray ToBitArray(byte[] byteArray, int prefixZeros = 0)
private static BitArray ToBitArray(
#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1
ReadOnlySpan<byte> byteArray, // byte[] has an implicit cast to ReadOnlySpan<byte>
#else
byte[] byteArray,
#endif
int prefixZeros = 0)
{
// Calculate the total number of bits in the resulting BitArray including the prefix zeros.
var bitArray = new BitArray((int)((uint)byteArray.Length * 8) + prefixZeros);
Expand Down
46 changes: 46 additions & 0 deletions QRCoderTests/QRGeneratorTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,26 @@ public void can_encode_byte()
result.ShouldBe
}

// Generates a QR code for a URL ending in a pizza emoji, using ECC level L and EciMode.Utf8,
// then compares the rendered module matrix against an expected bit string.
[Fact]
[Category("QRGenerator/TextEncoding")]
public void can_encode_utf8()
{
var gen = new QRCodeGenerator();
// NOTE(review): the two bool arguments are presumably forceUtf8 = true and utf8BOM = false — confirm against the CreateQrCode signature.
var qrData = gen.CreateQrCode("https://en.wikipedia.org/wiki/🍕", QRCodeGenerator.ECCLevel.L, true, false, QRCodeGenerator.EciMode.Utf8);
// Concatenate the ToBitString() output of every ModuleMatrix element into one string.
var result = string.Join("", qrData.ModuleMatrix.Select(x => x.ToBitString()).ToArray());
// NOTE(review): the expected value passed to ShouldBe is not present in this view.
result.ShouldBe
}

// Same scenario as the plain UTF-8 encoding test, except the fourth CreateQrCode argument is true
// (judging by the test name, this requests the UTF-8 byte order mark).
[Fact]
[Category("QRGenerator/TextEncoding")]
public void can_encode_utf8_bom()
{
var gen = new QRCodeGenerator();
// NOTE(review): the two bool arguments are presumably forceUtf8 = true and utf8BOM = true — confirm against the CreateQrCode signature.
var qrData = gen.CreateQrCode("https://en.wikipedia.org/wiki/🍕", QRCodeGenerator.ECCLevel.L, true, true, QRCodeGenerator.EciMode.Utf8);
// Concatenate the ToBitString() output of every ModuleMatrix element into one string.
var result = string.Join("", qrData.ModuleMatrix.Select(x => x.ToBitString()).ToArray());
// NOTE(review): the expected value passed to ShouldBe is not present in this view.
result.ShouldBe
}

[Fact]
[Category("QRGenerator/TextEncoding")]
public void can_generate_from_bytes()
Expand All @@ -170,6 +190,32 @@ public void can_generate_from_bytes()
var result = string.Join("", qrData.ModuleMatrix.Select(x => x.ToBitString()).ToArray());
result.ShouldBe
}

[Fact]
[Category("QRGenerator/TextEncoding")]
public void isValidIso_works()
{
    // Exercises a local copy of the strict-encoder check (see private method: QRCodeGenerator.IsValidISO):
    // an ISO-8859-1 encoding (code page 28591) configured to throw on anything it cannot map.
    var strictLatin1 = Encoding.GetEncoding(28591, new EncoderExceptionFallback(), new DecoderExceptionFallback());

    // A string counts as valid when the strict encoder can size it without throwing.
    bool IsValidISO(string input)
    {
        try
        {
            strictLatin1.GetByteCount(input);
        }
        catch (EncoderFallbackException)
        {
            return false;
        }

        return true;
    }

    IsValidISO("abc").ShouldBeTrue();
    IsValidISO("äöü").ShouldBeTrue();
    IsValidISO("🍕").ShouldBeFalse();
}
}

public static class ExtensionMethods
Expand Down