Skip to content

Commit

Permalink
Expose BinaryReader / BinaryWriter 7-bit encoding methods publicly (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
GrabYourPitchforks committed Apr 22, 2020
1 parent c6610a1 commit 288a04f
Show file tree
Hide file tree
Showing 11 changed files with 294 additions and 69 deletions.
16 changes: 1 addition & 15 deletions src/libraries/Common/src/System/Resources/ResourceWriter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -325,7 +325,7 @@ public void Generate()
ResourceTypeCode typeCode = FindTypeCode(value, typeNames);

// Write out type code
Write7BitEncodedInt(data, (int)typeCode);
data.Write7BitEncodedInt((int)typeCode);

var userProvidedResource = value as PrecannedResource;
if (userProvidedResource != null)
Expand Down Expand Up @@ -417,20 +417,6 @@ public void Generate()
_resourceList = null;
}

// Emits <paramref name="value"/> to <paramref name="store"/> in the variable-length
// format where each output byte carries seven payload bits (least-significant
// group first) and a set high bit means "another byte follows".
private static void Write7BitEncodedInt(BinaryWriter store, int value)
{
    Debug.Assert(store != null);

    // Work on the unsigned bit pattern so that negative inputs shift in zeros
    // and the loop is guaranteed to terminate.
    uint remaining = (uint)value;

    // Every group except the last is written with the continuation bit set.
    for (; remaining > 0x7F; remaining >>= 7)
    {
        store.Write((byte)(remaining | 0x80));
    }

    // Final group: fits in seven bits, so the high bit stays clear.
    store.Write((byte)remaining);
}

// Finds the ResourceTypeCode for a type, or adds this type to the
// types list.
private ResourceTypeCode FindTypeCode(object? value, List<string> types)
Expand Down
123 changes: 123 additions & 0 deletions src/libraries/System.IO/tests/BinaryReader/BinaryReaderTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,129 @@ public void BinaryReader_CloseTests_Negative()
}
}

[Fact]
public void BinaryReader_EofReachedEarlyTests_ThrowsException()
{
    // Each case writes a value twice, chops the final byte off the stream, and then
    // expects the first read to succeed and the second to hit end-of-stream.

    // Fixed-width and 7-bit-encoded integers.

    RunTest(w => w.Write(byte.MinValue), r => r.ReadByte());
    RunTest(w => w.Write(byte.MaxValue), r => r.ReadByte());
    RunTest(w => w.Write(sbyte.MinValue), r => r.ReadSByte());
    RunTest(w => w.Write(sbyte.MaxValue), r => r.ReadSByte());
    RunTest(w => w.Write(short.MinValue), r => r.ReadInt16());
    RunTest(w => w.Write(short.MaxValue), r => r.ReadInt16());
    RunTest(w => w.Write(ushort.MinValue), r => r.ReadUInt16());
    RunTest(w => w.Write(ushort.MaxValue), r => r.ReadUInt16());
    RunTest(w => w.Write(int.MinValue), r => r.ReadInt32());
    RunTest(w => w.Write(int.MaxValue), r => r.ReadInt32());
    RunTest(w => w.Write(uint.MinValue), r => r.ReadUInt32());
    RunTest(w => w.Write(uint.MaxValue), r => r.ReadUInt32());
    RunTest(w => w.Write(long.MinValue), r => r.ReadInt64());
    RunTest(w => w.Write(long.MaxValue), r => r.ReadInt64());
    RunTest(w => w.Write(ulong.MinValue), r => r.ReadUInt64());
    RunTest(w => w.Write(ulong.MaxValue), r => r.ReadUInt64());
    RunTest(w => w.Write7BitEncodedInt(int.MinValue), r => r.Read7BitEncodedInt());
    RunTest(w => w.Write7BitEncodedInt(int.MaxValue), r => r.Read7BitEncodedInt());
    RunTest(w => w.Write7BitEncodedInt64(long.MinValue), r => r.Read7BitEncodedInt64());
    RunTest(w => w.Write7BitEncodedInt64(long.MaxValue), r => r.Read7BitEncodedInt64());

    // Floating-point and decimal values.

    RunTest(w => w.Write((float)0.1234), r => r.ReadSingle());
    RunTest(w => w.Write((double)0.1234), r => r.ReadDouble());
    RunTest(w => w.Write((decimal)0.1234), r => r.ReadDecimal());

    // Booleans and strings (empty, short, and 1 MiB long).

    RunTest(w => w.Write(true), r => r.ReadBoolean());
    RunTest(w => w.Write(false), r => r.ReadBoolean());
    RunTest(w => w.Write(string.Empty), r => r.ReadString());
    RunTest(w => w.Write("hello world"), r => r.ReadString());
    RunTest(w => w.Write(new string('x', 1024 * 1024)), r => r.ReadString());

    void RunTest(Action<BinaryWriter> writeAction, Action<BinaryReader> readAction)
    {
        var strictUtf8 = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false, throwOnInvalidBytes: true);
        var backingStream = new MemoryStream();

        // Serialize the value two times back to back; the writer leaves the stream open
        // so it can be re-read below.

        var writer = new BinaryWriter(backingStream, strictUtf8, leaveOpen: true);
        writeAction(writer);
        writeAction(writer);
        writer.Close();

        // Sanity-check that something was written, rewind, and drop the last byte so the
        // second serialized value is incomplete.

        Assert.True(backingStream.Length > 0);
        backingStream.Position = 0;
        backingStream.SetLength(backingStream.Length - 1);

        var reader = new BinaryReader(backingStream, strictUtf8);

        // The first value is intact and must be readable.
        readAction(reader);

        // The second value is missing its final byte and must fail with end-of-stream.
        Assert.Throws<EndOfStreamException>(() => readAction(reader));
    }
}

/*
* Other tests for Read7BitEncodedInt[64] are in BinaryWriter.WriteTests.cs, not here.
*/

[Fact]
public void BinaryReader_Read7BitEncodedInt_AllowsOverlongEncodings()
{
    // 0x9F carries the payload 0x1F with the continuation bit set; the trailing 0x00
    // adds no further bits, making this a non-minimal ("overlong") encoding of 0x1F.
    byte[] overlongEncoding = { 0x9F, 0x00 };
    BinaryReader reader = new BinaryReader(new MemoryStream(overlongEncoding));

    int decoded = reader.Read7BitEncodedInt();

    Assert.Equal(0x1F, decoded);
}

[Fact]
public void BinaryReader_Read7BitEncodedInt_BadFormat_Throws()
{
    byte[][] malformedInputs =
    {
        // Serialized form of 0b1_00000000_00000000_00000000_00000000 (a 33-bit value):
        // four empty continuation bytes followed by 0x10, which overflows Int32.
        new byte[] { 0x80, 0x80, 0x80, 0x80, 0x10 },

        // Five bytes that all have the "there's more data after this" flag set.
        new byte[] { 0x80, 0x80, 0x80, 0x80, 0x80 },
    };

    foreach (byte[] input in malformedInputs)
    {
        BinaryReader reader = new BinaryReader(new MemoryStream(input));
        Assert.Throws<FormatException>(() => reader.Read7BitEncodedInt());
    }
}

[Fact]
public void BinaryReader_Read7BitEncodedInt64_AllowsOverlongEncodings()
{
    // 0x9F carries the payload 0x1F with the continuation bit set; the trailing 0x00
    // adds no further bits, making this a non-minimal ("overlong") encoding of 0x1F.
    byte[] overlongEncoding = { 0x9F, 0x00 };
    BinaryReader reader = new BinaryReader(new MemoryStream(overlongEncoding));

    long decoded = reader.Read7BitEncodedInt64();

    Assert.Equal(0x1F, decoded);
}

[Fact]
public void BinaryReader_Read7BitEncodedInt64_BadFormat_Throws()
{
    // Verifies that Read7BitEncodedInt64 rejects encodings that cannot fit in 64 bits.

    // Serialized form of 0b1_00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000
    // (a 65-bit value): nine empty continuation bytes followed by 0x02, whose set bit
    // would land at bit position 64 and overflow Int64.

    MemoryStream memoryStream = new MemoryStream(new byte[] { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x02 });
    BinaryReader reader = new BinaryReader(memoryStream);
    Assert.Throws<FormatException>(() => reader.Read7BitEncodedInt64());

    // 10 bytes, all with the "there's more data after this" flag set.
    // This must go through the 64-bit reader: the 32-bit reader would also throw
    // (after only five bytes), which would leave the ten-byte limit untested.

    memoryStream = new MemoryStream(new byte[] { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 });
    reader = new BinaryReader(memoryStream);
    Assert.Throws<FormatException>(() => reader.Read7BitEncodedInt64());
}

private void ValidateDisposedExceptions(BinaryReader binaryReader)
{
byte[] byteBuffer = new byte[10];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,8 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using Xunit;
using System;
using System.IO;
using System.Text;
using Xunit;

namespace System.IO.Tests
{
Expand Down Expand Up @@ -90,6 +88,18 @@ public void BinaryWriter_WriteInt32Test()
WriteTest(i32arr, (bw, s) => bw.Write(s), (br) => br.ReadInt32());
}

[Fact]
public void BinaryWriter_Write7BitEncodedIntTest()
{
    // Round-trips a mix of extreme, zero, small, and negative Int32 values
    // (including bit patterns taken from UInt32) through the 7-bit encoding.
    int[] samples =
    {
        int.MinValue,
        int.MaxValue,
        0,
        -10000,
        10000,
        -50,
        50,
        unchecked((int)uint.MinValue),
        unchecked((int)uint.MaxValue),
        unchecked((int)(uint.MaxValue - 100)),
    };

    WriteTest(
        samples,
        (writer, value) => writer.Write7BitEncodedInt(value),
        reader => reader.Read7BitEncodedInt());
}

[Fact]
public void BinaryWriter_WriteInt64Test()
{
Expand All @@ -98,6 +108,18 @@ public void BinaryWriter_WriteInt64Test()
WriteTest(i64arr, (bw, s) => bw.Write(s), (br) => br.ReadInt64());
}

[Fact]
public void BinaryWriter_Write7BitEncodedInt64Test()
{
    // Round-trips a mix of extreme, zero, small, and negative Int64 values
    // (including bit patterns taken from UInt64) through the 7-bit encoding.
    long[] samples =
    {
        long.MinValue,
        long.MaxValue,
        0,
        -10000,
        10000,
        -50,
        50,
        unchecked((long)ulong.MinValue),
        unchecked((long)ulong.MaxValue),
        unchecked((long)(ulong.MaxValue - 100)),
    };

    WriteTest(
        samples,
        (writer, value) => writer.Write7BitEncodedInt64(value),
        reader => reader.Read7BitEncodedInt64());
}

[Fact]
public void BinaryWriter_WriteUInt16Test()
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2194,8 +2194,8 @@
<data name="Format_AttributeUsage" xml:space="preserve">
<value>Duplicate AttributeUsageAttribute found on attribute type {0}.</value>
</data>
<data name="Format_Bad7BitInt32" xml:space="preserve">
<value>Too many bytes in what should have been a 7 bit encoded Int32.</value>
<data name="Format_Bad7BitInt" xml:space="preserve">
<value>Too many bytes in what should have been a 7-bit encoded integer.</value>
</data>
<data name="Format_BadBase" xml:space="preserve">
<value>Invalid digits for the specified base.</value>
Expand Down
90 changes: 75 additions & 15 deletions src/libraries/System.Private.CoreLib/src/System/IO/BinaryReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -584,28 +584,88 @@ protected virtual void FillBuffer(int numBytes)
} while (bytesRead < numBytes);
}

/// <summary>
/// Reads a 32-bit integer stored in the variable-length format where each byte
/// carries seven payload bits and a set high bit means another byte follows.
/// </summary>
/// <returns>The decoded value, reinterpreted as a signed 32-bit integer.</returns>
/// <exception cref="FormatException">
/// The fifth byte has its high bit set or holds more than the four bits that still fit.
/// </exception>
// NOTE(review): this span appears to interleave pre-change and post-change lines of a
// diff (two signatures, a stale `do` / `if (shift == 5 * 7)` / `throw` sequence);
// confirm against the real file before relying on it as compilable code.
protected internal int Read7BitEncodedInt()
public int Read7BitEncodedInt()
{
// Read out an Int32 7 bits at a time. The high bit
// Unlike writing, we can't delegate to the 64-bit read on
// 64-bit platforms. The reason for this is that we want to
// stop consuming bytes if we encounter an integer overflow.

uint result = 0;
byte byteReadJustNow;

// Read the integer 7 bits at a time. The high bit
// of the byte when on means to continue reading more bytes.
int count = 0;
int shift = 0;
byte b;
do
//
// There are two failure cases: we've read more than 5 bytes,
// or the fifth byte is about to cause integer overflow.
// This means that we can read the first 4 bytes without
// worrying about integer overflow.

const int MaxBytesWithoutOverflow = 4;
for (int shift = 0; shift < MaxBytesWithoutOverflow * 7; shift += 7)
{
// Check for a corrupted stream. Read a max of 5 bytes.
// In a future version, add a DataFormatException.
if (shift == 5 * 7) // 5 bytes max per Int32, shift += 7
// ReadByte handles end of stream cases for us.
byteReadJustNow = ReadByte();
// Strip the continuation bit and place the seven payload bits at the current offset.
result |= (byteReadJustNow & 0x7Fu) << shift;

// A byte with its high bit clear is the last byte of the encoding.
if (byteReadJustNow <= 0x7Fu)
{
throw new FormatException(SR.Format_Bad7BitInt32);
return (int)result; // early exit
}
}

// Read the 5th byte. Since we already read 28 bits,
// the value of this byte must fit within 4 bits (32 - 28),
// and it must not have the high bit set.

byteReadJustNow = ReadByte();
// A single comparison covers both failure cases: any value above 0b1111 either
// has the continuation bit set or carries bits that would not fit in 32 bits.
if (byteReadJustNow > 0b_1111u)
{
throw new FormatException(SR.Format_Bad7BitInt);
}

// The remaining (at most four) bits become bits 28..31 of the result.
result |= (uint)byteReadJustNow << (MaxBytesWithoutOverflow * 7);
return (int)result;
}

/// <summary>
/// Reads a 64-bit integer stored in the variable-length format where each byte
/// carries seven payload bits and a set high bit means another byte follows.
/// </summary>
/// <returns>The decoded value, reinterpreted as a signed 64-bit integer.</returns>
/// <exception cref="FormatException">
/// The tenth byte has its high bit set or holds more than the single bit that still fits.
/// </exception>
// NOTE(review): this span appears to contain stale pre-change diff lines
// (`b = ReadByte();` ... `return count;`) mixed into the new method body;
// confirm against the real file before relying on it as compilable code.
public long Read7BitEncodedInt64()
{
ulong result = 0;
byte byteReadJustNow;

// Read the integer 7 bits at a time. The high bit
// of the byte when on means to continue reading more bytes.
//
// There are two failure cases: we've read more than 10 bytes,
// or the tenth byte is about to cause integer overflow.
// This means that we can read the first 9 bytes without
// worrying about integer overflow.

const int MaxBytesWithoutOverflow = 9;
for (int shift = 0; shift < MaxBytesWithoutOverflow * 7; shift += 7)
{
// ReadByte handles end of stream cases for us.
b = ReadByte();
count |= (b & 0x7F) << shift;
shift += 7;
} while ((b & 0x80) != 0);
return count;
byteReadJustNow = ReadByte();
// Strip the continuation bit and place the seven payload bits at the current offset;
// the `ul` suffix keeps the shift in 64-bit arithmetic.
result |= (byteReadJustNow & 0x7Ful) << shift;

// A byte with its high bit clear is the last byte of the encoding.
if (byteReadJustNow <= 0x7Fu)
{
return (long)result; // early exit
}
}

// Read the 10th byte. Since we already read 63 bits,
// the value of this byte must fit within 1 bit (64 - 63),
// and it must not have the high bit set.

byteReadJustNow = ReadByte();
// A single comparison covers both failure cases: any value above 1 either
// has the continuation bit set or carries bits that would not fit in 64 bits.
if (byteReadJustNow > 0b_1u)
{
throw new FormatException(SR.Format_Bad7BitInt);
}

// The remaining bit becomes bit 63 of the result.
result |= (ulong)byteReadJustNow << (MaxBytesWithoutOverflow * 7);
return (long)result;
}
}
}
39 changes: 32 additions & 7 deletions src/libraries/System.Private.CoreLib/src/System/IO/BinaryWriter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -453,17 +453,42 @@ public virtual void Write(ReadOnlySpan<char> chars)
}
}

/// <summary>
/// Writes a 32-bit integer in the variable-length format where each byte carries
/// seven payload bits, least-significant group first, and a set high bit means
/// another byte follows.
/// </summary>
/// <param name="value">The integer to write; negative values are encoded via their unsigned bit pattern.</param>
// NOTE(review): this span appears to show both the pre-change (`protected`) and
// post-change (`public`) signatures of a diff; confirm against the real file.
protected void Write7BitEncodedInt(int value)
public void Write7BitEncodedInt(int value)
{
// Write out an int 7 bits at a time. The high bit of the byte,
// Reinterpret as unsigned so the right shift below fills with zeros and the loop terminates.
uint uValue = (uint)value;

// Write out an int 7 bits at a time. The high bit of the byte,
// when on, tells reader to continue reading more bytes.
//
// Using the constants 0x7F and ~0x7F below offers smaller
// codegen than using the constant 0x80.

while (uValue > 0x7Fu)
{
// ~0x7Fu sets every bit above the low seven; after truncation to a byte only the
// continuation bit (0x80) remains alongside the seven payload bits.
Write((byte)(uValue | ~0x7Fu));
uValue >>= 7;
}

// Last group: at most seven bits, so the continuation bit is clear.
Write((byte)uValue);
}

/// <summary>
/// Writes a 64-bit integer in the variable-length format where each byte carries
/// seven payload bits, least-significant group first, and a set high bit means
/// another byte follows.
/// </summary>
/// <param name="value">The integer to write; negative values are encoded via their unsigned bit pattern.</param>
// NOTE(review): this span appears to contain stale pre-change diff lines (the `uint v`
// loop) mixed into the new method body; confirm against the real file.
public void Write7BitEncodedInt64(long value)
{
// Reinterpret as unsigned so the right shift below fills with zeros and the loop terminates.
ulong uValue = (ulong)value;

// Write out an int 7 bits at a time. The high bit of the byte,
// when on, tells reader to continue reading more bytes.
uint v = (uint)value; // support negative numbers
while (v >= 0x80)
//
// Using the constants 0x7F and ~0x7F below offers smaller
// codegen than using the constant 0x80.

while (uValue > 0x7Fu)
{
Write((byte)(v | 0x80));
v >>= 7;
// Only the low eight bits survive the byte cast, so narrowing to uint first is lossless
// here; ~0x7Fu then supplies the continuation bit (0x80) on top of the seven payload bits.
Write((byte)((uint)uValue | ~0x7Fu));
uValue >>= 7;
}
Write((byte)v);

// Last group: at most seven bits, so the continuation bit is clear.
Write((byte)uValue);
}
}
}
Loading

0 comments on commit 288a04f

Please sign in to comment.