Skip to content

Commit

Permalink
Decode revision reader directly to Span (#11225)
Browse files Browse the repository at this point in the history
* Decode revision reader directly to Span

Avoid allocations, do not allocate a string when decoding Git data

Remove methods to simplify flow

---------

Co-authored-by: Gerhard Gerhard Olsson <gerhardol@users.noreply.github.com>
  • Loading branch information
gerhardol and gerhardol committed Oct 1, 2023
1 parent 093a56e commit f2bff50
Show file tree
Hide file tree
Showing 8 changed files with 238 additions and 270 deletions.
2 changes: 1 addition & 1 deletion GitCommands/Git/GitModule.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2158,7 +2158,7 @@ public IReadOnlyList<PatchFile> GetInteractiveRebasePatchFiles()
return patchFiles;
}

RevisionReader reader = new(this, hasReflogSelector: false, allBodies: true);
RevisionReader reader = new(this, allBodies: true);
Dictionary<string, GitRevision> rebasedCommitsRevisions;
try
{
Expand Down
4 changes: 4 additions & 0 deletions GitCommands/GitCommands.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@
<!-- To be removed when NRT annotations are complete -->
<Nullable>annotations</Nullable>
</PropertyGroup>
<PropertyGroup>
<!-- To be limited to Debug after initial usage -->
<DefineConstants>$(DefineConstants);TRACE_REVISIONREADER</DefineConstants>
</PropertyGroup>

<ItemGroup>
<Compile Include="..\GitExtUtils\Delimiters.cs" Link="Utils\Delimiters.cs" />
Expand Down
332 changes: 162 additions & 170 deletions GitCommands/RevisionReader.cs

Large diffs are not rendered by default.

136 changes: 58 additions & 78 deletions GitCommands/StreamExtensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,108 +5,88 @@ namespace GitCommands
public static class StreamExtensions
{
[MustUseReturnValue]
public static IEnumerable<ArraySegment<byte>> ReadNullTerminatedChunks(this Stream stream, ref byte[] buffer)
public static IEnumerable<ReadOnlyMemory<byte>> SplitLogOutput(this Stream stream, int initialBufferSize = 4096)
{
// Work around generator functions and ref parameters
var buf = buffer;
var chunks = Run();
buffer = buf;
return chunks;
byte[] buffer = new byte[initialBufferSize];

IEnumerable<ArraySegment<byte>> Run()
int yieldFromIndex = 0;
int writeToIndex = 0;

while (true)
{
var yieldFromIndex = 0;
var writeToIndex = 0;
// Fill the buffer with data
int bytesRead = stream.Read(buffer, writeToIndex, buffer.Length - writeToIndex);

while (true)
if (bytesRead == 0)
{
// Fill the buffer with data
var bytesRead = stream.Read(buf, writeToIndex, buf.Length - writeToIndex);

if (bytesRead == 0)
// End of stream, just yield the remaining data (no null terminator for last commit)
if (writeToIndex != 0 && writeToIndex != yieldFromIndex)
{
if (writeToIndex != 0 && writeToIndex != yieldFromIndex)
{
yield return new ArraySegment<byte>(buf, yieldFromIndex, writeToIndex - yieldFromIndex);
}

yield break;
yield return new ReadOnlyMemory<byte>(buffer, yieldFromIndex, writeToIndex - yieldFromIndex);
}

var dataUntilIndex = writeToIndex + bytesRead;
yield break;
}

var searchFromIndex = writeToIndex;
writeToIndex += bytesRead;
var searchBeforeIndex = writeToIndex;
int dataUntilIndex = writeToIndex + bytesRead;

while (searchFromIndex < searchBeforeIndex)
{
var nullIndex = Array.IndexOf<byte>(buf, 0, searchFromIndex, searchBeforeIndex - searchFromIndex);
int searchFromIndex = writeToIndex;
writeToIndex += bytesRead;
int searchBeforeIndex = writeToIndex;

if (nullIndex == -1)
while (searchFromIndex < searchBeforeIndex)
{
int nullIndex = Array.IndexOf<byte>(buffer, 0, searchFromIndex, searchBeforeIndex - searchFromIndex);

if (nullIndex == -1)
{
// null not found in the data available to process
if (searchBeforeIndex == buffer.Length)
{
// We didn't find a null in the data we have available to process
if (searchBeforeIndex == buf.Length)
// end of the buffer
if (yieldFromIndex != 0)
{
// We are at the end of the buffer
if (yieldFromIndex != 0)
{
// There is free space earlier in the buffer, so shift this chunk to the beginning

var unprocessedByteCount = buf.Length - yieldFromIndex;
// Shift unprocessed to the beginning
int unprocessedByteCount = buffer.Length - yieldFromIndex;
Array.Copy(buffer, yieldFromIndex, buffer, 0, unprocessedByteCount);

// Move unprocessed bytes from the end to the start of the buffer.
Array.Copy(buf, yieldFromIndex, buf, 0, unprocessedByteCount);

// Restart loop
yieldFromIndex = 0;
writeToIndex = unprocessedByteCount;
}
else
{
// The buffer is full, with no nulls, so allocate a larger buffer

// Allocate new buffer with twice the size
var newBuffer = new byte[buf.Length * 2];
// Restart loop
yieldFromIndex = 0;
writeToIndex = unprocessedByteCount;
}
else
{
// The buffer is full, with no nulls, so allocate a larger buffer

// Copy old to new buffers
Array.Copy(buf, newBuffer, buf.Length);
// Allocate new buffer with twice the size and copy
byte[] newBuffer = new byte[buffer.Length * 2];
Array.Copy(buffer, newBuffer, buffer.Length);

// Restart loop
writeToIndex = buf.Length;
// Restart loop (yieldFromIndex is unchanged)
writeToIndex = buffer.Length;

// Writeback
buf = newBuffer;
}
// Writeback
buffer = newBuffer;
}

break;
}

// yield any inner chunks
yield return new ArraySegment<byte>(buf, yieldFromIndex, nullIndex - yieldFromIndex);
break;
}

if (nullIndex + 1 == dataUntilIndex)
{
yieldFromIndex = writeToIndex = 0;
break;
}
else
{
searchFromIndex = yieldFromIndex = nullIndex + 1;
}
// yield any inner chunks
yield return new ReadOnlyMemory<byte>(buffer, yieldFromIndex, nullIndex - yieldFromIndex);

if (nullIndex + 1 == dataUntilIndex)
{
// all data in the buffer processed
yieldFromIndex = writeToIndex = 0;
break;
}

// process the remaining (possibly partial) commits
searchFromIndex = yieldFromIndex = nullIndex + 1;
}
}
}

[MustUseReturnValue]
public static byte[] ReadAllBytes(this Stream stream)
{
// NOTE no need to dispose MemoryStream
MemoryStream memoryStream = new();
stream.CopyTo(memoryStream);
return memoryStream.ToArray();
}
}
}
2 changes: 1 addition & 1 deletion GitUI/CommandsDialogs/FormBrowse.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2744,7 +2744,7 @@ private void toggleLeftPanel_Click(object sender, EventArgs e)
Lazy<IReadOnlyCollection<GitRevision>> getStashRevs = new(() =>
!AppSettings.ShowStashes
? Array.Empty<GitRevision>()
: new RevisionReader(new(UICommands.GitModule.WorkingDir), hasReflogSelector: true).GetStashes(CancellationToken.None));
: new RevisionReader(new(UICommands.GitModule.WorkingDir)).GetStashes(CancellationToken.None));

RefreshLeftPanel(new FilteredGitRefsProvider(UICommands.GitModule).GetRefs, getStashRevs, forceRefresh: true);
repoObjectsTree.RefreshRevisionsLoaded();
Expand Down
6 changes: 3 additions & 3 deletions GitUI/UserControls/RevisionGrid/RevisionGridControl.cs
Original file line number Diff line number Diff line change
Expand Up @@ -911,7 +911,7 @@ public void PerformRefreshRevisions(Func<RefsFilter, IReadOnlyList<IGitRef>> get
Lazy<IReadOnlyCollection<GitRevision>> getStashRevs = new(() =>
!AppSettings.ShowStashes
? Array.Empty<GitRevision>()
: new RevisionReader(capturedModule, hasReflogSelector: true).GetStashes(cancellationToken));
: new RevisionReader(capturedModule).GetStashes(cancellationToken));

try
{
Expand Down Expand Up @@ -1035,7 +1035,7 @@ public void PerformRefreshRevisions(Func<RefsFilter, IReadOnlyList<IGitRef>> get
Dictionary<ObjectId, ObjectId> untrackedChildIds = getStashRevs.Value.Where(stash => stash.ParentIds.Count >= 3)
.Take(AppSettings.MaxStashesWithUntrackedFiles)
.ToDictionary(stash => stash.ParentIds[2], stash => stash.ObjectId);
IReadOnlyCollection<GitRevision> untrackedRevs = new RevisionReader(capturedModule, hasReflogSelector: false)
IReadOnlyCollection<GitRevision> untrackedRevs = new RevisionReader(capturedModule)
.GetRevisionsFromList(untrackedChildIds.Keys.ToList(), cancellationToken);
untrackedByStashId = untrackedRevs.ToDictionary(r => untrackedChildIds[r.ObjectId]);
Expand All @@ -1061,7 +1061,7 @@ public void PerformRefreshRevisions(Func<RefsFilter, IReadOnlyList<IGitRef>> get
{
TaskManager.HandleExceptions(() =>
{
RevisionReader reader = new(capturedModule, hasReflogSelector: false);
RevisionReader reader = new(capturedModule);
string pathFilter = BuildPathFilter(_filterInfo.PathFilter);
ParentsAreRewritten = _filterInfo.HasRevisionFilter;
Expand Down
11 changes: 3 additions & 8 deletions UnitTests/GitCommands.Tests/RevisionReaderTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ public void TryParseRevisionshould_return_false_if_argument_is_invalid()
[TestCase("bad_parentid_length", false)]
[TestCase("bad_sha", false)]
[TestCase("empty", false)]
[TestCase("illegal_timestamp", true, false, true)]
[TestCase("illegal_timestamp", false, false)]
[TestCase("multi_pathfilter", true)]
[TestCase("no_subject", true)]
[TestCase("normal", true)]
Expand All @@ -59,7 +59,7 @@ public void TryParseRevisionshould_return_false_if_argument_is_invalid()
[TestCase("subject_no_body", true)]
[TestCase("empty_commit", true)]
[TestCase("reflogselector", true, true)]
public async Task TryParseRevision_test(string testName, bool expectedReturn, bool hasReflogSelector = false, bool serialThrows = false)
public async Task TryParseRevision_test(string testName, bool expectedReturn, bool hasReflogSelector = false)
{
string path = Path.Combine(TestContext.CurrentContext.TestDirectory, "TestData/RevisionReader", testName + ".bin");
ArraySegment<byte> chunk = File.ReadAllBytes(path);
Expand All @@ -80,12 +80,7 @@ public async Task TryParseRevision_test(string testName, bool expectedReturn, bo
DateTimeZoneHandling = DateTimeZoneHandling.Utc
};

if (serialThrows)
{
Action act = () => JsonConvert.SerializeObject(rev);
act.Should().Throw<JsonSerializationException>();
}
else if (expectedReturn)
if (expectedReturn)
{
await Verifier.VerifyJson(JsonConvert.SerializeObject(rev, timeZoneSettings))
.UseParameters(testName);
Expand Down
15 changes: 6 additions & 9 deletions UnitTests/GitCommands.Tests/StreamExtensionsTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -44,25 +44,22 @@ public void ReadNullTerminatedLines(byte[] input, params byte[][] expectedChunks
MemoryStream stream = new(input);

// Run the test at multiple buffer sizes to test boundary conditions thoroughly
for (var bufferSize = 1; bufferSize < input.Length + 2; bufferSize++)
for (int bufferSize = 1; bufferSize < input.Length + 2; bufferSize++)
{
// Test overload that uses external buffer
var buffer = new byte[bufferSize];

stream.Position = 0;

using var e = stream.ReadNullTerminatedChunks(ref buffer).GetEnumerator();
for (var chunkIndex = 0; chunkIndex < expectedChunks.Length; chunkIndex++)
using IEnumerator<ReadOnlyMemory<byte>> e = stream.SplitLogOutput(bufferSize).GetEnumerator();
for (int chunkIndex = 0; chunkIndex < expectedChunks.Length; chunkIndex++)
{
var expected = expectedChunks[chunkIndex];
byte[] expected = expectedChunks[chunkIndex];
Assert.IsTrue(e.MoveNext());
Assert.AreEqual(
expected,
e.Current.ToArray(),
"input=[{0}] chunkIndex={1} bufferSize={2}", string.Join(",", expected), chunkIndex, bufferSize);
$"input=[{string.Join(",", expected)}] chunkIndex={chunkIndex} bufferSize={{bufferSize}}");
}

Assert.IsFalse(e.MoveNext(), "bufferSize={0}", bufferSize);
Assert.IsFalse(e.MoveNext(), $"bufferSize={bufferSize}");
}
}
}
Expand Down

0 comments on commit f2bff50

Please sign in to comment.