From c8396d8d94e06c8c24d23bad33554c92a2754fad Mon Sep 17 00:00:00 2001 From: Gerhard Olsson Date: Wed, 30 Jun 2021 04:20:03 +0200 Subject: [PATCH] RevisionReader: Parse raw commit body * Parse raw body %B instead of combining %s%n%n%b to avoid extra processing in both Git and GE. Simplify the handling. * Do not allocate body for older commits Previously a temporary variable was allocated also if body was not stored in the commit message (to save memory, for older commits). * Only store commit body if multiline message If the body is the same as the subject, do not store the message This also avoids loading the commit body for older commits if the commit is selected * Align usage of commit body retrieval for other than RevisionReader * "Previous commit messages" used %s%b instead of %B * Align parsing of raw body and subject * ObjectId Inline local parsing functions * Use ReadOnlySpan in RevisionReader Reduce string allocations * Use HighPerformance StringBuffer --- GitCommands/Git/GitModule.cs | 19 +- GitCommands/GitCommands.csproj | 1 + GitCommands/RevisionReader.cs | 254 +++++++--------- GitCommands/StringPool.cs | 274 ------------------ .../CommandsDialogs/FormVerify.LostObject.cs | 2 +- Packages.props | 1 + Plugins/GitUIPluginInterfaces/GitRevision.cs | 9 +- Plugins/GitUIPluginInterfaces/ObjectId.cs | 28 +- .../GitCommands.Tests/RevisionReaderTests.cs | 4 +- .../GitCommands.Tests/StringPoolTests.cs | 196 ------------- .../Graph/LaneInfoProviderTests.cs | 2 +- 11 files changed, 151 insertions(+), 639 deletions(-) delete mode 100644 GitCommands/StringPool.cs delete mode 100644 UnitTests/GitCommands.Tests/StringPoolTests.cs diff --git a/GitCommands/Git/GitModule.cs b/GitCommands/Git/GitModule.cs index 41d326550b2..860fe00056f 100644 --- a/GitCommands/Git/GitModule.cs +++ b/GitCommands/Git/GitModule.cs @@ -950,11 +950,18 @@ public GitRevision GetRevision(ObjectId? objectId = null, bool shortFormat = fal } else { - string message = ProcessDiffNotes(10); + string message = ProcessDiffNotes(startIndex: 10); // commit message is not re-encoded by git when format is given - revision.Body = ReEncodeCommitMessage(message, revision.MessageEncoding); - revision.Subject = revision.Body?.Substring(0, revision.Body.IndexOfAny(new[] { '\r', '\n' })) ?? ""; + // See also RevisionReader for parsing commit body + string body = ReEncodeCommitMessage(message, revision.MessageEncoding); + revision.Body = body; + + ReadOnlySpan span = (body ?? "").AsSpan(); + int endSubjectIndex = span.IndexOf('\n'); + revision.Subject = endSubjectIndex >= 0 + ? span.Slice(0, endSubjectIndex).TrimEnd().ToString() + : body ?? ""; } if (loadRefs) @@ -3507,7 +3514,7 @@ public string GetFileText(ObjectId id, Encoding encoding) "-z", $"-n {count}", revision, - "--pretty=format:%e%n%s%n%n%b", + "--pretty=format:%e%n%B", { !string.IsNullOrEmpty(authorPattern), string.Concat("--author=\"", authorPattern, "\"") } }; @@ -3884,7 +3891,7 @@ public bool IsRunningGitProcess() // there was a bug: Git before v1.8.4 did not recode commit message when format is given // Lossless encoding is used, because LogOutputEncoding might not be lossless and not recoded // characters could be replaced by replacement character while re-encoding to LogOutputEncoding - public string ReEncodeCommitMessage(string s, string? toEncodingName) + public string? ReEncodeCommitMessage(string s, string? toEncodingName) { Encoding? encoding; try @@ -3896,7 +3903,7 @@ public string ReEncodeCommitMessage(string s, string? toEncodingName) return s + "\n\n! Unsupported commit message encoding: " + toEncodingName + " !"; } - return ReEncodeStringFromLossless(s, encoding); + return ReEncodeStringFromLossless(s, encoding)?.Trim(); } public Encoding? GetEncodingByGitName(string? encodingName) diff --git a/GitCommands/GitCommands.csproj b/GitCommands/GitCommands.csproj index 63ec3b333d3..0e56ef6983d 100644 --- a/GitCommands/GitCommands.csproj +++ b/GitCommands/GitCommands.csproj @@ -22,6 +22,7 @@ + diff --git a/GitCommands/RevisionReader.cs b/GitCommands/RevisionReader.cs index 793ebcd2902..37f56adf1b6 100644 --- a/GitCommands/RevisionReader.cs +++ b/GitCommands/RevisionReader.cs @@ -3,11 +3,13 @@ using System.Diagnostics; using System.Diagnostics.CodeAnalysis; using System.Linq; +using System.Runtime.CompilerServices; using System.Text; using System.Threading.Tasks; using GitExtUtils; using GitUI; using GitUIPluginInterfaces; +using Microsoft.Toolkit.HighPerformance.Buffers; using Microsoft.VisualStudio.Threading; namespace GitCommands @@ -35,24 +37,22 @@ public sealed class RevisionReader : IDisposable { private const string FullFormat = - // These header entries can all be decoded from the bytes directly. - // Each hash is 20 bytes long. - - /* Object ID */ "%H" + - /* Tree ID */ "%T" + - /* Parent IDs */ "%P%n" + - /* Author date */ "%at%n" + - /* Commit date */ "%ct%n" + - /* Encoding */ "%e%n" + - /* - Items below here must be decoded as strings to support non-ASCII. - */ - /* Author name */ "%aN%n" + - /* Author email */ "%aE%n" + - /* Committer name */ "%cN%n" + - /* Committer email */ "%cE%n" + - /* Commit subject */ "%s%n%n" + - /* Commit body */ "%b"; + // These header entries can all be decoded from the bytes directly. + // Each hash is 20 bytes long. + + /* Object ID */ "%H" + + /* Tree ID */ "%T" + + /* Parent IDs */ "%P%n" + + /* Author date */ "%at%n" + + /* Commit date */ "%ct%n" + + /* Encoding */ "%e%n" + + + // Items below here must be decoded as strings to support non-ASCII. + /* Author name */ "%aN%n" + + /* Author email */ "%aE%n" + + /* Committer name */ "%cN%n" + + /* Committer email */ "%cE%n" + + /* Commit raw body */ "%B"; private readonly CancellationTokenSequence _cancellationTokenSequence = new(); @@ -114,8 +114,6 @@ public sealed class RevisionReader : IDisposable int parseErrors = 0; #endif - // This property is relatively expensive to call for every revision, so - // cache it for the duration of the loop. var logOutputEncoding = module.LogOutputEncoding; long sixMonths = new DateTimeOffset(DateTime.Now.ToUniversalTime() - TimeSpan.FromDays(30 * 6)).ToUnixTimeSeconds(); @@ -123,37 +121,21 @@ public sealed class RevisionReader : IDisposable { token.ThrowIfCancellationRequested(); - // Pool string values likely to form a small set: encoding, authorname, authoremail, committername, committeremail - StringPool stringPool = new(); - var buffer = new byte[4096]; foreach (var chunk in process.StandardOutput.BaseStream.ReadNullTerminatedChunks(ref buffer)) { token.ThrowIfCancellationRequested(); - if (TryParseRevision(module, chunk, stringPool, logOutputEncoding, out var revision)) + if (TryParseRevision(module, chunk, logOutputEncoding, sixMonths, out var revision) + && (revisionPredicate is null || revisionPredicate(revision))) { - if (revisionPredicate is null || revisionPredicate(revision)) - { - // The full commit message body is used initially in InMemFilter, after which it isn't - // strictly needed and can be re-populated asynchronously. - // - // We keep full multiline message bodies within the last six months. - // Commits earlier than that have their properties set to null and the - // memory will be GCd. - if (sixMonths > revision.AuthorUnixTime) - { - revision.Body = null; - } - - // Look up any refs associated with this revision - revision.Refs = refsByObjectId[revision.ObjectId].AsReadOnlyList(); + // Look up any refs associated with this revision + revision.Refs = refsByObjectId[revision.ObjectId].AsReadOnlyList(); - revisionCount++; + revisionCount++; - subject.OnNext(revision); - } + subject.OnNext(revision); } #if TRACE else @@ -164,7 +146,7 @@ public sealed class RevisionReader : IDisposable } #if TRACE - Trace.WriteLine($"**** [{nameof(RevisionReader)}] Emitted {revisionCount} revisions in {sw.Elapsed.TotalMilliseconds:#,##0.#} ms. bufferSize={buffer.Length} poolCount={stringPool.Count} parseErrors={parseErrors}"); + Trace.WriteLine($"**** [{nameof(RevisionReader)}] Emitted {revisionCount} revisions in {sw.Elapsed.TotalMilliseconds:#,##0.#} ms. bufferSize={buffer.Length} parseErrors={parseErrors}"); #endif } @@ -243,7 +225,7 @@ private static void UpdateSelectedRef(GitModule module, IReadOnlyList r } } - private static bool TryParseRevision(GitModule module, ArraySegment chunk, StringPool stringPool, Encoding logOutputEncoding, [NotNullWhen(returnValue: true)] out GitRevision? revision) + private static bool TryParseRevision(GitModule module, ArraySegment chunk, Encoding logOutputEncoding, long sixMonths, [NotNullWhen(returnValue: true)] out GitRevision? revision) { // The 'chunk' of data contains a complete git log item, encoded. // This method decodes that chunk and produces a revision object. @@ -252,7 +234,7 @@ private static bool TryParseRevision(GitModule module, ArraySegment chunk, // at the beginning of the chunk. The latter part of the chunk will require // decoding as a string. - if (chunk.Count == 0) + if (chunk.Count < ObjectId.Sha1CharCount * 2) { revision = default; return false; @@ -260,80 +242,66 @@ private static bool TryParseRevision(GitModule module, ArraySegment chunk, #region Object ID, Tree ID, Parent IDs + ReadOnlySpan array = chunk.AsSpan(); + // The first 40 bytes are the revision ID and the tree ID back to back - if (!ObjectId.TryParseAsciiHexBytes(chunk, 0, out var objectId) || - !ObjectId.TryParseAsciiHexBytes(chunk, ObjectId.Sha1CharCount, out var treeId)) + if (!ObjectId.TryParseAsciiHexReadOnlySpan(array.Slice(0, ObjectId.Sha1CharCount), out var objectId) || + !ObjectId.TryParseAsciiHexReadOnlySpan(array.Slice(ObjectId.Sha1CharCount, ObjectId.Sha1CharCount), out var treeId)) { revision = default; return false; } - var array = chunk.Array; - var offset = chunk.Offset + (ObjectId.Sha1CharCount * 2); - var lastOffset = chunk.Offset + chunk.Count; + var offset = ObjectId.Sha1CharCount * 2; // Next we have zero or more parent IDs separated by ' ' and terminated by '\n' - var parentIds = new ObjectId[CountParents(offset)]; - var parentIndex = 0; - - int CountParents(int baseOffset) + int noParents = CountParents(ref array, offset); + if (noParents < 0) { - if (array[baseOffset] == '\n') - { - return 0; - } + // Parse issue + revision = default; + return false; + } - var count = 1; + [MethodImpl(MethodImplOptions.AggressiveInlining)] + int CountParents(ref ReadOnlySpan array, int baseOffset) + { + int count = 0; - while (true) + while (baseOffset < array.Length && array[baseOffset] != '\n') { + Debug.Assert(count == 0 || array[baseOffset] == ' ', $"Unexpected contents in the parent array: {array[baseOffset]}/{count}"); baseOffset += ObjectId.Sha1CharCount; - var c = array[baseOffset]; - - if (c != ' ') + if (count > 0) { - break; + // Except for the first parent, advance after the space + baseOffset++; } count++; - baseOffset++; } - return count; - } - - while (true) - { - if (offset >= lastOffset - ObjectId.Sha1CharCount - 1) + if (baseOffset >= array.Length || array[baseOffset] != '\n') { - revision = default; - return false; + return -1; } - var b = array[offset]; - - if (b == '\n') - { - // There are no more parent IDs - offset++; - break; - } + return count; + } - if (b == ' ') - { - // We are starting a new parent ID - offset++; - } + var parentIds = new ObjectId[noParents]; - if (!ObjectId.TryParseAsciiHexBytes(array, offset, out var parentId)) + for (int parentIndex = 0; parentIndex < noParents; parentIndex++) + { + if (!ObjectId.TryParseAsciiHexReadOnlySpan(array.Slice(offset, ObjectId.Sha1CharCount), out ObjectId parentId)) { // TODO log this parse problem revision = default; return false; } - parentIds[parentIndex++] = parentId; - offset += ObjectId.Sha1CharCount; + parentIds[parentIndex] = parentId; + offset += ObjectId.Sha1CharCount + 1; } #endregion @@ -341,10 +309,11 @@ int CountParents(int baseOffset) #region Timestamps // Lines 2 and 3 are timestamps, as decimal ASCII seconds since the unix epoch, each terminated by `\n` - var authorUnixTime = ParseUnixDateTime(); - var commitUnixTime = ParseUnixDateTime(); + var authorUnixTime = ParseUnixDateTime(ref array); + var commitUnixTime = ParseUnixDateTime(ref array); - long ParseUnixDateTime() + [MethodImpl(MethodImplOptions.AggressiveInlining)] + long ParseUnixDateTime(ref ReadOnlySpan array) { long unixTime = 0; @@ -369,61 +338,54 @@ long ParseUnixDateTime() string? encodingName; Encoding encoding; - var encodingNameEndOffset = Array.IndexOf(array, (byte)'\n', offset); + var encodingNameEndLength = array[offset..].IndexOf((byte)'\n'); - if (encodingNameEndOffset == -1) + if (encodingNameEndLength == -1) { // TODO log this error case revision = default; return false; } - if (offset == encodingNameEndOffset) + if (encodingNameEndLength == 0) { - // No encoding specified + // No encoding specified, this is the normal case since Git 1.8.4 encoding = logOutputEncoding; encodingName = null; } else { - encodingName = logOutputEncoding.GetString(array, offset, encodingNameEndOffset - offset); + encodingName = logOutputEncoding.GetString(array.Slice(offset, encodingNameEndLength)); encoding = module.GetEncodingByGitName(encodingName) ?? Encoding.UTF8; } - offset = encodingNameEndOffset + 1; + offset += encodingNameEndLength + 1; #endregion #region Encoded string values (names, emails, subject, body, [file]name) // Finally, decode the names, email, subject and body strings using the required text encoding - var s = encoding.GetString(array, offset, lastOffset - offset); + var s = encoding.GetString(array[offset..]).AsSpan(); StringLineReader reader = new(s); - var author = reader.ReadLine(stringPool); - var authorEmail = reader.ReadLine(stringPool); - var committer = reader.ReadLine(stringPool); - var committerEmail = reader.ReadLine(stringPool); + var author = reader.ReadLine(); + var authorEmail = reader.ReadLine(); + var committer = reader.ReadLine(); + var committerEmail = reader.ReadLine(); - var subject = reader.ReadLine(advance: false).Trim(); + bool skipBody = sixMonths > authorUnixTime; + (string? subject, string? body, bool hasMultiLineMessage) = reader.PeakToEnd(skipBody); - if (author is null || authorEmail is null || committer is null || committerEmail is null || subject is null) - { - // TODO log this parse error - Debug.Fail("Unable to read an entry from the log -- this should not happen"); - revision = default; - return false; - } + // We keep a full multiline message body within the last six months. + // Note also that if body and subject are identical (single line), the body never need to be stored + skipBody = skipBody || !hasMultiLineMessage; - // NOTE the convention is that the Subject string is duplicated at the start of the Body string - // Therefore we read the subject twice. - // If there are not enough characters remaining for a body, then just assign the subject string directly. - var body = reader.ReadToEnd(); - if (body is null) + if (author is null || authorEmail is null || committer is null || committerEmail is null || subject is null || (skipBody != (body is null))) { // TODO log this parse error - Debug.Fail("Unable to read body from the log -- this should not happen"); + Debug.Fail("Unable to read an entry from the log -- this should not happen"); revision = default; return false; } @@ -443,7 +405,7 @@ long ParseUnixDateTime() MessageEncoding = encodingName, Subject = subject, Body = body, - HasMultiLineMessage = subject != body, + HasMultiLineMessage = hasMultiLineMessage, HasNotes = false }; @@ -460,12 +422,12 @@ public void Dispose() /// /// Simple type to walk along a string, line by line, without redundant allocations. /// - internal struct StringLineReader + internal ref struct StringLineReader { - private readonly string _s; + private ReadOnlySpan _s; private int _index; - public StringLineReader(string s) + public StringLineReader(ReadOnlySpan s) { _s = s; _index = 0; @@ -473,46 +435,50 @@ public StringLineReader(string s) public int Remaining => _s.Length - _index; - public string? ReadLine(StringPool? pool = null, bool advance = true) + public string? ReadLine() { - if (_index == _s.Length) + if (_index >= _s.Length) { return null; } var startIndex = _index; - var endIndex = _s.IndexOf('\n', startIndex); - - if (endIndex == -1) - { - return ReadToEnd(advance); - } + var lineLength = _s[_index..].IndexOf('\n'); - if (advance) + if (lineLength == -1) { - _index = endIndex + 1; + // Consider this as an error: PeakToEnd() should be explicitly used + return null; } - return pool is not null - ? pool.Intern(_s, startIndex, endIndex - startIndex) - : _s.Substring(startIndex, endIndex - startIndex); + _index += lineLength + 1; + return StringPool.Shared.GetOrAdd(_s.Slice(startIndex, lineLength)); } - public string? ReadToEnd(bool advance = true) + public (string? subject, string? body, bool hasMultiLineMessage) PeakToEnd(bool skipBody) { - if (_index == _s.Length) + if (_index >= _s.Length) { - return null; + return (null, null, false); } - var s = _s.Substring(_index).Trim(); + ReadOnlySpan bodySlice = _s[_index..].Trim(); - if (advance) - { - _index = _s.Length; - } + // Subject can also be defined as the contents before empty line (%s for --pretty), + // this uses the alternative definition of first line in body. + int lengthSubject = bodySlice.IndexOf('\n'); + bool hasMultiLineMessage = lengthSubject >= 0; + string subject = hasMultiLineMessage + ? bodySlice.Slice(0, lengthSubject).TrimEnd().ToString() + : bodySlice.ToString(); - return s; + // See caller for reasoning when message body can be omitted + // (String interning makes hasMultiLineMessage check only for clarity) + string? body = skipBody || !hasMultiLineMessage + ? null + : bodySlice.ToString(); + + return (subject, body, hasMultiLineMessage); } } @@ -533,8 +499,6 @@ internal TestAccessor(RevisionReader revisionReader) internal ArgumentBuilder BuildArgumentsBuildArguments(RefFilterOptions refFilterOptions, string branchFilter, string revisionFilter, string pathFilter) => _revisionReader.BuildArguments(refFilterOptions, branchFilter, revisionFilter, pathFilter); - - internal static StringLineReader MakeReader(string s) => new(s); } } } diff --git a/GitCommands/StringPool.cs b/GitCommands/StringPool.cs deleted file mode 100644 index dea1035cfd6..00000000000 --- a/GitCommands/StringPool.cs +++ /dev/null @@ -1,274 +0,0 @@ -using System; -using System.Collections.Generic; -using System.Diagnostics; -using System.Diagnostics.Contracts; -using System.Text.RegularExpressions; - -// ReSharper disable SuggestBaseTypeForParameter -// ReSharper disable ForCanBeConvertedToForeach - -namespace GitCommands -{ - /// - /// Pool for string instances, with the goal of drawing from the pool without requiring - /// a precisely trimmed string for the query. - /// - public sealed class StringPool - { - private object[] _buckets = new object[2048]; - private int _capacity = 1536; - - /// - /// Gets the number of items unique strings in the pool. - /// - public int Count { get; private set; } - - public string? Intern(string source, Capture capture) => Intern(source, capture.Index, capture.Length); - - public string? Intern(string source, int index, int length) - { - if (Count >= _capacity) - { - Grow(); - } - - var hash = GetSubstringHashCode(source, index, length); - var pos = (uint)hash % _buckets.Length; - var bucket = _buckets[pos]; - - if (bucket is string s) - { - if (s.Length == length && EqualsAtIndex(source, index, s)) - { - return s; - } - - Count++; - - var substring = source.Substring(index, length); - _buckets[pos] = new List(2) { s, substring }; - return substring; - } - - if (bucket is List list) - { - for (var i = 0; i < list.Count; i++) - { - var item = list[i]; - if (item.Length == length && EqualsAtIndex(source, index, item)) - { - return item; - } - } - - Count++; - - var substring = source.Substring(index, length); - list.Add(source.Substring(index, length)); - return substring; - } - - if (bucket is null) - { - Count++; - - var substring = source.Substring(index, length); - _buckets[pos] = substring; - return substring; - } - - Debug.Fail("Bucket had prohibited type"); - return null; - } - - private void Grow() - { - _capacity *= 2; - - var newBuckets = new object[_buckets.Length * 2]; - Stack> lists = new(); - - for (var i = 0; i < _buckets.Length; i++) - { - switch (_buckets[i]) - { - case string s: - { - HashString(s); - break; - } - - case List list: - { - for (var j = 0; j < list.Count; j++) - { - HashString(list[j]); - } - - list.Clear(); - lists.Push(list); - break; - } - } - } - - _buckets = newBuckets; - - void HashString(string s) - { - var hash = GetSubstringHashCode(s, 0, s.Length); - var index = Math.Abs(hash % newBuckets.Length); - - switch (newBuckets[index]) - { - case null: - { - newBuckets[index] = s; - break; - } - - case string old when lists.Count != 0: - { - var list = lists.Pop(); - list.Add(old); - list.Add(s); - newBuckets[index] = list; - break; - } - - case string old: - { - newBuckets[index] = new List(2) { old, s }; - break; - } - - case List list: - { - list.Add(s); - break; - } - } - } - } - - #region Zero-allocation equality and hashing from substrings - - /// - /// Determines whether exists at in . - /// - /// - /// Any content within outside of the range denoted by - /// and length of is ignored. - /// This method performs no allocation. - /// - /// The string to search in for . - /// The offset within at which to start looking for . - /// The string to search for in . - /// true if the string is found at the required position, otherwise false. - /// at would extend beyond the - /// range of . - [Pure] - internal static unsafe bool EqualsAtIndex(string source, int index, string comparand) - { - var len = comparand.Length; - - if (index + comparand.Length > source.Length) - { - throw new InvalidOperationException("Index and length extend beyond end of source string."); - } - - fixed (char* pc = comparand) - { - fixed (char* ps = source) - { - // Create writable pointers to equivalent positions in each string - var c = pc; - var s = &ps[index]; - - // Loop every 10 characters (20 bytes each loop) - while (len >= 10) - { - // Compare an int by int, 5 times - if (*(int*)c != *(int*)s || - *(int*)(c + 2) != *(int*)(s + 2) || - *(int*)(c + 4) != *(int*)(s + 4) || - *(int*)(c + 6) != *(int*)(s + 6) || - *(int*)(c + 8) != *(int*)(s + 8)) - { - return false; - } - - // Update the pointers. - // We delay this (rather than using ++ inline) to avoid - // CPU stall on flushing writes. - c += 10; - s += 10; - len -= 10; - } - - // Loop every 2 characters (4 bytes) - while (len > 1 && *(int*)c == *(int*)s) - { - // Compare int by int (4 bytes each loop) - c += 2; - s += 2; - len -= 2; - } - - // If last byte has odd index, check it too - return len == 0 || (len == 1 && *c == *s); - } - } - } - - [Pure] - internal static unsafe int GetSubstringHashCode(string str, int index, int length) - { - if (index + length > str.Length) - { - throw new InvalidOperationException("Index and length extend beyond end of string."); - } - - // Slightly modified version of https://referencesource.microsoft.com/#mscorlib/system/string.cs,827 - - // Pin the object - fixed (char* src = str) - { - // Accumulators - var hash1 = 0x1505; - var hash2 = hash1; - - // Pointer to the first character of the hash - var s = src + index; - - // Pointer to the last character of the hash - var end = s + length; - - var rem = length % 2; - - // If there are an odd number of items, ignore the last one for now - end -= rem; - - // Walk characters, alternating between hash1 and hash2 - while (s < end) - { - // Add character to hash 1 - hash1 = ((hash1 << 5) + hash1) ^ *s++; - - // Add character to hash 2 - hash2 = ((hash2 << 5) + hash2) ^ *s++; - } - - if (rem == 1) - { - hash1 = ((hash1 << 5) + hash1) ^ *s; - } - - // Combine the two hashes for the final hash - return hash2 + (hash1 * 0x5D588B65); - } - } - - #endregion - } -} diff --git a/GitUI/CommandsDialogs/FormVerify.LostObject.cs b/GitUI/CommandsDialogs/FormVerify.LostObject.cs index b38d772a799..52f961f8c63 100644 --- a/GitUI/CommandsDialogs/FormVerify.LostObject.cs +++ b/GitUI/CommandsDialogs/FormVerify.LostObject.cs @@ -112,7 +112,7 @@ private LostObject(LostObjectType objectType, string rawType, ObjectId objectId) { result.Author = module.ReEncodeStringFromLossless(logPatternMatch.Groups[1].Value); string encodingName = logPatternMatch.Groups[2].Value; - result.Subject = module.ReEncodeCommitMessage(logPatternMatch.Groups[3].Value, encodingName); + result.Subject = module.ReEncodeCommitMessage(logPatternMatch.Groups[3].Value, encodingName) ?? ""; result.Date = DateTimeUtils.ParseUnixTime(logPatternMatch.Groups[4].Value); if (logPatternMatch.Groups.Count >= 5) { diff --git a/Packages.props b/Packages.props index 5f1e1a7daaf..bc17ed24618 100644 --- a/Packages.props +++ b/Packages.props @@ -12,6 +12,7 @@ + diff --git a/Plugins/GitUIPluginInterfaces/GitRevision.cs b/Plugins/GitUIPluginInterfaces/GitRevision.cs index 7fa84cb6e80..dd0f029b4ff 100644 --- a/Plugins/GitUIPluginInterfaces/GitRevision.cs +++ b/Plugins/GitUIPluginInterfaces/GitRevision.cs @@ -25,6 +25,7 @@ public sealed class GitRevision : IGitItem, INotifyPropertyChanged public static readonly Regex Sha1HashShortRegex = new(@"\b[a-f\d]{7,40}\b", RegexOptions.Compiled); private BuildInfo? _buildStatus; + private string? _body; public GitRevision(ObjectId objectId) { @@ -80,7 +81,13 @@ private static DateTime FromUnixTimeSeconds(long unixTime) public string Subject { get; set; } = ""; - public string? Body { get; set; } + public string? Body + { + // Body is not stored by default for older commits to reduce memory usage + // Body do not have to be stored explicitly if same as subject and not multiline + get => _body ?? (!HasMultiLineMessage ? Subject : null); + set => _body = value; + } public bool HasMultiLineMessage { get; set; } public bool HasNotes { get; set; } diff --git a/Plugins/GitUIPluginInterfaces/ObjectId.cs b/Plugins/GitUIPluginInterfaces/ObjectId.cs index fd7d55ce4d3..b1e9bbc6bc6 100644 --- a/Plugins/GitUIPluginInterfaces/ObjectId.cs +++ b/Plugins/GitUIPluginInterfaces/ObjectId.cs @@ -1,6 +1,7 @@ using System; using System.Diagnostics.CodeAnalysis; using System.IO; +using System.Runtime.CompilerServices; using System.Text.RegularExpressions; using System.Threading; using JetBrains.Annotations; @@ -290,9 +291,13 @@ public static bool TryParseAsciiHexBytes(ArraySegment bytes, int index, [N [MustUseReturnValue] public static bool TryParseAsciiHexBytes(ArraySegment bytes, [NotNullWhen(returnValue: true)] out ObjectId? objectId) { - var index = bytes.Offset; + ReadOnlySpan array = bytes.AsSpan(); + return TryParseAsciiHexReadOnlySpan(array, out objectId); + } - if (bytes.Count != Sha1CharCount) + public static bool TryParseAsciiHexReadOnlySpan(ReadOnlySpan array, [NotNullWhen(returnValue: true)] out ObjectId? objectId) + { + if (array.Length != Sha1CharCount) { objectId = default; return false; @@ -300,11 +305,11 @@ public static bool TryParseAsciiHexBytes(ArraySegment bytes, [NotNullWhen( var success = true; - var i1 = HexAsciiBytesToUInt32(index); - var i2 = HexAsciiBytesToUInt32(index + 8); - var i3 = HexAsciiBytesToUInt32(index + 16); - var i4 = HexAsciiBytesToUInt32(index + 24); - var i5 = HexAsciiBytesToUInt32(index + 32); + var i1 = HexAsciiBytesToUInt32(ref array, 0); + var i2 = HexAsciiBytesToUInt32(ref array, 8); + var i3 = HexAsciiBytesToUInt32(ref array, 16); + var i4 = HexAsciiBytesToUInt32(ref array, 24); + var i5 = HexAsciiBytesToUInt32(ref array, 32); if (success) { @@ -315,10 +320,8 @@ public static bool TryParseAsciiHexBytes(ArraySegment bytes, [NotNullWhen( objectId = default; return false; - uint HexAsciiBytesToUInt32(int j) + uint HexAsciiBytesToUInt32(ref ReadOnlySpan array, int j) { - var array = bytes.Array; - return (uint)(HexAsciiByteToInt(array[j]) << 28 | HexAsciiByteToInt(array[j + 1]) << 24 | HexAsciiByteToInt(array[j + 2]) << 20 | @@ -329,14 +332,15 @@ uint HexAsciiBytesToUInt32(int j) HexAsciiByteToInt(array[j + 7])); } + [MethodImpl(MethodImplOptions.AggressiveInlining)] int HexAsciiByteToInt(byte b) { - if (b >= '0' && b <= '9') + if (b is >= (byte)'0' and <= (byte)'9') { return b - 48; } - if (b >= 'a' && b <= 'f') + if (b is >= (byte)'a' and <= (byte)'f') { return b - 87; } diff --git a/UnitTests/GitCommands.Tests/RevisionReaderTests.cs b/UnitTests/GitCommands.Tests/RevisionReaderTests.cs index 82659d32952..651787e19ae 100644 --- a/UnitTests/GitCommands.Tests/RevisionReaderTests.cs +++ b/UnitTests/GitCommands.Tests/RevisionReaderTests.cs @@ -1,6 +1,4 @@ -using System; -using System.Text; -using FluentAssertions; +using FluentAssertions; using GitCommands; using NUnit.Framework; diff --git a/UnitTests/GitCommands.Tests/StringPoolTests.cs b/UnitTests/GitCommands.Tests/StringPoolTests.cs deleted file mode 100644 index f5f33637152..00000000000 --- a/UnitTests/GitCommands.Tests/StringPoolTests.cs +++ /dev/null @@ -1,196 +0,0 @@ -using System; -using System.Collections.Generic; -using System.Linq; -using GitCommands; -using NUnit.Framework; - -namespace GitCommandsTests -{ - [TestFixture] - public sealed class StringPoolTests - { - [Test] - public void Intern() - { - StringPool pool = new(); - - const string source = "abcabcabcabcabc"; - - Assert.AreEqual( - "", - pool.Intern(source, 0, 0)); - Assert.AreEqual( - "a", - pool.Intern(source, 0, 1)); - Assert.AreEqual( - "ab", - pool.Intern(source, 0, 2)); - Assert.AreEqual( - "abc", - pool.Intern(source, 0, 3)); - - Assert.AreEqual(4, pool.Count); - - Assert.AreEqual( - "", - pool.Intern(source, 3, 0)); - Assert.AreEqual( - "a", - pool.Intern(source, 3, 1)); - Assert.AreEqual( - "ab", - pool.Intern(source, 3, 2)); - Assert.AreEqual( - "abc", - pool.Intern(source, 3, 3)); - - Assert.AreEqual(4, pool.Count); - - Assert.AreSame( - pool.Intern(source, 0, 3), - pool.Intern(source, 0, 3)); - Assert.AreSame( - pool.Intern(source, 0, 3), - pool.Intern(source, 3, 3)); - - Assert.AreNotEqual( - pool.Intern(source, 0, 3), - pool.Intern(source, 0, 2)); - Assert.AreNotEqual( - pool.Intern(source, 0, 3), - pool.Intern(source, 1, 3)); - - Assert.AreEqual(5, pool.Count); - } - - [Test] - public void EqualsAtIndex() - { - const string s = "01234567890123456789012345678901234567890123456789012345678901234567890123456789"; - - for (var index = 0; index <= s.Length; index++) - { - for (var length = 0; length < s.Length - index; length++) - { - const string format = "index={0} length={1}"; - - Assert.True(StringPool.EqualsAtIndex(s, index, s.Substring(index, length)), format, index, length); - - Assert.False(StringPool.EqualsAtIndex(s, index, s.Substring(index, length) + 'Z'), format, index, length); - - if (index > 0 && length > 0) - { - Assert.False(StringPool.EqualsAtIndex(s, index - 1, s.Substring(index, length)), format, index, length); - Assert.False(StringPool.EqualsAtIndex(s, index, s.Substring(index - 1, length)), format, index, length); - } - - if (index + length < s.Length) - { - if (length == 0) - { - Assert.True(StringPool.EqualsAtIndex(s, index + 1, s.Substring(index, length)), format, index, length); - Assert.True(StringPool.EqualsAtIndex(s, index, s.Substring(index + 1, length)), format, index, length); - } - else - { - Assert.False(StringPool.EqualsAtIndex(s, index + 1, s.Substring(index, length)), format, index, length); - Assert.False(StringPool.EqualsAtIndex(s, index, s.Substring(index + 1, length)), format, index, length); - } - } - } - } - } - - [Test] - public void GetSubstringHashCode() - { - const string s = "01234567890abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ`¬¦!\"£$€%^&*()-_=+[]{};:'@#~/?,.<>\\|"; - - for (var i = 0; i < 10; i++) - { - Console.Out.WriteLine($"0x{StringPool.GetSubstringHashCode(s, 0, i):X2}"); - } - - Assert.AreEqual(0x162A16FE, (uint)StringPool.GetSubstringHashCode(s, 0, 0)); - Assert.AreEqual(0x05E19FCE, (uint)StringPool.GetSubstringHashCode(s, 0, 1)); - Assert.AreEqual(0x05E4405D, (uint)StringPool.GetSubstringHashCode(s, 0, 2)); - Assert.AreEqual(0xFC2C8D57, (uint)StringPool.GetSubstringHashCode(s, 0, 3)); - Assert.AreEqual(0xFC833FEA, (uint)StringPool.GetSubstringHashCode(s, 0, 4)); - Assert.AreEqual(0x36D35466, (uint)StringPool.GetSubstringHashCode(s, 0, 5)); - Assert.AreEqual(0x42005971, (uint)StringPool.GetSubstringHashCode(s, 0, 6)); - Assert.AreEqual(0x4B54D52B, (uint)StringPool.GetSubstringHashCode(s, 0, 7)); - Assert.AreEqual(0xBC227B3E, (uint)StringPool.GetSubstringHashCode(s, 0, 8)); - Assert.AreEqual(0xCB2B1F36, (uint)StringPool.GetSubstringHashCode(s, 0, 9)); - - Assert.AreEqual(3779978672, (uint)StringPool.GetSubstringHashCode(s, 0, s.Length)); - } - - /* - Strings hashed 200,000 with minimum length 3 - 461 collisions over 445 indices - Most crowded bucket had 2 items - Average clashing bucket length 1.036 - */ - - [Test, Ignore("For hash analysis only, has no assertions")] - public void AnalyzeHashFunctionDistribution() - { - HashSet seenHashes = new(); - List collisions = new(); - - const int hashCount = 200_000; - const int sourceWidth = 40; - const int minLength = 3; - - var remaining = hashCount; - - while (remaining > 0) - { - for (var start = 0; start < sourceWidth; start++) - { - var maxLength = sourceWidth - start; - - for (var length = minLength; length < maxLength && remaining != 0; length++, remaining--) - { - var s = CreateRandomString(sourceWidth); - - var hash = StringPool.GetSubstringHashCode(s, start, length); - - if (!seenHashes.Add(hash)) - { - collisions.Add(hash); - } - } - } - } - - var crowdedBucketCount = collisions.Distinct().Count(); - var max = collisions.GroupBy(hash => hash).Select(g => g.Count()).Max(); - - Console.Out.WriteLine( - $"Strings hashed {hashCount:#,0} with minimum length {minLength}\n" + - $"{collisions.Count} collisions over {crowdedBucketCount} indices\n" + - $"Most crowded bucket had {max} items\n" + - $"Average clashing bucket length {(double)collisions.Count / crowdedBucketCount:0.###}"); - } - - private static readonly Random _random = new(); - - private static string CreateRandomString(int length) - { - const string chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"; - - var charsLength = chars.Length; - var stringChars = new char[length]; - - for (var i = 0; i < length; i++) - { - var randomIndex = _random.Next(charsLength); - - stringChars[i] = chars[randomIndex]; - } - - return new string(stringChars); - } - } -} diff --git a/UnitTests/GitUI.Tests/UserControls/RevisionGrid/Graph/LaneInfoProviderTests.cs b/UnitTests/GitUI.Tests/UserControls/RevisionGrid/Graph/LaneInfoProviderTests.cs index 04016c40553..87df379b71a 100644 --- a/UnitTests/GitUI.Tests/UserControls/RevisionGrid/Graph/LaneInfoProviderTests.cs +++ b/UnitTests/GitUI.Tests/UserControls/RevisionGrid/Graph/LaneInfoProviderTests.cs @@ -234,7 +234,7 @@ public void GetLaneInfo_should_display_the_subject_if_singleline_body_null() _realCommitNode.GitRevision.HasMultiLineMessage = false; _laneNodeLocator.FindPrevNode(Arg.Any(), Arg.Any()).Returns(x => (_realCommitNode, isAtNode: false)); - GetLaneInfo_should_display(_realCommitNode, suffix: _realCommitNode.GitRevision.Subject); + GetLaneInfo_should_display(_realCommitNode); } [Test]