From 7841d83715e42cd60cc5b393e24e9bf68d7e091c Mon Sep 17 00:00:00 2001 From: stephentoub Date: Mon, 25 Apr 2016 09:03:30 -0400 Subject: [PATCH 1/2] Avoid defensive copy in String.Concat(string[]) Today the String.Concat(params string[]) implementation makes a defensive copy of the input string[] in order to avoid issues where another thread mutates the array concurrently with the concatenation. Such a situation is possible but exceedingly rare, so we can redo the implementation to make it cheaper when there isn't concurrent mutation and more expensive when there is, rather than always having it be more expensive. This commit changes string.Concat to optimistically assume there won't be such concurrent mutation. It avoids allocating the defensive string[] array copy, and instead just proceeds to allocate a string of the right length and copy the input strings into it. If along the way it discovers that something has changed such that the string lengths no longer add up to exactly what was precomputed, then it falls back to the original implementation. Example microbenchmark: ```C# using System; using System.Diagnostics; public class Program { public static void Main() { string result; string[] inputs = new[] { "abcd", "efgh", "ijkl", "mnop", "qrst", "uvwx", "yz" }; var sw = new Stopwatch(); while (true) { int gen0 = GC.CollectionCount(0); sw.Restart(); for (int i = 0; i < 20000000; i++) result = string.Concat(inputs); sw.Stop(); Console.WriteLine($"{GC.CollectionCount(0) - gen0}: {sw.Elapsed.TotalSeconds}"); } } } ``` Before the change: ``` > corerun test.exe 1525: 2.0733829 1526: 2.0599474 1526: 2.0717786 1526: 2.0318797 ^C ``` After the change: ``` 763: 1.4700695 762: 1.446919 763: 1.4581139 763: 1.4568889 ^C ``` --- src/mscorlib/src/System/String.cs | 66 +++++++++++++++++++++++++++++-- 1 file changed, 63 insertions(+), 3 deletions(-) diff --git a/src/mscorlib/src/System/String.cs b/src/mscorlib/src/System/String.cs index 3d2ac0ff2a7c..2efc19aedf05 100644 --- a/src/mscorlib/src/System/String.cs +++ b/src/mscorlib/src/System/String.cs @@ -3339,13 +3339,73 @@ public static String Concat(params String[] values) { if (values == null) throw new ArgumentNullException("values"); Contract.Ensures(Contract.Result() != null); - // Spec#: Consider a postcondition saying the length of this string == the sum of each string in array Contract.EndContractBlock(); - int totalLength=0; + // It's possible that the input values array could be changed concurrently on another + // thread, such that we can't trust that each read of values[i] will be equivalent. + // Worst case, we can make a defensive copy of the array and use that, but we first + // optimistically try the allocation and copies assuming that the array isn't changing, + // which represents the 99.999% case, in particular since string.Concat is used for + // string concatenation by the languages, with the input array being a params array. + + // Sum the lengths of all input strings + long totalLengthLong = 0; + for (int i = 0; i < values.Length; i++) + { + string value = values[i]; + if (value != null) + { + totalLengthLong += value.Length; + } + } + + // If it's too long, fail, or if it's empty, return an empty string. + if (totalLengthLong > int.MaxValue) + { + throw new OutOfMemoryException(); + } + int totalLength = (int)totalLengthLong; + if (totalLength == 0) + { + return string.Empty; + } + + // Allocate a new string and copy each input string into it + string result = FastAllocateString(totalLength); + int copiedLength = 0; + for (int i = 0; i < values.Length; i++) + { + string value = values[i]; + if (!string.IsNullOrEmpty(value)) + { + int valueLen = value.Length; + if (valueLen > totalLength - copiedLength) + { + // Something changed concurrently. Fall back to a defensive copy. + return ConcatWithDefensiveCopy(values); + } + + FillStringChecked(result, copiedLength, value); + copiedLength += valueLen; + } + } + + // If we copied exactly the right amount, return the new string. Otherwise, + // something changed concurrently to mutate the input array: fall back to + // doing the concatenation again, but this time with a defensive copy. This + // fall back should be extremely rare. + return copiedLength == totalLength ? + result : + ConcatWithDefensiveCopy(values); + } + + private static String ConcatWithDefensiveCopy(string[] values) + { + int totalLength = 0; + // Always make a copy to prevent changing the array on another thread. String[] internalValues = new String[values.Length]; - + for (int i=0; i Date: Mon, 25 Apr 2016 11:15:04 -0400 Subject: [PATCH 2/2] Address PR feedback Use a recursive call to Concat rather than using a separate ConcatWithDefensiveCopy method. --- src/mscorlib/src/System/String.cs | 56 ++++++++----------------------- 1 file changed, 14 insertions(+), 42 deletions(-) diff --git a/src/mscorlib/src/System/String.cs b/src/mscorlib/src/System/String.cs index 2efc19aedf05..ebcff1fe0f4e 100644 --- a/src/mscorlib/src/System/String.cs +++ b/src/mscorlib/src/System/String.cs @@ -3174,7 +3174,17 @@ public static String Concat(params Object[] args) { throw new OutOfMemoryException(); } } - return ConcatArray(sArgs, totalLength); + + string result = FastAllocateString(totalLength); + int currPos = 0; + for (int i = 0; i < sArgs.Length; i++) + { + Contract.Assert(currPos <= totalLength - sArgs[i].Length, "[String.Concat](currPos <= totalLength - sArgs[i].Length)"); + FillStringChecked(result, currPos, sArgs[i]); + currPos += sArgs[i].Length; + } + + return result; } [ComVisible(false)] @@ -3319,22 +3329,6 @@ public static String Concat(String str0, String str1, String str2, String str3) return result; } - [System.Security.SecuritySafeCritical] // auto-generated - private static String ConcatArray(String[] values, int totalLength) { - String result = FastAllocateString(totalLength); - int currPos=0; - - for (int i=0; i totalLength - copiedLength) { - // Something changed concurrently. Fall back to a defensive copy. - return ConcatWithDefensiveCopy(values); + copiedLength = -1; + break; } FillStringChecked(result, copiedLength, value); @@ -3394,29 +3388,7 @@ public static String Concat(params String[] values) { // something changed concurrently to mutate the input array: fall back to // doing the concatenation again, but this time with a defensive copy. This // fall back should be extremely rare. - return copiedLength == totalLength ? - result : - ConcatWithDefensiveCopy(values); - } - - private static String ConcatWithDefensiveCopy(string[] values) - { - int totalLength = 0; - - // Always make a copy to prevent changing the array on another thread. - String[] internalValues = new String[values.Length]; - - for (int i=0; i