-
Notifications
You must be signed in to change notification settings - Fork 4.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Analyzer to recommend ThreadSafe collections in parallel context. #79419
Comments
@stephentoub Should this move to dotnet/runtime? |
I couldn't figure out the best area label to add to this issue. If you have write-permissions please help me learn by adding exactly one area label. |
Tagging subscribers to this area: @dotnet/area-system-collections
Issue Details
Describe the problem you are trying to solve: A common pattern is to run parallel processing in order to produce a set of data. The default collection types such as List and HashSet are not thread-safe, and you may get unexpected behavior (like nulls being added to the collection) if you modify them from multiple threads at the same time.
Describe suggestions on how to achieve the rule: I think there should be an analyzer that detects when non-thread-safe collections are used in the context of the built-in parallelization mechanisms (ForAll or Parallel) and that can swap out the collection you are using for a more appropriate one.
Good Examples:
Other examples:
Additional context: The following set of tests demonstrates this issue in action, along with potential rewrites.
namespace NonThreadSafeCollections
{
using Microsoft.VisualStudio.TestPlatform.ObjectModel;
using Microsoft.VisualStudio.TestTools.UnitTesting;
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Linq;
using System.Security.Cryptography;
[TestClass]
public class NonThreadSafeCollections
{
// Shared input for every test in this class; ClassInit appends the generated objects to it.
private static List<TestObject> generatedObjects = new();
/// <summary>
/// Groups the shared test objects by the first character of FieldOne and FieldTwo,
/// writing to a plain Dictionary of List buckets from inside a PLINQ ForAll.
/// </summary>
/// <remarks>
/// This is the problematic pattern the surrounding issue describes: the collections used
/// here are not the concurrent ones, yet they are mutated from the parallel callback.
/// The assertions check that no element was lost and that no null ended up in a bucket;
/// compare with ParallelListNested_Fixed (concurrent collections) and
/// SingleThreadedListNested (same logic without parallelism).
/// </remarks>
[TestMethod]
public void ParallelListNested()
{
Dictionary<(char, char), List<TestObject>> testObjects = new();
generatedObjects.AsParallel().ForAll(x => {
// Check-then-create on a shared Dictionary: nothing synchronizes the ContainsKey
// check with the indexer assignment that follows it.
if (!testObjects.ContainsKey((x.FieldOne[0], x.FieldTwo[0])))
{
testObjects[(x.FieldOne[0], x.FieldTwo[0])] = new List<TestObject>();
}
// Unsynchronized List.Add on a bucket that other callbacks may be adding to as well.
testObjects[(x.FieldOne[0], x.FieldTwo[0])].Add(x);
});
// Every generated object must appear in exactly one bucket...
Assert.AreEqual(generatedObjects.Count, testObjects.Values.Select(x => x.Count).Sum());
// ...and no bucket may contain a null entry.
Assert.IsFalse(testObjects.Any(x => x.Value.Any(y => y is null)));
}
/// <summary>
/// Thread-safe rewrite of ParallelListNested: groups the shared test objects by the first
/// character of FieldOne and FieldTwo using ConcurrentDictionary and ConcurrentBag.
/// </summary>
/// <remarks>
/// Buckets are created lazily with GetOrAdd instead of being pre-populated for every
/// possible character pair. This keeps the test independent of the alphabet used by the
/// generator (no KeyNotFoundException for an unexpected prefix) and mirrors the
/// "create the bucket on first use" logic of the non-thread-safe version.
/// </remarks>
[TestMethod]
public void ParallelListNested_Fixed()
{
    ConcurrentDictionary<(char, char), ConcurrentBag<TestObject>> testObjects = new();

    generatedObjects.AsParallel().ForAll(x =>
    {
        // GetOrAdd may invoke the factory more than once under contention, but only one
        // bag is ever stored for a key and that stored bag is what every caller receives,
        // so no element can end up in a discarded bucket.
        testObjects
            .GetOrAdd((x.FieldOne[0], x.FieldTwo[0]), _ => new ConcurrentBag<TestObject>())
            .Add(x);
    });

    // Every generated object must appear in exactly one bucket...
    Assert.AreEqual(generatedObjects.Count, testObjects.Values.Select(x => x.Count).Sum());
    // ...and no bucket may contain a null entry.
    Assert.IsFalse(testObjects.Any(x => x.Value.Any(y => y is null)));
}
/// <summary>
/// Single-threaded baseline: groups the shared test objects by the first character of
/// FieldOne and FieldTwo into a Dictionary of List buckets, then verifies that nothing
/// was lost and that no bucket contains null.
/// </summary>
[TestMethod]
public void SingleThreadedListNested()
{
    Dictionary<(char, char), List<TestObject>> testObjects = new();

    foreach (TestObject item in generatedObjects)
    {
        (char, char) key = (item.FieldOne[0], item.FieldTwo[0]);

        // Create the bucket the first time a key is seen.
        if (!testObjects.TryGetValue(key, out List<TestObject> bucket))
        {
            bucket = new List<TestObject>();
            testObjects[key] = bucket;
        }

        bucket.Add(item);
    }

    int groupedCount = testObjects.Values.Sum(bucket => bucket.Count);
    Assert.AreEqual(generatedObjects.Count, groupedCount);

    bool anyNull = testObjects.Values.Any(bucket => bucket.Any(entry => entry is null));
    Assert.IsFalse(anyNull);
}
/// <summary>
/// Copies the shared test objects into a plain List from inside a PLINQ ForAll.
/// This is the unsynchronized pattern the surrounding issue describes; the assertions
/// check that no element was lost and that no null was stored.
/// </summary>
[TestMethod]
public void ParallelList()
{
    List<TestObject> testObjects = new();
    ParallelQuery<TestObject> parallelSource = generatedObjects.AsParallel();

    // List.Add is invoked directly from the parallel callback, with no locking.
    parallelSource.ForAll(testObjects.Add);

    Assert.AreEqual(generatedObjects.Count, testObjects.Count);

    bool anyNull = testObjects.Any(entry => entry is null);
    Assert.IsFalse(anyNull);
}
/// <summary>
/// Thread-safe rewrite of ParallelList: copies the shared test objects into a
/// ConcurrentBag from inside a PLINQ ForAll, then verifies the count and the
/// absence of null entries.
/// </summary>
[TestMethod]
public void ParallelList_Fixed()
{
    ConcurrentBag<TestObject> testObjects = new();
    ParallelQuery<TestObject> parallelSource = generatedObjects.AsParallel();

    parallelSource.ForAll(testObjects.Add);

    Assert.AreEqual(generatedObjects.Count, testObjects.Count);

    bool anyNull = testObjects.Any(entry => entry is null);
    Assert.IsFalse(anyNull);
}
/// <summary>
/// Single-threaded baseline: copies the shared test objects into a List one at a time,
/// then verifies the count and the absence of null entries.
/// </summary>
[TestMethod]
public void SingleThreadedList()
{
    List<TestObject> testObjects = new();

    foreach (TestObject item in generatedObjects)
    {
        testObjects.Add(item);
    }

    Assert.AreEqual(generatedObjects.Count, testObjects.Count);

    bool anyNull = testObjects.Any(entry => entry is null);
    Assert.IsFalse(anyNull);
}
/// <summary>
/// Class-level setup: appends 10000 freshly generated test objects to the shared list.
/// </summary>
/// <param name="context">Test context supplied by the framework; not used here.</param>
[ClassInitialize]
public static void ClassInit(TestContext context)
{
    const int objectCount = 10000;

    // The sequence is lazy, so AddRange creates and appends the objects one by one, in order.
    generatedObjects.AddRange(
        Enumerable.Range(0, objectCount).Select(_ => MakeNewTestObject()));
}
/// <summary>
/// Returns a cryptographically strong random index in the range [0, <paramref name="max"/>).
/// </summary>
/// <param name="max">Exclusive upper bound of the index; must be greater than zero.</param>
/// <returns>A non-negative integer strictly less than <paramref name="max"/>.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="max"/> is zero or negative (there is no valid index in that case).
/// </exception>
public static int GetRandomPositiveIndex(int max)
{
    // RandomNumberGenerator.GetInt32 produces a uniformly distributed value below the bound.
    // It replaces the previous approach of scaling a raw 32-bit value through a double,
    // which needed a rejection loop to stay below max and was slightly biased.
    return RandomNumberGenerator.GetInt32(max);
}
/// <summary>
/// Builds a random string of the requested length, drawing each character from
/// the <c>chars</c> alphabet via GetRandomPositiveIndex.
/// </summary>
/// <param name="characters">Number of characters in the resulting string.</param>
/// <returns>A new string of <paramref name="characters"/> randomly chosen characters.</returns>
public static string GetRandomString(int characters)
{
    char[] picked = Enumerable
        .Repeat(chars, characters)
        .Select(alphabet => alphabet[GetRandomPositiveIndex(alphabet.Length)])
        .ToArray();

    return new string(picked);
}

// Alphabet the random strings are drawn from: upper-case letters followed by digits.
private const string chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
/// <summary>
/// Creates a test object whose four fields are independent random strings.
/// </summary>
/// <param name="fieldLength">Length of each generated field; defaults to 1000.</param>
/// <returns>A newly populated <c>TestObject</c>.</returns>
static TestObject MakeNewTestObject(int fieldLength = 1000)
{
    var created = new TestObject();

    // Fields are filled in declaration order, one random string each.
    created.FieldOne = GetRandomString(fieldLength);
    created.FieldTwo = GetRandomString(fieldLength);
    created.FieldThree = GetRandomString(fieldLength);
    created.FieldFour = GetRandomString(fieldLength);

    return created;
}
/// <summary>
/// Payload type used by the tests: four mutable string fields, each defaulting to
/// the empty string.
/// </summary>
private class TestObject
{
    public string FieldOne { get; set; } = "";

    public string FieldTwo { get; set; } = "";

    public string FieldThree { get; set; } = "";

    public string FieldFour { get; set; } = "";
}
}
}
|
Describe the problem you are trying to solve
A common pattern is to run parallel processing in order to produce a set of data. The default collection types such as List and HashSet are not thread-safe, and you may get unexpected behavior (like nulls being added to the collection) if you modify them from multiple threads at the same time.
Describe suggestions on how to achieve the rule
I think there should be an analyzer that detects when non-thread-safe collections are used in the context of the built-in parallelization mechanisms (ForAll or Parallel) and that can swap out the collection you are using for a more appropriate one.
Good Examples:
Other examples:
Additional context
The following set of tests demonstrates this issue in action along with potential rewrites.
The text was updated successfully, but these errors were encountered: