Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use threadlocal storage for pooling our matrices. #11014

Merged
merged 5 commits into from
May 5, 2016
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
138 changes: 57 additions & 81 deletions src/Workspaces/Core/Portable/Utilities/EditDistance.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
using System.Collections.Generic;
using System.Diagnostics;
using System.Text;
using System.Threading;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sort

using Microsoft.CodeAnalysis;
using static System.Math;

namespace Roslyn.Utilities
{
Expand Down Expand Up @@ -110,7 +110,11 @@ public int GetEditDistance(string target, int threshold = int.MaxValue)
}

private const int MaxMatrixPoolDimension = 64;
private static readonly SimplePool<int[,]> s_matrixPool = new SimplePool<int[,]>(() => InitializeMatrix(new int[64, 64]));
private static readonly ThreadLocal<int[,]> t_matrixPool =
new ThreadLocal<int[,]>(() => InitializeMatrix(new int[MaxMatrixPoolDimension, MaxMatrixPoolDimension]));

// Per-thread scratch dictionary reused across edit-distance computations. Each thread
// lazily gets its own instance via ThreadLocal. The dictionary is NOT cleared
// automatically: a consumer must call Clear() on the value before using it.
// Marked readonly for consistency with t_matrixPool; no visible code reassigns the field.
private static readonly ThreadLocal<Dictionary<char, int>> t_dictionaryPool =
    new ThreadLocal<Dictionary<char, int>>(() => new Dictionary<char, int>());

private static int[,] GetMatrix(int width, int height)
{
Expand All @@ -119,15 +123,7 @@ public int GetEditDistance(string target, int threshold = int.MaxValue)
return InitializeMatrix(new int[width, height]);
}

return s_matrixPool.Allocate();
}

private static void ReleaseMatrix(int[,] matrix)
{
if (matrix.GetLength(0) <= MaxMatrixPoolDimension && matrix.GetLength(1) <= MaxMatrixPoolDimension)
{
s_matrixPool.Free(matrix);
}
return t_matrixPool.Value;
}

private static int[,] InitializeMatrix(int[,] matrix)
Expand Down Expand Up @@ -182,8 +178,6 @@ public static int GetEditDistance(ArraySlice<char> source, ArraySlice<char> targ
: GetEditDistanceWorker(target, source, threshold);
}

private static SimplePool<Dictionary<char, int>> s_dictionaryPool = new SimplePool<Dictionary<char, int>>(() => new Dictionary<char, int>());

private static int GetEditDistanceWorker(ArraySlice<char> source, ArraySlice<char> target, int threshold)
{
// Note: sourceLength will always be smaller or equal to targetLength.
Expand Down Expand Up @@ -497,74 +491,68 @@ private static int GetEditDistanceWorker(ArraySlice<char> source, ArraySlice<cha
Debug.Assert(offset >= 0);

var matrix = GetMatrix(sourceLength + 2, targetLength + 2);
var characterToLastSeenIndex_inSource = s_dictionaryPool.AllocateAndClear();

try
var characterToLastSeenIndex_inSource = t_dictionaryPool.Value;
characterToLastSeenIndex_inSource.Clear();

for (int i = 1; i <= sourceLength; i++)
{
for (int i = 1; i <= sourceLength; i++)
var lastMatchIndex_inTarget = 0;
var sourceChar = source[i - 1];

// Determine the portion of the column we actually want to examine.
var jStart = Math.Max(1, i - offset);
var jEnd = Math.Min(targetLength, i + minimumEditCount + offset);

// If we're examining only a subportion of the column, then we need to make sure
// that the values outside that range are set to Infinity. That way we don't
// consider them when we look through edit paths from above (for this column) or
// from the left (for the next column).
if (jStart > 1)
{
var lastMatchIndex_inTarget = 0;
var sourceChar = source[i - 1];

// Determine the portion of the column we actually want to examine.
var jStart = Math.Max(1, i - offset);
var jEnd = Math.Min(targetLength, i + minimumEditCount + offset);

// If we're examining only a subportion of the column, then we need to make sure
// that the values outside that range are set to Infinity. That way we don't
// consider them when we look through edit paths from above (for this column) or
// from the left (for the next column).
if (jStart > 1)
{
matrix[i + 1, jStart] = Infinity;
}
matrix[i + 1, jStart] = Infinity;
}

if (jEnd < targetLength)
{
matrix[i + 1, jEnd + 2] = Infinity;
}
if (jEnd < targetLength)
{
matrix[i + 1, jEnd + 2] = Infinity;
}

for (int j = jStart; j <= jEnd; j++)
{
var targetChar = target[j - 1];

var i1 = GetValue(characterToLastSeenIndex_inSource, targetChar);
var j1 = lastMatchIndex_inTarget;

var matched = sourceChar == targetChar;
if (matched)
{
lastMatchIndex_inTarget = j;
}

matrix[i + 1, j + 1] = Min(
matrix[i, j] + (matched ? 0 : 1),
matrix[i + 1, j] + 1,
matrix[i, j + 1] + 1,
matrix[i1, j1] + (i - i1 - 1) + 1 + (j - j1 - 1));
}
for (int j = jStart; j <= jEnd; j++)
{
var targetChar = target[j - 1];

characterToLastSeenIndex_inSource[sourceChar] = i;
var i1 = GetValue(characterToLastSeenIndex_inSource, targetChar);
var j1 = lastMatchIndex_inTarget;

// Recall that minimumEditCount is simply the difference in length of our two
// strings. So matrix[i+1,i+1] is the cost for the upper-left diagonal of the
// matrix. matrix[i+1,i+1+minimumEditCount] is the cost for the lower right diagonal.
// Here we are simply getting the lowest cost edit of these two substrings so far.
// If this lowest cost edit is greater than our threshold, then there is no need
// to proceed.
if (matrix[i + 1, i + minimumEditCount + 1] > threshold)
var matched = sourceChar == targetChar;
if (matched)
{
return BeyondThreshold;
lastMatchIndex_inTarget = j;
}

matrix[i + 1, j + 1] = Min(
matrix[i, j] + (matched ? 0 : 1),
matrix[i + 1, j] + 1,
matrix[i, j + 1] + 1,
matrix[i1, j1] + (i - i1 - 1) + 1 + (j - j1 - 1));
}

return matrix[sourceLength + 1, targetLength + 1];
}
finally
{
ReleaseMatrix(matrix);
s_dictionaryPool.Free(characterToLastSeenIndex_inSource);
characterToLastSeenIndex_inSource[sourceChar] = i;

// Recall that minimumEditCount is simply the difference in length of our two
// strings. So matrix[i+1,i+1] is the cost for the upper-left diagonal of the
// matrix. matrix[i+1,i+1+minimumEditCount] is the cost for the lower right diagonal.
// Here we are simply getting the lowest cost edit of these two substrings so far.
// If this lowest cost edit is greater than our threshold, then there is no need
// to proceed.
if (matrix[i + 1, i + minimumEditCount + 1] > threshold)
{
return BeyondThreshold;
}
}

return matrix[sourceLength + 1, targetLength + 1];
}

private static string ToString(int[,] matrix, int width, int height)
Expand Down Expand Up @@ -657,18 +645,6 @@ public void Free(T value)
}
}

/// <summary>
/// Convenience helpers for <see cref="SimplePool{T}"/> instances that pool dictionaries.
/// </summary>
internal static class SimplePoolExtensions
{
    /// <summary>
    /// Takes a dictionary from <paramref name="pool"/> and empties it before handing it
    /// to the caller.
    /// </summary>
    /// <param name="pool">The pool to allocate the dictionary from.</param>
    /// <returns>The allocated dictionary, with all of its entries removed.</returns>
    public static Dictionary<TKey, TValue> AllocateAndClear<TKey, TValue>(
        this SimplePool<Dictionary<TKey, TValue>> pool)
    {
        // A pooled instance may still hold entries from an earlier use, so wipe it first.
        var pooledDictionary = pool.Allocate();
        pooledDictionary.Clear();
        return pooledDictionary;
    }
}

internal static class ArrayPool<T>
{
private const int MaxPooledArraySize = 256;
Expand Down